ADTLib

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 83d4f5fb737b858a6ab95d9280ff11b34f3fe975
parent 034f378faae12c4c01ab5454a4dd3535ca544b84
Author: CarlSouthall <[email protected]>
Date:   Thu, 28 Sep 2017 17:31:01 +0100

2.0 full remake

Diffstat:
M ADTLib/__init__.py | 34 +++++++++++++++++++++++++++++++---
D ADTLib/__init__.pyc | 0
A ADTLib/files/HihatADTLibAll.data-00000-of-00001 | 0
A ADTLib/files/HihatADTLibAll.index | 0
A ADTLib/files/HihatADTLibAll.meta | 0
A ADTLib/files/KickADTLibAll.data-00000-of-00001 | 0
A ADTLib/files/KickADTLibAll.index | 0
A ADTLib/files/KickADTLibAll.meta | 0
A ADTLib/files/PPParams.npy | 0
A ADTLib/files/SnareADTLibAll.data-00000-of-00001 | 0
A ADTLib/files/SnareADTLibAll.index | 0
A ADTLib/files/SnareADTLibAll.meta | 0
D ADTLib/models/__init__.py | 103 -------------------------------------------------------------------------------
D ADTLib/models/__init__.pyc | 0
D ADTLib/nn/NNFiles/BDHAll-1000 | 0
D ADTLib/nn/NNFiles/BDHAll-1000.meta | 0
D ADTLib/nn/NNFiles/BDKAll-1000 | 0
D ADTLib/nn/NNFiles/BDKAll-1000.meta | 0
D ADTLib/nn/NNFiles/BDSAll-1000 | 0
D ADTLib/nn/NNFiles/BDSAll-1000.meta | 0
D ADTLib/nn/__init__.py | 76 ----------------------------------------------------------------------------
D ADTLib/nn/__init__.pyc | 0
M ADTLib/utils/__init__.py | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------
D ADTLib/utils/__init__.pyc | 0
M LICENSE.txt | 2 +-
M README.md | 43 ++++++++++++++++++-------------------------
A bin/ADT | 38 ++++++++++++++++++++++++++++++++++++++
D bin/ADTBDRNN | 87 -------------------------------------------------------------------------------
M usage.md | 88 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
29 files changed, 482 insertions(+), 410 deletions(-)

diff --git a/ADTLib/__init__.py b/ADTLib/__init__.py @@ -1,10 +1,37 @@ # -*- coding: utf-8 -*- """ -@author: CarlSouthall +Created on Fri May 26 15:10:50 2017 +@author: CarlSouthall """ -from __future__ import absolute_import, division, print_function -from . import utils, nn, models +from . import utils +import os + +def ADT(filenames,text='yes',tab='yes',save_dir=None): + location=utils.location_extract() + Onsets=[] + for k in filenames: + specs=utils.spec(k) + AFs=utils.system_restore(specs,location) + PP=utils.load_pp_param(location) + Peaks=[] + for j in range(len(AFs)): + Peaks.append(utils.meanPPmm(AFs[j][:,0],PP[j,0],PP[j,1],PP[j,2])) + sorted_p=utils.sort_ascending(Peaks) + if save_dir!=None: + os.chdir(save_dir) + if text=='yes': + utils.print_to_file(sorted_p,k) + + if tab=='yes': + utils.tab_create([Peaks[2],Peaks[1],Peaks[0]],k) + + Onsets.append({'Kick':Peaks[0],'Snare':Peaks[1],'Hihat':Peaks[2]}) + return Onsets + + + + +\ No newline at end of file diff --git a/ADTLib/__init__.pyc b/ADTLib/__init__.pyc Binary files differ. diff --git a/ADTLib/files/HihatADTLibAll.data-00000-of-00001 b/ADTLib/files/HihatADTLibAll.data-00000-of-00001 Binary files differ. diff --git a/ADTLib/files/HihatADTLibAll.index b/ADTLib/files/HihatADTLibAll.index Binary files differ. diff --git a/ADTLib/files/HihatADTLibAll.meta b/ADTLib/files/HihatADTLibAll.meta Binary files differ. diff --git a/ADTLib/files/KickADTLibAll.data-00000-of-00001 b/ADTLib/files/KickADTLibAll.data-00000-of-00001 Binary files differ. diff --git a/ADTLib/files/KickADTLibAll.index b/ADTLib/files/KickADTLibAll.index Binary files differ. diff --git a/ADTLib/files/KickADTLibAll.meta b/ADTLib/files/KickADTLibAll.meta Binary files differ. diff --git a/ADTLib/files/PPParams.npy b/ADTLib/files/PPParams.npy Binary files differ. diff --git a/ADTLib/files/SnareADTLibAll.data-00000-of-00001 b/ADTLib/files/SnareADTLibAll.data-00000-of-00001 Binary files differ. 
diff --git a/ADTLib/files/SnareADTLibAll.index b/ADTLib/files/SnareADTLibAll.index Binary files differ. diff --git a/ADTLib/files/SnareADTLibAll.meta b/ADTLib/files/SnareADTLibAll.meta Binary files differ. diff --git a/ADTLib/models/__init__.py b/ADTLib/models/__init__.py @@ -1,102 +0,0 @@ -# -*- coding: utf-8 -*- -""" -@author: CarlSouthall -ADTBDRNN - -""" - - -from __future__ import absolute_import, division, print_function - -import madmom -import tensorflow as tf -import numpy as np -import scipy -import os -import inspect -import ADTLib.nn as ADTnn -from ADTLib.utils import Wavread, MeanPP, arrange_output, write_text - - - -def ADTBDRNN(TrackNames, out_sort='time',ret='yes', out_text='no', savedir='current',close_error=0.05,lambd=[9.7,9.9,4.9]): - - ''' Bi-directional neural network algorithm outlined in: - - Southall, C., R. Stables, J. Hockman, Automatic Drum Transcription Using - Bi-directional Recurrent Neural Networks, - Proceedings of the 17th International Society for Music Information - Retrieval Conference (ISMIR), 2016. 
- - For usage help see github.com/CarlSouthall/ADTLib/usage.md''' - - Time_Steps=1 - WL=2048 - HS=512 - - names=list(np.zeros(len(TrackNames))) - - Track=list(np.zeros(len(TrackNames))) - for i in xrange(len(TrackNames)): - Track[i]=Wavread(TrackNames[i]) - name=TrackNames[i].split('.wav') - names[i]=name[0] - - Frames=list(np.zeros(len(Track))) - Train=list(np.zeros(len(Track))) - for j in xrange(len(Track)): - NFrames=int(np.ceil(len(Track[j])/float(HS))) - Frames[j]=np.zeros((NFrames,WL)) - for i in xrange(NFrames): - Frames[j][i]=np.squeeze(madmom.audio.signal.signal_frame(Track[j],i,WL,HS,origin=-HS)) - - Spectrogram=madmom.audio.spectrogram.spec(madmom.audio.stft.stft(Frames[j],np.hanning(WL), fft_size=WL)) - Train[j]=np.zeros((len(Spectrogram),Time_Steps,len(Spectrogram[0]))) - - for i in xrange(len(Spectrogram)): - for k in xrange(Time_Steps): - if i-k >= 0: - Train[j][i][Time_Steps-k-1]=Spectrogram[i-k,:] - - Path=os.path.split(inspect.getfile(ADTnn)) - NNPath=Path[0] - - Kout=ADTnn.BDRNNRestoreAll(Train,NNPath+'/NNFiles/BDKAll-1000') - Sout=ADTnn.BDRNNRestoreAll(Train,NNPath+'/NNFiles/BDSAll-1000') - Hout=ADTnn.BDRNNRestoreAll(Train,NNPath+'/NNFiles/BDHAll-1000') - - - AF=list(np.zeros(len(Track))) - P=list(np.zeros(len(Track))) - for j in xrange(len(Track)): - AF[j]=list([Kout[j][:,0],Sout[j][:,0],Hout[j][:,0]]) - P[j]=list(np.zeros(3)) - for i in xrange(len(AF[j])): - P[j][i]=MeanPP(AF[j][i],lambd[i]) - x=np.sort(P[j][i]) - peak=[] - if len(x) > 0: - peak=np.append(peak,x[0]) - - for k in xrange(len(x)-1): - if (x[k+1]-peak[len(peak)-1]) >= close_error: - peak=np.append(peak,x[k+1]) - - P[j][i]=peak - - P=arrange_output(P,output_sort=out_sort) - - if out_text == 'yes': - write_text(P,names,save_dir=savedir) - for i in xrange(len(P)): - Pnew=list(np.zeros(2)) - Pnew[0]=np.array(P[i][:,0],dtype=float) - Pnew[1]=np.array(P[i][:,1],dtype=str) - P[i]=Pnew - - if len(P)==1: - P=P[0] - - if ret=='yes': - return P - -\ No newline at end of file diff --git 
a/ADTLib/models/__init__.pyc b/ADTLib/models/__init__.pyc Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDHAll-1000 b/ADTLib/nn/NNFiles/BDHAll-1000 Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDHAll-1000.meta b/ADTLib/nn/NNFiles/BDHAll-1000.meta Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDKAll-1000 b/ADTLib/nn/NNFiles/BDKAll-1000 Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDKAll-1000.meta b/ADTLib/nn/NNFiles/BDKAll-1000.meta Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDSAll-1000 b/ADTLib/nn/NNFiles/BDSAll-1000 Binary files differ. diff --git a/ADTLib/nn/NNFiles/BDSAll-1000.meta b/ADTLib/nn/NNFiles/BDSAll-1000.meta Binary files differ. diff --git a/ADTLib/nn/__init__.py b/ADTLib/nn/__init__.py @@ -1,76 +0,0 @@ -# -*- coding: utf-8 -*- -""" -@author: CarlSouthall -""" -from __future__ import absolute_import, division, print_function -import numpy as np -import tensorflow as tf -from tensorflow.python.ops import rnn, rnn_cell - -def BDRNNRestoreAll(X, RestoreFileName, num_layers=3,Truncated=1,n_hidden=50,n_classes=2,cells='tanh'): - - tf.reset_default_graph() - batch_size=0; - for i in xrange(len(X)): - if len(X[i]) > batch_size: - batch_size=len(X[i]) - - n_input = len(X[0][0][0]) - n_steps = len(X[0][0]) - state_len=num_layers - - x = tf.placeholder("float", [None, n_steps, n_input]) - y = tf.placeholder("float", [None, n_classes]) - istate_fw = tf.placeholder("float", [None, (state_len)*n_hidden]) - istate_bw = tf.placeholder("float", [None, (state_len)*n_hidden]) - - weights = { 'out': tf.Variable(tf.random_normal([n_hidden*2, n_classes]))} - biases = { 'out': tf.Variable(tf.random_normal([n_classes]))} - - def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases): - - _X = tf.transpose(_X, [1, 0, 2]) - _X = tf.reshape(_X, [-1, n_input]) - - fw_cell_1 = rnn_cell.BasicRNNCell(n_hidden) - bw_cell_1 = rnn_cell.BasicRNNCell(n_hidden) - fw_cell=rnn_cell.MultiRNNCell([fw_cell_1]*num_layers) - 
bw_cell=rnn_cell.MultiRNNCell([bw_cell_1]*num_layers) - _X = tf.split(0, n_steps, _X) - seq=np.int32(np.ones(batch_size)*Truncated) - - outputs, statefw,statebw = rnn.bidirectional_rnn(fw_cell, bw_cell, _X, - initial_state_fw=_istate_fw, - initial_state_bw=_istate_bw, - sequence_length=seq) - - return tf.matmul(outputs[-1], _weights['out']) + _biases['out'] - - pred = BiRNN(x, istate_fw, istate_bw, weights, biases) - out=tf.nn.softmax(pred) - - init = tf.initialize_all_variables() - saver = tf.train.Saver() - Test=X - oh=list(np.zeros(len(Test))) - with tf.Session() as sess: - - sess.run(init) - saver.restore(sess,RestoreFileName) - for i in xrange (len(Test)): - test_len = len(Test[i]) - if test_len != batch_size: - e=np.zeros((batch_size-test_len,1,len(Test[i][0,0]))) - f=np.concatenate((Test[i],e)) - else: - f=Test[i] - - o = sess.run(out, feed_dict={x: f, - istate_fw: np.zeros((batch_size, (state_len)*n_hidden)), - istate_bw: np.zeros((batch_size, (state_len)*n_hidden)) - }) - oh[i]=o[:test_len] - - - - return oh diff --git a/ADTLib/nn/__init__.pyc b/ADTLib/nn/__init__.pyc Binary files differ. 
diff --git a/ADTLib/utils/__init__.py b/ADTLib/utils/__init__.py @@ -1,85 +1,369 @@ # -*- coding: utf-8 -*- """ +Created on Fri May 26 15:10:50 2017 + @author: CarlSouthall """ -from __future__ import absolute_import, division, print_function -import scipy.io.wavfile as wav -import numpy as np -import os +import os +import inspect +import madmom +import numpy as np +import subprocess +from fpdf import FPDF +import ADTLib +import tensorflow as tf +from tensorflow.contrib import rnn -def MeanPP(Track,Lambda): +def spec(file): + return madmom.audio.spectrogram.Spectrogram(file, frame_size=2048, hop_size=512, fft_size=2048,num_channels=1) - m=np.mean(Track)*Lambda +def meanPPmm(Track,Lambda,mi,ma,hop=512,fs=44100,dif=0.05): + + m=np.mean(Track)*Lambda; + if ma != 0: + if m>ma: + m=ma + if mi != 0: + if m<mi: + m=mi + TrackNew=np.zeros(len(Track)+2) + TrackNew[1:len(Track)+1]=Track + Track=TrackNew onsets=[] - Track=np.append(Track,0) - for i in xrange(len(Track)): - if Track[i]>Track[i-1] and Track[i]>Track[i+1] and Track[i]> m: - onsets=np.append(onsets,i) + values=[] + for i in range(len(Track)-2): + if Track[i+1] > Track[i] and Track[i+1]>=Track[i+2] and Track[i+1] > m: + onsets=np.append(onsets,i+1) + values=np.append(values,Track[i+1]) + if len(onsets) >0: + onsets=(onsets*hop)/float(fs) + for i in range(1,len(onsets)): + if abs(onsets[i]-onsets[i-1])<dif: + ind=np.argmax(values[i-1:i+1]) + np.delete(onsets,onsets[i-1+ind]) + + return onsets - if len(onsets) > 0: - onsets=(onsets*512)/float(44100) - return onsets - -def Wavread(TrackName): - x=wav.read(TrackName) - y=x[1] - if len(y.shape)>1: - y=np.squeeze(np.sum(y,axis=1)) - y=y/float(np.max(abs(y))) - - return y +def location_extract(): + return os.path.split((inspect.getfile(ADTLib)))[0] -def arrange_output(Inputs,output_sort='time'): - - Names=['BD','SD','HH'] - - Out=list(np.zeros(len(Inputs))) - Out1=list(np.zeros(len(Inputs))) - for i in xrange(len(Inputs)): - - Out[i]=list(np.zeros(len(Inputs[i]))) - 
Out1[i]=list(np.zeros((1,2))) - for j in xrange(len(Inputs[i])): - Out[i][j]=list(np.zeros((len(Inputs[i][j])))) - for k in xrange(len(Inputs[i][j])): - Out[i][j][k]=list(np.zeros(2)) - Out[i][j][k][0]=Inputs[i][j][k] - Out[i][j][k][1]=Names[j] +def load_pp_param(save_path): + cwd=os.getcwd() + os.chdir(save_path+'/files') + x=np.load('PPParams.npy') + os.chdir(cwd) + return x + + +def tab_create(Onsets,Filename_): + quantisation_per_beat=4 + bars_per_line=4 + notation=['x','o','o'] + pre_trackname=Filename_.split('/') + TrackName=pre_trackname[len(pre_trackname)-1].split('.')[0]+' Drum Tab' - if len(Out[i][j])>1: - Out1[i]=np.concatenate([Out1[i],Out[i][j]],axis=0) - - Out[i]=Out1[i][1:] + subprocess.call(["DBNDownBeatTracker","single","-o","DB.txt",Filename_]) + + DBFile=open("DB.txt") + DBFile=DBFile.read().split('\n') + DBFile=DBFile[:len(DBFile)-1] + for i in range(len(DBFile)): + DBFile[i]=DBFile[i].split('\t') + DBFile[i]=np.array([float(DBFile[i][0]),int(DBFile[i][1])]) + + grid=[] + if len(DBFile)>0: + max_beat=np.max(np.array(DBFile),0)[1] + beat_dif=1/float(quantisation_per_beat) + for i in range(len(DBFile)-1): + k=np.arange(DBFile[i][0],DBFile[i+1][0],(DBFile[i+1][0]-DBFile[i][0])/float(quantisation_per_beat)) + beat_poss=DBFile[i][1] + for j in k: + if beat_poss >= max_beat: + beat_poss=0 + grid.append([j,beat_poss]) + beat_poss+=beat_dif - if output_sort=='time': - Out1[i]=np.array(Out[i][:,0],dtype=float) - Out[i][:,0]=np.array(np.sort(Out1[i]),dtype=str) - indexs=np.argsort(Out1[i]) - out_names=list(Out[i][:,1]) - for j in xrange(len(indexs)): - Out[i][j,1]=out_names[indexs[j]] + quantisation_per_bar=int(max_beat*quantisation_per_beat) - - return Out - -def write_text(X,names,suffix='.ADT.txt',save_dir='current'): - - if save_dir != 'current': - current_dir=os.getcwd() - os.chdir(save_dir) - - for i in xrange(len(names)): - file = open(names[i]+suffix, "w") - for j in xrange(len(X[i])): - X[i][j][0]=X[i][j][0][0:8] - item=" ".join(X[i][j]) - 
file.write("%s\n" % item) + grid=np.array(grid) + + num_bars=np.ceil(grid.shape[0]/float(quantisation_per_bar)) + bar_grid=[] + bar_start=np.expand_dims(np.transpose(['HH','SD','KD']),1) + bar_end=np.expand_dims(np.transpose(['|','|','|']),1) + for i in range(3): + bar_grid.append(['|']) + for j in range(quantisation_per_bar): + bar_grid[i].append('-') + + + num_lines=np.int(np.floor(num_bars/float(bars_per_line))) + last_line=num_bars%float(bars_per_line) + lines=[] + lines_new=[] + for i in range(num_lines): + lines.append(np.concatenate((bar_start,np.tile(bar_grid,int(bars_per_line)),bar_end),1)) + lines_new.append([]) + for j in range(len(lines[i])): + lines[i][j]=list(lines[i][j]) + + + + if last_line > 0: + i+=1 + lines.append(np.concatenate((bar_start,np.tile(bar_grid,int(last_line)),bar_end),1)) + lines_new.append([]) + for j in range(len(lines[i])): + lines[i][j]=list(lines[i][j]) + + + + + onset_locations=[] + onset_line=[] + onset_bar_location=[] + onset_tab_location=[] + + for i in range(len(Onsets)): + onset_locations.append([]) + onset_line.append([]) + onset_tab_location.append([]) + onset_bar_location.append([]) + for j in range(len(Onsets[i])): + onset_locations[i].append(np.argmin(np.abs(grid[:,0]-Onsets[i][j]))) + onset_line[i].append(np.floor(onset_locations[i][j]/(float(quantisation_per_bar*bars_per_line)))) + onset_bar_location[i].append((onset_locations[i][j]-((onset_line[i][j])*quantisation_per_bar*bars_per_line))) + onset_tab_location[i].append(onset_bar_location[i][j]+2) + for k in range(bars_per_line-1): + if onset_bar_location[i][j]>=(k+1)*quantisation_per_bar: + onset_tab_location[i][j]+=1 + lines[int(onset_line[i][j])][i][int(onset_tab_location[i][j])]=notation[i] - if save_dir != 'current': - os.chdir(current_dir) - + lines_new=[] + for i in range(len(lines)): + lines_new.append([]) + for j in range(len(lines[i])): + lines_new[i].append(''.join(lines[i][j])) - -\ No newline at end of file + + pdf = FPDF(format='A4') + pdf.add_page() + 
pdf.set_font("Courier", size=12) + pdf.cell(200, 10, txt=TrackName,ln=1, align="C") + pdf.set_font("Courier", size=10) + + for i in range(len(lines_new)): + for j in range(len(lines_new[i])): + pdf.cell(0,3,txt=lines_new[i][j],ln=1,align="C") + pdf.cell(0,5,txt='',ln=1,align="C") + pdf.output(pre_trackname[len(pre_trackname)-1].split('.')[0]+'_drumtab.pdf') + + + os.remove("DB.txt") + else: + print('Error: No beat detected') + +def sort_ascending(x): + in_re=[] + out_re_final=[] + in_symbols=['KD','SD','HH'] + for j in range(len(x)): + in_re.append(np.concatenate(((np.expand_dims(x[j],1),np.tile(in_symbols[j],[len(x[j]),1]))),1)) + in_re=np.vstack(in_re) + sorted_ind=(in_re[:,0]).astype(float).argsort() + for j in sorted_ind: + out_re_final.append([in_re[j]]) + out_re_final=np.squeeze(np.array(out_re_final)) + return out_re_final + +def print_to_file(onsets,Filename): + pre_trackname=Filename.split('/') + f = open(pre_trackname[len(pre_trackname)-1].split('.')[0]+'.ADT.txt', "w") + for item,item2 in onsets: + f.write("%.4f \t %s \n" % (float(item), item2)) + + f.close() + + + +class SA: + + def __init__(self, training_aug_data=[],training_data=[], training_labels=[], validation_data=[], validation_labels=[], mini_batch_locations=[], network_save_filename=[], minimum_epoch=5, maximum_epoch=10, n_hidden=[20,20], n_classes=2, cell_type='LSTMP', configuration='B', attention_number=2, dropout=0.75, init_method='zero', truncated=1000, optimizer='Adam', learning_rate=0.003 ,display_train_loss='True', display_accuracy='True',save_location=[],output_act='softmax',snippet_length=100,aug_prob=0): + self.train_aug=training_aug_data + self.features=training_data + self.targ=training_labels + self.val=validation_data + self.val_targ=validation_labels + self.mini_batch_locations=mini_batch_locations + self.filename=network_save_filename + self.n_hidden=n_hidden + self.n_layers=len(self.n_hidden) + self.cell_type=cell_type + self.dropout=dropout + self.configuration=configuration 
+ self.init_method=init_method + self.truncated=truncated + self.optimizer=optimizer + self.learning_rate=learning_rate + self.n_classes=n_classes + self.minimum_epoch=minimum_epoch + self.maximum_epoch=maximum_epoch + self.display_train_loss=display_train_loss + self.num_batch=len(self.mini_batch_locations) + self.batch_size=self.mini_batch_locations.shape[1] + self.attention_number=attention_number + self.display_accuracy=display_accuracy + self.batch=np.zeros((self.batch_size,self.features.shape[1])) + self.batch_targ=np.zeros((self.batch_size,self.targ.shape[2])) + self.save_location=save_location + self.output_act=output_act + self.snippet_length=snippet_length + self.aug_prob=aug_prob + + def cell_create(self,scope_name): + with tf.variable_scope(scope_name): + if self.cell_type == 'tanh': + cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True) + elif self.cell_type == 'LSTM': + cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True) + elif self.cell_type == 'GRU': + cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True) + elif self.cell_type == 'LSTMP': + cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)], state_is_tuple=True) + cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph,output_keep_prob=self.dropout_ph) + return cells + + def weight_bias_init(self): + + if self.init_method=='zero': + self.biases = tf.Variable(tf.zeros([self.n_classes])) + elif self.init_method=='norm': + self.biases = tf.Variable(tf.random_normal([self.n_classes])) + if self.configuration =='B': + if self.init_method=='zero': + self.weights =tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)]*2, self.n_classes])) + elif self.init_method=='norm': + self.weights = { '1': tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)], 
self.n_classes])),'2': tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)], self.n_classes]))} + if self.configuration =='R': + if self.init_method=='zero': + self.weights = tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)], self.n_classes])) + elif self.init_method=='norm': + self.weights = tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)], self.n_classes])) + + def attention_weight_init(self,num): + if num==0: + self.attention_weights=[tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)]*4,self.n_hidden[(len(self.n_hidden)-1)]*2]))] + self.sm_attention_weights=[tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)]*2,self.n_hidden[(len(self.n_hidden)-1)]*2]))] + if num>0: + self.attention_weights.append(tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)]*4,self.n_hidden[(len(self.n_hidden)-1)]*2]))) + self.sm_attention_weights.append(tf.Variable(tf.random_normal([self.n_hidden[(len(self.n_hidden)-1)]*2,self.n_hidden[(len(self.n_hidden)-1)]*2]))) + def create(self): + + tf.reset_default_graph() + self.weight_bias_init() + self.x_ph = tf.placeholder("float32", [1, self.batch.shape[0], self.batch.shape[1]]) + self.y_ph = tf.placeholder("float32", self.batch_targ.shape) + self.seq=tf.constant(self.truncated,shape=[1]) + self.seq2=tf.constant(self.truncated,shape=[1]) + self.dropout_ph = tf.placeholder("float32") + self.fw_cell=self.cell_create('1') + self.fw_cell2=self.cell_create('2') + if self.configuration=='R': + self.outputs, self.states= tf.nn.dynamic_rnn(self.fw_cell, self.x_ph, + sequence_length=self.seq,dtype=tf.float32) + if self.attention_number >0: + self.outputs_zero_padded=tf.pad(self.outputs,[[0,0],[self.attention_number,0],[0,0]]) + self.RNNout1=tf.stack([tf.reshape(self.outputs_zero_padded[:,g:g+(self.attention_number+1)],[self.n_hidden[(len(self.n_hidden)-1)]*((self.attention_number)+1)]) for g in range(self.batch_size)]) + 
self.presoft=tf.matmul(self.RNNout1, self.weights) + self.biases + else: + self.presoft=tf.matmul(self.outputs[0][0], self.weights) + self.biases + elif self.configuration=='B': + self.bw_cell=self.cell_create('1') + self.bw_cell2=self.cell_create('2') + with tf.variable_scope('1'): + self.outputs, self.states= tf.nn.bidirectional_dynamic_rnn(self.fw_cell, self.bw_cell, self.x_ph, + sequence_length=self.seq,dtype=tf.float32) + + self.first_out=tf.concat((self.outputs[0],self.outputs[1]),2) + with tf.variable_scope('2'): + self.outputs2, self.states2= tf.nn.bidirectional_dynamic_rnn(self.fw_cell2, self.bw_cell2, self.first_out, + sequence_length=self.seq2,dtype=tf.float32) + self.second_out=tf.concat((self.outputs2[0],self.outputs2[1]),2) + + for i in range((self.attention_number*2)+1): + self.attention_weight_init(i) + + + + self.zero_pad_second_out=tf.pad(tf.squeeze(self.second_out),[[self.attention_number,self.attention_number],[0,0]]) +# self.attention_chunks.append(self.zero_pad_second_out[j:j+attention_number*2]) + self.attention_m=[tf.tanh(tf.matmul(tf.concat((self.zero_pad_second_out[j:j+self.batch_size],tf.squeeze(self.first_out)),1),self.attention_weights[j])) for j in range((self.attention_number*2)+1)] + self.attention_s=tf.nn.softmax(tf.stack([tf.matmul(self.attention_m[i],self.sm_attention_weights[i]) for i in range(self.attention_number*2+1)]),0) + self.attention_z=tf.reduce_sum([self.attention_s[i]*self.zero_pad_second_out[i:self.batch_size+i] for i in range(self.attention_number*2+1)],0) + self.presoft=tf.matmul(self.attention_z,self.weights)+self.biases + if self.output_act=='softmax': + self.pred=tf.nn.softmax(self.presoft) + self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.presoft, labels=self.y_ph)) + elif self.output_act=='sigmoid': + self.pred=tf.nn.sigmoid(self.presoft) + self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.presoft, labels=self.y_ph)) + if self.optimizer == 'GD': + 
self.optimize = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.cost) + elif self.optimizer == 'Adam': + self.optimize = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost) + elif self.optimizer == 'RMS': + self.optimize = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.cost) + self.correct_pred = tf.equal(tf.argmax(self.pred,1), tf.argmax(self.y_ph,1)) + self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32)) + self.init = tf.global_variables_initializer() + self.saver = tf.train.Saver() + self.saver_var = tf.train.Saver(tf.trainable_variables()) + if self.save_location==[]: + self.save_location=os.getcwd() + + + def locations_create(self,size): + self.locations=range(size) + self.dif=size%self.batch_size + if self.dif>0: + for i in xrange(self.batch_size-self.dif): + self.locations=np.append(self.locations,0) + self.location_new=np.reshape(self.locations,[-1,self.batch_size]) + return self.location_new + + + def implement(self,data): + with tf.Session() as sess: + self.saver.restore(sess, self.save_location+'/'+self.filename) + self.test_out=[]; + for i in xrange(len(data)): + self.test_len=len(data[i]) + self.test_locations=self.locations_create(self.test_len) + for k in xrange(len(self.test_locations)): + for j in xrange(self.batch_size): + self.batch[j]=data[i][self.test_locations[k,j]] + if k == 0: + self.test_out.append(sess.run(self.pred, feed_dict={self.x_ph: np.expand_dims(self.batch,0),self.dropout_ph:1})) + elif k > 0: + self.test_out_2=sess.run(self.pred, feed_dict={self.x_ph: np.expand_dims(self.batch,0),self.dropout_ph:1}) + self.test_out[i]=np.concatenate((self.test_out[i],self.test_out_2),axis=0) + self.test_out[i]=self.test_out[i][:self.test_len] + + return self.test_out + +def system_restore(data,save_path): + ins=['Kick','Snare','Hihat'] + out=[] + for i in ins: + if i =='Kick': + 
NN=SA([],np.zeros((1,1024)),np.zeros((1,1,2)),mini_batch_locations=np.zeros([1,1000]),network_save_filename=i+'ADTLibAll',save_location=save_path+"/files",cell_type='LSTMP',attention_number=2,n_hidden=[20,20],n_classes=2,truncated=1000,configuration='B',optimizer='GD') + NN.create() + NN.filename=i+'ADTLibAll' + out.append(NN.implement([data])[0]) + + return out + diff --git a/ADTLib/utils/__init__.pyc b/ADTLib/utils/__init__.pyc Binary files differ. diff --git a/LICENSE.txt b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2016, CarlSouthall +Copyright (c) 2017, CarlSouthall All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md @@ -1,59 +1,49 @@ # Automatic Drum Transcription Library (ADTLib) -The automatic drum transcription (ADT) library contains open source ADT algorithms to aid other researchers in areas of music information retrieval (MIR). +The automatic drum transcription (ADT) library contains open source ADT algorithms to aid other researchers in areas of music information retrieval (MIR). The algorithms return both a .txt file of kick drum, snare drum, and hi-hat onsets and an automatically generated drum tabulature. ## License -This library is published under the BSD license which allows redistribution and modification as long as the copyright and disclaimers are contained. The full license information can be found on the [license](https://github.com/CarlSouthall/AutoDrumTranscritpion/blob/master/LICENSE) page. +This library is published under the BSD license which allows redistribution and modification as long as the copyright and disclaimers are contained. The full license information can be found on the [license](https://github.com/CarlSouthall/ADTLibNew/blob/master/LICENSE.txt) page. 
## Installation #### Required Packages • [numpy](https://www.numpy.org) -• [scipy](https://www.scipy.org) -• [cython](https://www.cython.org) -• [madmom](https://github.com/CPJKU/madmom) -• [tensorflow](https://www.tensorflow.org/) +• [scipy](https://www.scipy.org) +• [madmom](https://github.com/CPJKU/madmom) +• [tensorflow](https://www.tensorflow.org/) +• [fpdf](https://pyfpdf.readthedocs.io/en/latest/) (for tab creation) -The easiest and suggested method to install the libary is to use pip. +The easiest and suggested method to install the library is to use pip: pip install ADTLib -To update the libary use +To update the library use: - pip install --upgrade ADTlib + pip install --upgrade ADTLib -For futher install information see the [install](https://github.com/CarlSouthall/ADTLib/blob/master/install.md) page. - - -## Algorithms - -The algorithms that are currently contained within the libary are: - -• ADTBDRNN: Bi-directional architecture outlined in [1] - ## Usage Algorithms contained within the library are both available as functions for use within python and as command line executable programmes. -###Examples +### Examples #### Command line - ADTBDRNN DrumFile1.wav DrumFile2.wav + ADT Drum.wav #### Python function ```Python -from ADTLib.models import ADTBDRNN +from ADTLib import ADT -TrackNames=['DrumFile1.wav','DrumFile2.wav'] -out=ADTBDRNN(TrackNames) +out=ADT(['Drum.wav']) ``` -See the [usage](https://github.com/CarlSouthall/ADTLib/blob/master/usage.md) page for more information. +See the [usage](https://github.com/CarlSouthall/ADTLibNew/blob/master/usage.md) page for more information. ## References @@ -61,7 +51,10 @@ See the [usage](https://github.com/CarlSouthall/ADTLib/blob/master/usage.md) pag | **[1]** | **[Southall, C., R. Stables, J. 
Hockman, Automatic Drum Transcription Using Bi-directional Recurrent Neural Networks, Proceedings of the 17th International Society for Music Information Retrieval Conference (ISMIR), 2016.](https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/217_Paper.pdf)**| | :---- | :--- | -##Help +| **[2]** | **[Southall, C., R. Stables, J. Hockman, Automatic Drum Transcription For Polyphonic Recordings Using Soft Attention Mechanisms and Convolutional Neural Networks, Proceedings of the 18th International Society for Music Information Retrieval Conference (ISMIR), 2017.](http://www.ryanstables.co.uk/docs/ISMIR2017CamReady.pdf)**| +| :---- | :--- | + +## Help Any questions please feel free to contact me on [email protected] diff --git a/bin/ADT b/bin/ADT @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@author: CarlSouthall +ADT + +""" +from __future__ import absolute_import, division, print_function +import argparse +import ADTLib as ADT + +p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,description=''' + Flag Name Description Default setting + -h help displays help file n/a + -od output_dir location output files are saved current + -o output_text defines whether the output is stored in a textfile or not yes + -ot output_tab defines whether a tabulature is created and saved to a pdf yes + I input_file_names single or list of wav file names seperated by spaces n/a + + For further usage help see github.com/CarlSouthall/ADTLib/usage.md + ''') +p = argparse.ArgumentParser(prog='ADT') +p.add_argument('I', nargs='*',help='InputFileNames') +p.add_argument('-od', nargs=1,help='output_dir', default=None) +p.add_argument('-o', nargs=1,help='output_text', choices=['yes','no'], default='yes') +p.add_argument('-ot', nargs=1,help='output_tab', choices=['yes','no'], default='yes') +args=p.parse_args() +TrackNames=args.I + +if args.od!=None: + args.od=args.od[0] +if args.o!='yes': + args.o=args.o[0] +if args.ot!='yes': + 
args.ot=args.ot[0] + +out=ADT.ADT(TrackNames, text=args.o, tab=args.ot, save_dir=args.od) + diff --git a/bin/ADTBDRNN b/bin/ADTBDRNN @@ -1,87 +0,0 @@ -#!/usr/bin/env python2.7 -# -*- coding: utf-8 -*- -""" -@author: CarlSouthall -ADTBDRNN - -""" - -from __future__ import absolute_import, division, print_function -import argparse -import ADTLib as ADT -from subprocess import call - - - -p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,description=''' -ADTBDRNN is based on the bi-directional neural network architecture outlined within: - - Southall, C., R. Stables, J. Hockman, Automatic Drum Transcription Using - Bi-directional Recurrent Neural Networks, - Proceedings of the 17th International Society for Music Information - Retrieval Conference (ISMIR), 2016. - - Flag Name Description Default setting - -h help displays help file n/a - -os output_sort defines configuration of the output time - -od output_dir location output textfiles are saved current - -p print defines whether the output is displayed in the terminal or not yes - -ot output_text defines whether the ouput is stored in a textfile or not yes - I input_file_names single or list of wav file names seperated by spaces n/a - - For further usage help see github.com/CarlSouthall/ADTLib/usage.md - ''') -p = argparse.ArgumentParser(prog='ADTBDRNN') -p.add_argument('I', nargs='*',help='InputFileNames',) -p.add_argument('-os', nargs=1,help='output_sort', choices=['time', 'instrument'], default='time') -p.add_argument('-od', nargs=1,help='output_dir', default='current') -p.add_argument('-p', nargs=1,help='print', default='yes',choices=['yes', 'no']) -p.add_argument('-ot', nargs=1,help='output_text', choices=['yes','no'], default='yes') -args=p.parse_args() -TrackNames=args.I -suffix='.ADT.txt' - -if args.os!='time': - args.os=args.os[0] -if args.od!='current': - args.od=args.od[0] -if args.p!='yes': - args.p=args.p[0] -if args.ot!='yes': - args.ot=args.ot[0] - 
-out=ADT.models.ADTBDRNN(TrackNames, out_sort=args.os, out_text=args.ot, savedir=args.od) - -if args.p=='yes' and args.ot=='no': - print(out) -elif args.p=='yes' and args.ot=='yes': - for i in xrange(len(TrackNames)): - name=TrackNames[i].split('.wav') - TrackName=name[0] - - if args.od=='current': - d=TrackName+suffix - if i==0: - print() - print ("Track"+str(i)) - print(TrackNames[i]) - print() - call(["cat", d]) - if i < len(TrackNames)-1: - print() - else: - d=savedir+TrackName+suffix - if i==0: - print() - print ("Track"+str(i)) - print(TrackNames[i]) - print() - call(["less", d]) - if i < len(TrackNames)-1: - print() - - - - - - diff --git a/usage.md b/usage.md @@ -1,74 +1,70 @@ -#Usage +# Usage -This file contains information regarding using the algorithms within the toolbox. +This file contains information regarding using the toolbox. -##ADTBDRNN +### Command line -ADT architecture defined in [1]. - -Input: wavfiles names along with control parameters - -Output: kick, snare, hihat onsets in seconds. 
- -###Command line - - ADT [-h] [-os] [-od] [-p] [-ot] [I [I ...]] + ADT [-h] [-od {None,dir}] [-o {yes,no}] [-ot {yes,no}] [I [I ...]] | Flag | Name | Description | Default setting | | ---- | ------- | ----- | ------ | -| -h | help | displays help file | n/a | -| -os | output_sort | defines configuration of the output | time | -| -od | output_dir | location output textfiles are saved | current | -| -p | print | defines whether the output is displayed in the terminal or not | yes | -| -ot | output_text | defines whether the output is stored in a textfile or not | yes| -| I | input_file_names| single or list of wav file names separated by spaces | n/a | +| -h | help | displays help file | n/a | +| -od | output_dir | location output files are saved | None | +| -o | output_text | defines whether the output is stored in a .txt file or not | yes | +| -ot | output_tab | defines whether a tablature is created and saved to a pdf | yes| +| I | input_file_names| single or list of drum.wav file names separated by spaces | n/a | -#####Examples +##### Examples - ADTBDRNN Drum.wav + ADT Drum.wav - ADTBDRNN Drum.wav Drum1.wav Drum2.wav +Perform ADT on a single audio file. Saves onset times to a .txt file in the current directory. Creates a drum tablature and saves it as a pdf in the current directory. - ADTBDRNN -od ~/Desktop -o no -ot yes Drum.wav Drum1.wav Drum2.wav + ADT Drum.wav Drum1.wav Drum2.wav - Output ordered by time printed to a text file in current directory and printed in terminal - - ADTBDRNN -os instrument -od ~/Desktop -p no -ot yes Drum.wav DrumFile1.wav DrumFile2.wav - - Output ordered by instrument printed to a text file on the desktop. +Perform ADT on multiple audio files. Saves onset times to a .txt file in the current directory. Creates a drum tablature and saves it as a pdf in the current directory. + + ADT -od ~/Desktop -o no ~/Drum.wav ~/Desktop/Drum1.wav + +Perform ADT on multiple audio files from different directories. 
Creates a drum tablature but not a .txt file and saves it to the desktop. -###Python function +### Python function + + ```Python -ADTBDRNN(TrackNames, out_sort='time',ret='yes', out_text='no', savedir='current',close_error=0.05,lambd=[9.7,9.9,4.9]) + +Onsets=ADT(filenames, text='yes', tab='yes', save_dir=None) + ``` | Name | Description | Default setting | | ------- | ----- | ------ | -| TrackNames | Drum.wav files, must be in a list if more than one. | n/a | -| out_sort | defines configuration of the output | time | -| savedir | location output textfiles are saved | current| -| ret | defines whether the output is returned from the function | yes| -| out_text | defines whether the ouput is stored in a textfile or not | no| -| close_error| Maximum distance between two onsets without onsets being combined, in seconds. | 0.05 | -| lambd| Value used for each instrument within the peak picking stage | 9.7 9.9 9.4 | +| filenames | Drum.wav files, must be in a list. | n/a | +| text | defines whether the output is stored in a .txt file or not ('yes', 'no') | 'yes' | +| tab | defines whether a tablature is created and saved to a pdf ('yes', 'no') | 'yes' | +| save_dir | location output files are saved (None (saves in current dir), dir) | None | -#####Examples +##### Examples ```Python -from ADTLib.models import ADTBDRNN +from ADTLib import ADT -Filenames='Drumfile.wav' -X=ADTBDRNN(Filenames) +Onsets=ADT(['Drum.wav']) ``` -Output stored in variable X ordered by time. - +Perform ADT on a single audio file. Saves onset times to a .txt file in the current directory. Creates a drum tablature and saves it as a pdf in the current directory. Returns onset times per instrument. 
+ ```Python -from ADTLib.models import ADTBDRNN +from ADTLib import ADT -Filenames=['Drumfile.wav','Drumfile1.wav'] -ADTBDRNN(Filenames,out_sort='instrument',ret='no',out_text='yes',savedir='Desktop') +Onsets=ADT(['Drum.wav', 'Drum1.wav', 'Drum2.wav']) ``` -Output ordered by instrument printed to a text file on the desktop. +Perform ADT on multiple audio files. Saves onset times to a .txt file in the current directory. Creates a drum tablature and saves it as a pdf in the current directory. Returns onset times per instrument. + +```Python +from ADTLib import ADT +Onsets=ADT(['~/Drum.wav', '~/Desktop/Drum1.wav'], text='no', save_dir='~/Desktop') +``` +Perform ADT on multiple audio files from different directories. Creates a drum tablature but not a .txt file and saves it to the desktop. Returns onset times per instrument.