#!python
# -*- mode: python; Encoding: utf-8; coding: utf-8 -*-
# Last updated: <2018/12/06 21:56:48 +0900>
"""
use pydub. divide wav file. and FFT.

The input WAV file is normalized, split into 1/60 second frames, and each
frame is reduced to a 32-point wave that is repeated to rebuild a rough
approximation of the original sound.

References:

[Python/pydub] Extracting parts of mp3/wav files (cutting, splitting)
| Algorithm Zakki
https://algorithm.joho.info/programming/python/pydub-split/

[Python/pydub] Converting mp3/wav data to a NumPy array | Algorithm Zakki
https://algorithm.joho.info/programming/python/pydub-numpy/

Fast Fourier Transform (FFT) - Jinkou Chinou ni Kansuru Dansouroku
http://aidiary.hatenablog.com/entry/20110618/1308367728

Short-time Fourier transform - Jinkou Chinou ni Kansuru Dansouroku
http://aidiary.hatenablog.com/entry/20110716/1310824587

Windows10 x64 + Python 2.7.15 32bit
"""

import os
import sys

import numpy as np

# NOTE: pydub and matplotlib are imported inside the functions that use
# them, so plotting/playback back ends are only required when the matching
# flag below is enabled and the numeric helpers work with numpy alone.

# Feature flags (0 = off, 1 = on).
ave_enable = 0          # average all 32-point blocks instead of using the first
draw_wave_enable = 0    # plot original frames next to the rebuilt ones
play_sound = 1          # play the original and the rebuilt sound
print_table = 0         # print the 32-point waves as a packed 4-bit table
export_wav = 1          # write the rebuilt sound to "_output.wav"

# Number of points in one reduced wave.
WAVE_POINTS = 32


def draw_wave(dt):
    """Plot up to the first 8 frames of ``dt`` as stacked subplots.

    Args:
        dt: sequence of 1-D sample arrays.
    """
    import matplotlib.pyplot as plt

    row = 8
    # Slicing avoids an IndexError when fewer than ``row`` frames exist.
    for i, frame in enumerate(dt[:row]):
        plt.subplot(row, 1, i + 1)
        plt.plot(frame)
        plt.grid()
    plt.show()


def composie(dt, average=None):
    """Reduce every frame to a 32-point wave and rebuild it by repetition.

    Args:
        dt: sequence of 1-D sample arrays (one per frame).
        average: when true, each of the 32 points is the mean of the samples
            found at that position in every 32-sample block of the frame;
            when false, the first 32 samples of the frame are used. ``None``
            (default) falls back to the module flag ``ave_enable``.

    Returns:
        Tuple ``(ndt, nwave)``. ``ndt`` holds one 32-point float array per
        frame. ``nwave`` holds, per frame, that wave repeated
        ``len(frame) // 32`` times, so a rebuilt frame can be shorter than
        the source frame.
    """
    if average is None:
        average = bool(ave_enable)

    ndt = []
    nwave = []
    for src in dt:
        src = np.asarray(src, dtype=float)
        size = len(src)
        nd = np.zeros(WAVE_POINTS)

        if average:
            # Count the samples per position so that a trailing partial
            # block does not bias the mean, and an empty frame does not
            # divide by zero.
            counts = np.zeros(WAVE_POINTS)
            for start in range(0, size, WAVE_POINTS):
                chunk = src[start:start + WAVE_POINTS]
                nd[:len(chunk)] += chunk
                counts[:len(chunk)] += 1
            nd = nd / np.maximum(counts, 1)
        else:
            head = src[:WAVE_POINTS]
            nd[:len(head)] = head
            # Blend the first two points with the samples that follow the
            # 32-point window; presumably this softens the seam where the
            # repeated wave wraps around. Skipped when the frame is too
            # short to have those samples (e.g. the last frame of a file).
            if size > WAVE_POINTS:
                nd[0] = 0.50 * nd[0] + 0.50 * src[WAVE_POINTS]
            if size > WAVE_POINTS + 1:
                nd[1] = 0.75 * nd[1] + 0.25 * src[WAVE_POINTS + 1]

        ndt.append(nd)
        # Floor division keeps the repeat count an int on Python 2 and 3.
        nwave.append(np.tile(nd, size // WAVE_POINTS))
    return ndt, nwave


def get_sound_from_numpy_arrays(dt, rate):
    """Build a mono 16-bit pydub ``AudioSegment`` from a sample array.

    Args:
        dt: 1-D numpy array of sample values in the int16 range.
        rate: sampling rate in Hz.

    Returns:
        The resulting ``AudioSegment``.
    """
    from pydub import AudioSegment

    # Clip first so out-of-range values saturate instead of wrapping.
    pcm = np.clip(dt, -32768, 32767).astype("int16")
    sound = AudioSegment(
        pcm.tobytes(),
        sample_width=2,     # 2 byte (16 bit) samples
        frame_rate=rate,    # sampling rate
        channels=1          # mono
    )
    return sound


def normalize_samples(samples):
    """Scale ``samples`` linearly so that min maps to 0.0 and max to 1.0.

    The values are converted to float before subtracting; doing the
    subtraction on int16 data overflows when the range exceeds 32767.

    Args:
        samples: 1-D array-like of numeric samples.

    Returns:
        Float numpy array of the same length. An empty input yields an empty
        array; a constant input yields 0.5 everywhere (mid scale).
    """
    values = np.asarray(samples, dtype=float)
    if values.size == 0:
        return values
    low = values.min()
    span = values.max() - low
    if span == 0:
        return np.full(values.shape, 0.5)
    return (values - low) / span


def split_frames(src, frame_len):
    """Split ``src`` into consecutive slices of ``frame_len`` samples.

    Args:
        src: 1-D sequence supporting slicing.
        frame_len: positive integer frame length.

    Returns:
        List of slices; the last one may be shorter than ``frame_len``.

    Raises:
        ValueError: if ``frame_len`` is smaller than 1.
    """
    if frame_len < 1:
        raise ValueError("frame_len must be >= 1")
    return [src[i:i + frame_len] for i in range(0, len(src), frame_len)]


def format_table(ndt):
    """Format 32-point waves as lines of a packed 4-bit table.

    Each wave value is scaled by 15 and truncated; two consecutive values
    are packed into one byte (even index in the low nibble, odd index in
    the high nibble), giving 16 bytes per wave.

    Args:
        ndt: sequence of 32-point arrays with values expected in 0.0 - 1.0.

    Returns:
        List of text lines, including the opening and closing lines.
    """
    lines = ["tbl={"]
    for wave in ndt:
        d = (np.asarray(wave) * 0x0f).astype("int16")
        packed = [
            ((int(d[i + 1]) & 0x0f) << 4) | (int(d[i]) & 0x0f)
            for i in range(0, WAVE_POINTS, 2)
        ]
        lines.append(" {" + ",".join("0x%02x" % v for v in packed) + "},")
    lines.append("}")
    return lines


def _draw_comparison(dt, nwave):
    """Plot the first 5 original frames, each above its rebuilt frame."""
    import matplotlib.pyplot as plt

    iadd = 0
    row = 10
    pairs = min(row // 2, len(dt) - iadd, len(nwave) - iadd)
    for k in range(pairs):
        for offset, frames in ((1, dt), (2, nwave)):
            plt.subplot(row, 1, 2 * k + offset)
            plt.plot(frames[iadd + k])
            # "top" replaces the "ymax" keyword removed from matplotlib.
            plt.ylim(top=1.0)
            plt.grid()
    plt.show()


def main():
    """Main."""
    if len(sys.argv) != 2:
        print("Usage: python %s WAV_filename" % os.path.basename(__file__))
        sys.exit()

    from pydub import AudioSegment

    infile = sys.argv[1]
    sound = AudioSegment.from_wav(infile)     # read wave file
    data = np.array(sound.get_array_of_samples())
    # Samples are interleaved; stepping by the channel count keeps the
    # first channel only.
    src = normalize_samples(data[::sound.channels])

    rate = sound.frame_rate
    frm = rate // 60    # samples per 1/60 second frame (int on Python 2/3)
    sample_len = len(src)
    if frm < 1:
        sys.exit("Sampling rate %d Hz is too low" % rate)

    print("-- Input file : %s" % infile)
    print("-- Channel : %d" % sound.channels)
    print("-- Sampling rate : %d Hz" % rate)
    print("-- 1 Frame length : %f point" % frm)
    print("-- Duration : %f msec" % len(sound))
    print("-- Sample length : %f point" % sample_len)
    print("-- n : %d" % (sample_len // frm))

    # divide
    dt = split_frames(src, frm)
    ndt, nwave = composie(dt)

    # draw graph
    if draw_wave_enable:
        _draw_comparison(dt, nwave)

    # Join all rebuilt frames in one pass.
    nw = np.concatenate(nwave) if nwave else np.array([])

    # Quantize to 4-bit steps (-7..7) and scale back up to 16-bit levels.
    nww = ((nw - 0.5) * 0x0f).astype("int16") * 0x0fff
    new_sound = get_sound_from_numpy_arrays(nww, rate)

    if play_sound:
        from pydub.playback import play

        org_src = (src - 0.5) * 0x0ffff
        org_sound = get_sound_from_numpy_arrays(org_src, rate)
        play(org_sound)
        play(new_sound)

    if export_wav:
        fn = "_output.wav"
        new_sound.export(fn, format="wav")
        print("-- output : %s" % fn)

    if print_table:
        for line in format_table(ndt):
            print(line)


if __name__ == "__main__":
    main()