#!python
# -*- mode: python; Encoding: utf-8; coding: utf-8 -*-
# Last updated: <2018/12/07 01:28:47 +0900>
"""
Split a WAV file into 1/60-second frames with pydub and run an FFT on each.

References:

[Python/pydub] Extracting parts of mp3/wav files (cutting, splitting)
| Algorithm Notes
https://algorithm.joho.info/programming/python/pydub-split/

[Python/pydub] Converting mp3/wav data to a NumPy array | Algorithm Notes
https://algorithm.joho.info/programming/python/pydub-numpy/

Fast Fourier Transform (FFT) - aidiary blog
http://aidiary.hatenablog.com/entry/20110618/1308367728

Short-Time Fourier Transform - aidiary blog
http://aidiary.hatenablog.com/entry/20110716/1310824587

Originally written on Windows10 x64 + Python 2.7.15 32bit.
"""

import os
import sys

import numpy as np

# NOTE: pydub and matplotlib are imported inside the functions that need
# them, so the numeric helpers below can be imported (and tested) on a
# machine without audio or GUI dependencies.

# Set to a true value to plot the raw frames instead of their spectra.
draw_wave_enable = 0
# Set to a true value to apply a Hamming window to each frame before the FFT.
window_enable = 0


def fft(dt, rate):
    """Run an FFT on every frame in ``dt``.

    Args:
        dt: iterable of 1-D sample sequences (one per frame).
        rate: sampling rate in Hz, used to label the frequency bins.

    Returns:
        A tuple ``(specs, freqlists, ampspecs, phasespecs)`` of four lists,
        each holding one entry per frame: the complex spectrum, the
        frequency of every bin, the amplitude spectrum (list of floats) and
        the phase spectrum in radians (list of floats).

    The module-level ``window_enable`` flag is read on every call; when it
    is true each frame is multiplied by a Hamming window first.
    """
    specs = []
    freqlists = []
    ampspecs = []
    phasespecs = []

    for d in dt:
        frame = np.asarray(d)
        n = len(frame)

        if window_enable:
            frame = np.hamming(n) * frame

        spec = np.fft.fft(frame)
        freqlist = np.fft.fftfreq(n, d=1.0 / rate)

        # Amplitude and phase are taken from the full-precision complex
        # values.  (Truncating the real/imaginary parts to int before
        # arctan2 zeroes every component smaller than 1 and yields a wrong
        # phase.)
        ampspec = list(np.abs(spec))
        phasespec = list(np.angle(spec))

        specs.append(spec)
        freqlists.append(freqlist)
        ampspecs.append(ampspec)
        phasespecs.append(phasespec)

    return specs, freqlists, ampspecs, phasespecs


def draw_fft(ampspecs, freqlists, rate, iadd, row=8):
    """Plot up to ``row`` amplitude spectra, stacked vertically.

    Args:
        ampspecs: per-frame amplitude spectra, as returned by ``fft``.
        freqlists: per-frame frequency bins, as returned by ``fft``.
        rate: sampling rate in Hz; the x axis spans 0 .. rate / 2.
        iadd: index of the first frame to plot.
        row: number of subplot rows (default 8).

    Frames beyond the end of ``ampspecs`` are skipped instead of raising
    ``IndexError``, so short recordings can still be displayed.
    """
    import matplotlib.pyplot as plt

    available = min(len(ampspecs), len(freqlists)) - iadd
    for i in range(max(0, min(row, available))):
        plt.subplot(row, 1, i + 1)
        plt.plot(freqlists[iadd + i], ampspecs[iadd + i])
        plt.axis([0, rate / 2.0, 0, 100])
        plt.xlabel("[Hz]")
        plt.ylabel("amp spec")
        plt.grid()

    plt.show()


def draw_wave(dt, iadd, row=8):
    """Plot up to ``row`` raw frames, stacked vertically.

    Args:
        dt: list of per-frame sample sequences.
        iadd: index of the first frame to plot.
        row: number of subplot rows (default 8).

    Frames beyond the end of ``dt`` are skipped instead of raising
    ``IndexError``.
    """
    import matplotlib.pyplot as plt

    available = len(dt) - iadd
    for i in range(max(0, min(row, available))):
        plt.subplot(row, 1, i + 1)
        plt.plot(dt[iadd + i])
        plt.ylim(ymax=1.0)
        plt.grid()

    plt.show()


def normalize_samples(x):
    """Rescale samples linearly so that min -> 0.0 and max -> 1.0.

    The input is converted to float *before* any arithmetic: subtracting
    in the original integer dtype (e.g. int16) wraps around for loud
    signals.  A constant (zero-span) signal maps to all zeros rather than
    NaN, and an empty input is returned as an empty float array.
    """
    samples = np.asarray(x, dtype=float)
    if samples.size == 0:
        return samples

    lowest = samples.min()
    span = samples.max() - lowest
    if span == 0:
        return np.zeros_like(samples)
    return (samples - lowest) / span


def split_frames(x, frm):
    """Cut ``x`` into consecutive chunks of ``frm`` samples.

    The last chunk is shorter when ``len(x)`` is not a multiple of ``frm``.

    Raises:
        ValueError: if ``frm`` is smaller than 1.
    """
    if frm < 1:
        raise ValueError("frame length must be >= 1, got %r" % (frm,))
    return [x[i:i + frm] for i in range(0, len(x), frm)]


def main():
    """Read the WAV file named on the command line and plot its frames."""
    if len(sys.argv) != 2:
        print("Usage: python %s WAV_filename" % os.path.basename(__file__))
        sys.exit()

    from pydub import AudioSegment

    infile = sys.argv[1]
    sound = AudioSegment.from_wav(infile)  # read wave file
    # To audition the file: from pydub.playback import play; play(sound)

    data = np.array(sound.get_array_of_samples())
    x = data[::sound.channels]  # keep the first channel only (mono)
    x = normalize_samples(x)

    rate = sound.frame_rate
    sample_len = len(x)
    # One frame is 1/60 second.  Integer division keeps this usable as a
    # range() step on Python 3; the clamp avoids a zero step for rates
    # below 60 Hz.
    frm = max(1, rate // 60)
    n = sample_len // frm

    print("-- Input file : %s" % infile)
    print("-- Channel : %d" % sound.channels)
    print("-- Sampling rate : %d Hz" % rate)
    print("-- Duration : %f msec" % len(sound))
    print("-- Sample length : %d point" % sample_len)
    print("-- 1 Frame length : %d point" % frm)
    print("-- n : %d" % n)

    # divide
    dt = split_frames(x, frm)

    if draw_wave_enable:
        # draw wave
        draw_wave(dt, 0)
    else:
        # FFT
        specs, freqlists, ampspecs, phasespecs = fft(dt, rate)
        draw_fft(ampspecs, freqlists, rate, 0)


if __name__ == "__main__":
    main()