import wave
import numpy
from pyaudio import PyAudio
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import math

# Module-level setup: load the whole WAV file into memory as a
# (channels, samples) array and derive how many one-second segments it holds.
wf = wave.open('e:/tests/audio2miditest.wav', 'rb')

# Create the PyAudio object and an output stream matching the file's format.
# NOTE(review): this stream is not written to or closed anywhere in the
# visible code -- confirm whether playback was intended; if not, it should be
# closed and p.terminate() called.
p = PyAudio()
stream = p.open(
    format=p.get_format_from_width(wf.getsampwidth()),
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

nframes = wf.getnframes()
print("nframes", nframes)
framerate = wf.getframerate()
# Remember the channel count before the file is closed; it drives the reshape
# below instead of assuming a stereo file.
nchannels = wf.getnchannels()

# Read every frame of the file into one bytes object.
str_data = wf.readframes(nframes)
wf.close()

# Convert the raw bytes to an array of samples.  numpy.frombuffer replaces the
# deprecated binary mode of numpy.fromstring; the result is a read-only view,
# which is enough because the samples are only read afterwards.
# NOTE(review): numpy.short assumes 16-bit samples (wf.getsampwidth() == 2).
wave_data = numpy.frombuffer(str_data, dtype=numpy.short)

# Frames are interleaved: one column per channel, row count inferred; then
# transpose so that wave_data[k] is the sample sequence of channel k.
wave_data = wave_data.reshape(-1, nchannels).T
print("wave_data shape", wave_data.shape)

# Number of complete one-second segments available per channel.
nsegments = wave_data.shape[1] // framerate
print("nsegments", nsegments)


def _nearest_piano_key(freq_hz):
    """Return the 1-based piano key number closest to ``freq_hz``.

    Key 1 is anchored at 27.5 Hz (A0) and consecutive keys are one
    equal-tempered semitone, i.e. a factor of 2 ** (1 / 12), apart.  The exact
    base-2 form is used on purpose: rounding the semitone ratio to 1.059
    accumulates more than half a semitone of error for in-tune notes above
    roughly 1.2 kHz and therefore selects the wrong key there.

    Raises:
        ValueError: if ``freq_hz`` is not strictly positive.
    """
    return round(12 * math.log2(freq_hz / 27.5)) + 1


def _plot_results(seg_freqs, key_nums):
    """Plot the per-segment peak frequencies and the matching key numbers.

    Both sequences hold one entry per analysed segment and must be non-empty
    (the y tick range is built from ``min``/``max`` of ``key_nums``).
    """
    # SimHei is selected so the CJK titles and axis labels below can render.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(20, 20), dpi=90)

    # Upper panel: dominant frequency of each segment as a line.
    xs = range(len(seg_freqs))
    ax1 = plt.subplot(211)
    ax1.plot(xs, seg_freqs, color="blue")
    ax1.set_title("频率时序谱")
    ax1.set_xlabel("时间 S")
    ax1.set_ylabel("时点主频率")

    # Lower panel: nearest piano key of each segment as a scatter plot.
    xs1 = range(len(key_nums))
    ax2 = plt.subplot(212)
    ax2.scatter(xs1, key_nums, color="red")
    ax2.set_xticks(xs1)
    ax2.set_yticks(range(min(key_nums) - 1, max(key_nums) + 1))
    ax2.set_title("钢琴键时序谱")
    ax2.set_xlabel("时间 S")
    ax2.set_ylabel("时点对应钢琴键序号")
    ax2.grid()

    plt.show()


def freqs():
    """Find the dominant frequency and nearest piano key of each segment.

    Reads the module-level ``wave_data``, ``framerate`` and ``nsegments``.
    The first channel is cut into ``nsegments`` consecutive windows of
    ``framerate`` samples (one second each).  For every window the strongest
    FFT bin below about 4 kHz, excluding the DC bin, is taken as the dominant
    frequency and mapped to a piano key number.  Intermediate values are
    printed, and the two resulting series are plotted when at least one
    segment was analysed.  A completely silent window reports the lowest
    non-DC bin.

    Returns:
        None.
    """
    # Window length in samples.  It is tied to the sample rate so that it
    # agrees with nsegments (computed from framerate) and with the plots'
    # time axis in seconds, instead of assuming a 44.1 kHz file.
    N = framerate
    # Frequency spacing of an N-point FFT is framerate / N (not N - 1).
    df = framerate / N
    freq = [df * n for n in range(0, N)]  # centre frequency of every bin
    print("freq", freq)

    # Only the lower half of the spectrum is meaningful for real input; step
    # the upper limit down in tens until it is at or below 4000 Hz.  Every
    # window has the same length, so this is computed once.
    d = N // 2
    while freq[d] > 4000:
        d -= 10

    seg_freqs = []  # dominant frequency of each segment
    key_nums = []   # nearest piano key number of each segment
    channel = wave_data[0]
    for i in range(nsegments):
        start = i * N
        segment = channel[start:start + N]
        magnitudes = numpy.abs(numpy.fft.fft(segment) * 2 / N)

        # Search bins 1 .. d-2.  Bin 0 (DC) is skipped: it is not a pitch and
        # a peak there would yield 0 Hz, whose logarithm is undefined.
        peak_bin = int(numpy.argmax(magnitudes[1:d - 1])) + 1
        max_v = magnitudes[peak_bin]
        max_f = freq[peak_bin]
        seg_freqs.append(max_f)
        print("max f:v", max_f, max_v)

        # Map the dominant frequency to the closest piano key.
        key_nums.append(_nearest_piano_key(max_f))
    print("key_nums", key_nums)

    # With less than one full segment of audio there is nothing to plot, and
    # the tick range in the plot cannot be built from an empty list.
    if not key_nums:
        return
    _plot_results(seg_freqs, key_nums)


def main():
    """Script entry point: run the frequency / piano-key analysis."""
    freqs()


if __name__ == '__main__':
    main()