https://blog.csdn.net/weixin_42485817/article/details/107590846
如图所示,产出如此优美的语音图谱代码如下
import numpy as np
import wave
# import matplotlib.pyplot as plt
import librosa
# -----先定义了两个公式,bark_change为线性频率坐标转换为Bark坐标,equal_loudness为等响度曲线--------
def bark_change(x):
return 6*np.log10(x/(1200*np.pi)+((x/(1200*np.pi))**2+1)**0.5)
def equal_loudness(x):
return ((x**2+56.8e6)*x**4)/((x**2+6.3e6)**2*(x**2+3.8e8))
def get_plp_lpc(filename):
f = wave.open(filename,"rb")#将.wav放在pycharm工程目录下,rb为只读
params = f.getparams()
nchannels, sampwidth, framerate, nframes =params[:4]
# print(params[:4])#返回声道数、量化位数、采样频率、采样点数
str_data = f.readframes(nframes)#readframes返回的是字符串类型的数据
signal =np.fromstring(str_data,dtype=np.int16)#将字符串转换为int类型的数组,我的sampwidth为2,所以转换为int16
signal_len=len(signal)
signal=signal/(max(abs(signal)))#归一化
# ------预加重并画时域波形----------
# signal_add=np.append(signal[0],signal[1:]-0.97*signal[:-1])
# time=np.arange(0,nframes)/1.0*framerate #画时间轴
# plt.figure(figsize=(20,10))
# plt.subplot(2,1,1)
# plt.plot(time[0:len(signal)],signal)#可能出现时间轴和信号长度不相等的情况,我这里选取了与singala长度相等的time
# plt.xlabel('time')
# plt.subplot(2,1,2)
# plt.plot(time[0:len(signal_add)],signal_add)
# plt.xlabel('time')
# plt.show()
#
# ----分帧------
wlen=1024#每帧信号的帧数
inc=256#帧移
# N=512#每帧信号帧数的一半
N=1024#每帧信号帧数的一半
nf = int(np.ceil((1.0 * signal_len - wlen +
inc) / inc))#计算帧数nf
pad_len=int((nf-1)*inc+wlen)
zeros=np.zeros(pad_len-signal_len)#不够的点用0填补
pad_signal=np.concatenate((signal,zeros))
indices=np.tile(np.arange(0,wlen),(nf,1))+np.tile(np.arange(0,nf*inc,inc),(wlen,1)).T
indices=np.array(indices,dtype=np.int32)
frames=pad_signal[indices]#最终得到帧数*帧长形式的信号数据
win=np.hamming(wlen)#先调用窗函数
# x=frames[10:] #我选取其中某一帧的数据提取PLP系数,即为frames的某一行
# y=win*x[0]#得到加窗后的信号
y=win*frames
# a=np.fft.fft(y)#做fft,默认为信号的长度即1024点
# b=np.square(abs(a[0:N])) #求fft变换结果的模的平方,只取a的一半
b=np.square(abs(np.fft.fft(y)))
df=framerate/N #频率分辨率
i=np.arange(N)#只取大于0部分的频率
freq_hz=i*df#得到实际频率坐标
# plt.plot(freq_hz,b)#得到该帧信号的功率谱
# plt.show()
freq_w=2*np.pi*np.array(freq_hz)#转换为角频率
freq_bark=bark_change(freq_w)#再转换为bark频率
point_hz= [250, 350, 450, 570, 700, 840,1000, 1170, 1370, 1600, 1850, 2150,2500,2900,3400]
#选取的临界频带数量一般要大于10,覆盖常用频率范围,这里我选取了15个中心频点
point_w=2*np.pi*np.array(point_hz)#转换为角频率
point_bark =bark_change(point_w)#转换为bark频率
bank=np.zeros((15,N))#构造15行512列的矩阵,每一行为一个滤波器向量
# filter_data=np.zeros(15)#构造15维频带能量向量
filter_data_list=[]
# -------构造滤波器组,这一段注意缩进---------
for j in range(15):
for k in range(N):
omg= freq_bark[k]- point_bark[j]
if -1.3<omg<-0.5:
bank[j,k]=10**(2.5*(omg+0.5))
elif -0.5<omg<0.5:
bank[j,k]=1
elif 0.5<omg<2.5:
bank[j,k] = 10**(-1.0*(omg-0.5))
else:
bank[j,k]=0
# filter_data[j] = np.sum(b * bank[j],-1) #滤波后将该段信号相加,最终得到15维的频带能量
filter_data_list.append( np.sum(b * bank[j],-1))
# plt.plot(freq_hz,bank[j])
# plt.xlim(0,20000)
# plt.xlabel('hz')
# plt.show()#画滤波器组
equal_data=np.stack(filter_data_list).T*equal_loudness(point_w)
cubic_data=equal_data**0.33
# ---做30点的ifft,得到30维PLP向量------------
plp_data=np.fft.ifft(cubic_data,30)
# print(plp_data)
# ---用librosa直接求传统LPC系数,需要0.7以上版本---
plp_list=[]
for plpone in plp_data:
plp=librosa.lpc(abs(plpone), 15)#要求输入参数为浮点型,经过ifft得到的plp_data有复数,因此要取abs
plp_list.append(plp)
h1=1.0/np.fft.fft(np.stack(plp_list),1024)
# spec_envelope_plp=10*np.log10(abs(h1[0:512]))
spec_envelope_plp=10*np.log10(abs(h1))
plt.figure()
plt.imshow(spec_envelope_plp)
plt.show()
# lpc=librosa.lpc(y,15)
# h2=1.0/np.fft.fft(lpc,1024)
# spec_envelope_lpc=10*np.log10(abs(h2[0:512]))
# spec_envelope_lpc=10*np.log10(abs(h2))
# plt.plot(spec_envelope_plp,'b',spec_envelope_lpc,'r')
# plt.show()
return spec_envelope_plp
python 实现PLP代码和语音分类项目
https://github.com/linhndt/spoken_language_classification/blob/master/feature_extractor/python_speech_features/rasta.py
import librosa
import librosa.filters
import scipy.fftpack as fft
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import spectrum
def rastaplp(x, fs=16000, win_time=0.040, hop_time=0.020, dorasta=True, modelorder=8):
# first compute power spectrum
p_spectrum, _ = powspec(x, fs, win_time, hop_time)
# next group to critical bands
aspectrum = audspec(p_spectrum, fs)
nbands = aspectrum.shape[0]
if dorasta:
# put in log domain
nl_aspectrum = np.log(aspectrum)
# next do rasta filtering
ras_nl_aspectrum = rastafilt(nl_aspectrum)
# do inverse log
aspectrum = np.exp(ras_nl_aspectrum)
postspectrum, _ = postaud(aspectrum, fs / 2)
lpcas = dolpc(postspectrum, modelorder)
cepstra = lpc2cep(lpcas, modelorder + 1)
if modelorder > 0:
lpcas = dolpc(postspectrum, modelorder)
cepstra = lpc2cep(lpcas, modelorder + 1)
spectra, F, pM = lpc2spec(lpcas, nbands)
else:
spectra = postspectrum
cepstra = spec2cep(spectra)
cepstra = lifter(cepstra, 0.6)
return cepstra
def powspec(x, fs=16000, window_time=0.040, hop_time=0.020, dither=1):
win_length = int(np.round(window_time * fs))
hop_length = int(np.round(hop_time * fs))
fft_length = int(np.power(2, np.ceil(np.log2(window_time * fs))))
X = librosa.stft(np.multiply(32768, x.astype(float)), n_fft=fft_length, hop_length=hop_length,
win_length=win_length, window='hann', center=False)
pow_X = np.power(np.abs(X), 2)
if dither:
pow_X = np.add(pow_X, win_length)
e = np.log(np.sum(pow_X, axis=0))
return pow_X, e
def hz2bark(f):
z = np.multiply(6, np.arcsinh(np.divide(f, 600)))
return z
def bark2hz(z):
hz = np.multiply(600, np.sinh(np.divide(z, 6)))
return hz
def fft2barkmx(fft_length, fs, nfilts=0, band_width=1, min_freq=0, max_freq=0):
if max_freq == 0:
max_freq = fs / 2
min_bark = hz2bark(min_freq)
nyqbark = hz2bark(max_freq) - min_bark
if nfilts == 0:
nfilts = np.add(np.ceil(nyqbark), 1)
wts = np.zeros((int(nfilts), int(fft_length)))
step_barks = np.divide(nyqbark, np.subtract(nfilts, 1))
binbarks = hz2bark(np.multiply(np.arange(0, np.add(np.divide(fft_length, 2), 1)), np.divide(fs, fft_length)))
for i in range(int(nfilts)):
f_bark_mid = min_bark + np.multiply(i, step_barks)
lof = np.subtract(np.subtract(binbarks, f_bark_mid), 0.5)
hif = np.add(np.subtract(binbarks, f_bark_mid), 0.5)
wts[i, 0: int(fft_length / 2) + 1] = np.power(10, np.minimum(0,
np.divide(np.minimum(hif, np.multiply(-2.5, lof)),
band_width)))
return wts
def rastafilt(x):
numer = np.arange(-2, 3)
numer = np.divide(-numer, np.sum(np.multiply(numer, numer)))
denom = np.array([1, -0.94])
zi = signal.lfilter_zi(numer, 1)
y = np.zeros((x.shape))
for i in range(x.shape[0]):
y1, zi = signal.lfilter(numer, 1, x[i, 0:4], axis=0, zi=zi * x[i, 0])
y1 = y1 * 0
y2, _ = signal.lfilter(numer, denom, x[i, 4:x.shape[1]], axis=0, zi=zi)
y[i, :] = np.append(y1, y2)
return y
def dolpc(x, modelorder=12):
nbands, nframes = x.shape
ncorr = 2 * (nbands - 1)
R = np.zeros((ncorr, nframes))
R[0:nbands, :] = x
for i in range(nbands - 1):
R[i + nbands - 1, :] = x[nbands - (i + 1), :]
r = fft.ifft(R.T).real.T
r = r[0:nbands, :]
y = np.ones((nframes, modelorder + 1))
e = np.zeros((nframes, 1))
if modelorder == 0:
for i in range(nframes):
_, e_tmp, _ = spectrum.LEVINSON(r[:, i], modelorder, allow_singularity=True)
e[i, 0] = e_tmp
else:
for i in range(nframes):
y_tmp, e_tmp, _ = spectrum.LEVINSON(r[:, i], modelorder, allow_singularity=True)
y[i, 1:modelorder + 1] = y_tmp
e[i, 0] = e_tmp
y = np.divide(y.T, np.add(np.tile(e.T, (modelorder + 1, 1)), 1e-8))
return y
def lpc2cep(a, nout=0):
nin, ncol = a.shape
order = nin - 1
if nout == 0:
nout = order + 1
cep = np.zeros((nout, ncol))
cep[0, :] = -np.log(a[0, :])
norm_a = np.divide(a, np.add(np.tile(a[0, :], (nin, 1)), 1e-8))
for n in range(1, nout):
sum = 0
for m in range(1, n):
sum = np.add(sum, np.multiply(np.multiply((n - m), norm_a[m, :]), cep[(n - m), :]))
cep[n, :] = -np.add(norm_a[n, :], np.divide(sum, n))
return cep
def lifter(x, lift=0.6, invs=False):
ncep = x.shape[0]
if lift == 0:
y = x
else:
if lift < 0:
warnings.warn('HTK liftering does not support yet; default liftering')
lift = 0.6
liftwts = np.power(np.arange(1, ncep), lift)
liftwts = np.append(1, liftwts)
if (invs):
liftwts = np.divide(1, liftwts)
y = np.matmul(np.diag(liftwts), x)
return y
def melfcc(x, fs=16000, min_freq=50, max_freq=6500, n_mfcc=13, n_bands=40, lifterexp=0.6,
fbtype='fcmel', dcttype=1, usecmp=True, window_time=0.040, hop_time=0.020,
preemph=0.97, dither=1, sumpower=1, band_width=1, modelorder=0,
broaden=0, useenergy=False):
if preemph != 0:
b = [1, -preemph]
a = 1
x = signal.lfilter(b, a, x)
pspectrum, logE = powspec(x, fs=fs, window_time=window_time, hop_time=hop_time, dither=dither)
aspectrum = audspec(pspectrum, fs=fs, nfilts=n_bands, fbtype=fbtype,
min_freq=min_freq, max_freq=max_freq)
if usecmp:
aspectrum, _ = postaud(aspectrum, fmax=max_freq, fbtype=fbtype)
if modelorder > 0:
lpcas = dolpc(aspectrum, modelorder)
cepstra = lpc2cep(lpcas, nout=n_mfcc)
else:
cepstra, _ = spec2cep(aspectrum, ncep=n_mfcc, dcttype=dcttype)
cepstra = lifter(cepstra, lift=lifterexp)
if useenergy == True:
cepstra[0, :] = logE
return cepstra
def hz2mel(f, htk=False):
if htk:
z = np.multiply(2595, np.log10(np.add(1, np.divide(f, 700))))
else:
f_0 = 0.0
f_sp = 200 / 3
brkfrq = 1000
brkpt = (brkfrq - f_0) / f_sp
logstep = np.exp(np.log(6.4) / 27.0)
f = np.array(f, ndmin=1)
z = np.zeros((f.shape[0],))
for i in range(f.shape[0]):
if f[i] < brkpt:
z[i] = (f[i] - f_0) / f_sp
else:
z[i] = brkpt + (np.log(f[i] / brkfrq) / np.log(logstep))
return z
def mel2hz(z, htk=False):
if htk:
f = np.multiply(700, np.subtract(np.power(10, np.divide(z, 2595)), 1))
else:
f_0 = 0
f_sp = 200 / 3
brkfrq = 1000
brkpt = (brkfrq - f_0) / f_sp
logstep = np.exp(np.log(6.4) / 27.0)
z = np.array(z, ndmin=1)
f = np.zeros((z.shape[0],))
for i in range(z.shape[0]):
if z[i] < brkpt:
f[i] = f_0 + f_sp * z[i]
else:
f[i] = brkfrq * np.exp(np.log(logstep) * (z[i] - brkpt))
return f
def fft2melmx(fft_length, fs, nfilts=0, band_width=1, min_freq=0, max_freq=0, htk=False, constamp=False):
if nfilts == 0:
nfilts = np.ceil(hz2mel(max_freq, htk) / 2)
if max_freq == 0:
max_freq = fs / 2
wts = np.zeros((int(nfilts), int(fft_length)))
fftfrqs = np.multiply(np.divide(np.arange(0, fft_length / 2 + 1), fft_length), fs)
min_mel = hz2mel(min_freq, htk)
max_mel = hz2mel(max_freq, htk)
binfrqs = mel2hz(np.add(min_mel, np.multiply(np.arange(0, nfilts + 2),
(max_mel - min_mel) / (nfilts + 1))), htk)
for i in range(int(nfilts)):
fs_tmp = binfrqs[np.add(np.arange(0, 3), i)]
fs_tmp = np.add(fs_tmp[1], np.multiply(band_width, np.subtract(fs_tmp, fs_tmp[1])))
loslope = np.divide(np.subtract(fftfrqs, fs_tmp[0]), np.subtract(fs_tmp[1], fs_tmp[0]))
hislope = np.divide(np.subtract(fs_tmp[2], fftfrqs), np.subtract(fs_tmp[2], fs_tmp[1]))
wts[i, 0: int(fft_length / 2) + 1] = np.maximum(0, np.minimum(loslope, hislope))
if constamp == False:
wts = np.matmul(np.diag(np.divide(2, np.subtract(binfrqs[2: int(nfilts) + 2],
binfrqs[0: int(nfilts)]))), wts)
return wts
def audspec(p_spectrum, fs=16000, nfilts=0, fbtype='bark', min_freq=0, max_freq=0, sumpower=1, band_width=1):
if nfilts == 0:
np.add(np.ceil(hz2bark(fs / 2)), 1)
if max_freq == 0:
max_freq = fs / 2
nfreqs = p_spectrum.shape[0]
nfft = (int(nfreqs) - 1) * 2
if fbtype == 'bark':
wts = fft2barkmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
elif fbtype == 'mel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
elif fbtype == 'htkmel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=True)
elif fbtype == 'fcmel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=False)
wts = wts[:, 0: nfreqs]
if sumpower:
aspectrum = np.matmul(wts, p_spectrum)
else:
aspectrum = np.power(np.matmul(wts, np.sqrt(p_spectrum)), 2)
return aspectrum
def postaud(x, fmax, fbtype='bark', broaden=0):
nbands, nframes = x.shape
nfpts = int(nbands + 2 * broaden)
if fbtype == 'bark':
bandcfhz = bark2hz(np.linspace(0, hz2bark(fmax), nfpts))
elif fbtype == 'mel':
bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax), nfpts))
elif fbtype == 'htkmel' or fbtype == 'fcmel':
bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax, htk=True), nfpts), htk=True)
bandcfhz = bandcfhz[broaden: (nfpts - broaden)]
fsq = np.power(bandcfhz, 2)
ftmp = np.add(fsq, 1.6e5)
eql = np.multiply(np.power(np.divide(fsq, ftmp), 2), np.divide(np.add(fsq, 1.44e6), np.add(fsq, 9.61e6)))
z = np.multiply(np.tile(eql, (nframes, 1)).T, x)
z = np.power(z, 0.33)
if broaden:
y = np.zeros((z.shape[0] + 2, z.shape[1]))
y[0, :] = z[0, :]
y[1:nbands + 1, :] = z
y[nbands + 1, :] = z[z.shape[0] - 1, :]
else:
y = np.zeros((z.shape[0], z.shape[1]))
y[0, :] = z[1, :]
y[1:nbands - 1, :] = z[1:z.shape[0] - 1, :]
y[nbands - 1, :] = z[z.shape[0] - 2, :]
return y, eql
def spec2cep(spec, ncep, dcttype):
nrow, ncol = spec.shape[0], spec.shape[1]
dctm = np.zeros((ncep, nrow))
if dcttype == 2 or dcttype == 3:
for i in range(ncep):
dctm[i, :] = np.multiply(
np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, 2 * nrow, 2)), (2 * nrow)), np.pi)),
np.sqrt(2 / nrow))
if dcttype == 2:
dctm[0, :] = np.divide(dctm[0, :], np.sqrt(2))
elif dcttype == 4:
for i in range(ncep):
dctm[i, :] = np.multiply(
np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, nrow + 1)), (nrow + 1)), np.pi)), 2)
dctm[i, 0] = np.add(dctm[i, 0], 1)
dctm[i, int(nrow - 1)] = np.multiply(dctm[i, int(nrow - 1)], np.power(-1, i))
dctm = np.divide(dctm, 2 * (nrow + 1))
else:
for i in range(ncep):
dctm[i, :] = np.divide(
np.multiply(np.cos(np.multiply(np.divide(np.multiply(i, np.arange(0, nrow)), (nrow - 1)), np.pi)), 2),
2 * (nrow - 1))
dctm[:, 0] = np.divide(dctm[:, 0], 2)
dctm[:, int(nrow - 1)] = np.divide(dctm[:, int(nrow - 1)], 2)
cep = np.matmul(dctm, np.log(np.add(spec, 1e-8)))
return cep, dctm
def lpc2spec(lpcas, nout=17, FMout=False):
rows, cols = lpcas.shape
order = rows - 1
gg = lpcas[0, :]
aa = np.divide(lpcas, np.tile(gg, (rows, 1)))
# Calculate the actual z-plane polyvals: nout points around unit circle
tmp_1 = np.array(np.arange(0, nout), ndmin=2).T
tmp_1 = np.divide(np.multiply(-1j, np.multiply(tmp_1, np.pi)), (nout - 1))
tmp_2 = np.array(np.arange(0, order + 1), ndmin=2)
zz = np.exp(np.matmul(tmp_1, tmp_2))
# Actual polyvals, in power (mag^2)
features = np.divide(np.power(np.divide(1, np.abs(np.matmul(zz, aa))), 2), np.tile(gg, (nout, 1)))
F = np.zeros((cols, int(np.ceil(rows / 2))))
M = F
if FMout == True:
for c in range(cols):
aaa = aa[:, c]
rr = np.roots(aaa)
ff_tmp = np.angle(rr)
ff = np.array(ff_tmp, ndmin=2).T
zz = np.exp(np.multiply(1j, np.matmul(ff, np.array(np.arange(0, aaa.shape[0]), ndmin=2))))
mags = np.sqrt(np.divide(np.power(np.divide(1, np.abs(np.matmul(zz, np.array(aaa, ndmin=2).T))), 2), gg[c]))
ix = np.argsort(ff_tmp)
dummy = np.sort(ff_tmp)
mp_F_list = []
tmp_M_list = []
for i in range(ff.shape[0]):
if dummy[i] > 0:
tmp_F_list = np.append(tmp_F_list, dummy[i])
tmp_M_list = np.append(tmp_M_list, mags[ix[i]])
M[c, 0: tmp_M_list.shape[0]] = tmp_M_list
F[c, 0: tmp_F_list.shape[0]] = tmp_F_list
return features, F, M
def deltas(x, w=9):
rows, cols = x.shape
hlen = np.floor(w / 2)
win = np.arange(hlen, -(hlen + 1), -1, dtype='float32')
xx = np.append(np.append(np.tile(x[:, 0], (int(hlen), 1)).T, x, axis=1),
np.tile(x[:, cols - 1], (int(hlen), 1)).T, axis=1)
d = signal.lfilter(win, 1, xx, axis=1)
d = d[:, int(2 * hlen): int(2 * hlen + cols)]
return d
def cep2spec(cep, nfreq, dcttype=2):
ncep, ncol = cep.shape
dctm = np.zeros((ncep, nfreq))
idctm = np.zeros((nfreq, ncep))
if dcttype == 2 or dcttype == 3:
for i in range(ncep):
dctm[i, :] = np.multiply(np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, 2 * nfreq, 2)),
(2 * nfreq)), np.pi)), np.sqrt(2 / nfreq))
if dcttype == 2:
dctm[0, :] = np.divide(dctm[0, :], np.sqrt(2))
else:
dctm[0, :] = np.divide(dctm[0, :], 2)
idctm = dctm.T
elif dcttype == 4:
for i in range(ncep):
idctm[:, i] = np.multiply(
np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, nfreq + 1).T), (nfreq + 1)), np.pi)), 2)
idctm[:, 0:ncep] = np.divide(idctm[:, 0:ncep], 2)
else:
for i in range(ncep):
idctm[:, i] = np.multiply(
np.cos(np.multiply(np.divide(np.multiply(i, np.arange(0, nfreq).T), (nfreq - 1)), np.pi)), 2)
idctm[:, [0, -1]] = np.divide(idctm[:, [0, -1]], 2)
spec = np.exp(np.matmul(idctm, cep))
return spec, idctm
def invpostaud(y, fmax, fbtype='bark', broaden=0):
nbands, nframes = y.shape
if fbtype == 'bark':
bandcfhz = bark2hz(np.linspace(0, hz2bark(fmax), nbands))
elif fbtype == 'mel':
bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax), nbands))
elif fbtype == 'htkmel' or fbtype == 'fcmel':
bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax, htk=True), nbands), htk=True)
bandcfhz = bandcfhz[broaden: (nbands - broaden)]
fsq = np.power(bandcfhz, 2)
ftmp = np.add(fsq, 1.6e5)
eql = np.multiply(np.power(np.divide(fsq, ftmp), 2),
np.divide(np.add(fsq, 1.44e6), np.add(fsq, 9.61e6)))
x = np.power(y, np.divide(1, 0.33))
if eql[0] == 0:
eql[0] = eql[1]
eql[-1] = eql[-2]
x = np.divide(x[broaden: (nbands - broaden + 1), :], np.add(np.tile(eql.T, (nframes, 1)).T, 1e-8))
return x, eql
def invpowspec(y, fs, win_time, hop_time, excit=[]):
nrow, ncol = y.shape
r = excit
winpts = int(np.round(np.multiply(win_time, fs)))
steppts = int(np.round(np.multiply(hop_time, fs)))
nfft = int(np.power(2, np.ceil(np.divide(np.log(winpts), np.log(2)))))
# Can't predict librosa stft length...
tmp = librosa.istft(y, hop_length=steppts, win_length=winpts,
window='hann', center=False)
xlen = len(tmp)
# xlen = int(np.add(winpts, np.multiply(steppts, np.subtract(ncol, 1))))
# xlen = int(np.multiply(steppts, np.subtract(ncol, 1)))
if len(r) == 0:
r = np.squeeze(np.random.randn(xlen, 1))
r = r[0:xlen]
R = librosa.stft(np.divide(r, 32768 * 12), n_fft=nfft, hop_length=steppts,
win_length=winpts, window='hann', center=False)
R = np.multiply(R, np.sqrt(y))
x = librosa.istft(R, hop_length=steppts, win_length=winpts,
window='hann', center=False)
return x
def invaudspec(aspectrum, fs=16000, nfft=512, fbtype='bark',
min_freq=0, max_freq=0, sumpower=True, band_width=1):
if max_freq == 0:
max_freq = fs / 2
nfilts, nframes = aspectrum.shape
if fbtype == 'bark':
wts = fft2barkmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
elif fbtype == 'mel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
elif fbtype == 'htkmel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=True)
elif fbtype == 'fcmel':
wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=False)
wts = wts[:, 0:int(nfft / 2 + 1)]
ww = np.matmul(wts.T, wts)
itws = np.divide(wts.T, np.tile(np.maximum(np.divide(np.mean(np.diag(ww)), 100),
np.sum(ww, axis=0)), (nfilts, 1)).T)
if sumpower == True:
spec = np.matmul(itws, aspectrum)
else:
spec = np.power(np.matmul(itws, np.sqrt(aspectrum)))
return spec, wts, itws
def invmelfcc(cep, fs, win_time=0.040, hop_time=0.020, lifterexp=0.6, sumpower=True,
preemph=0.97, max_freq=6500, min_freq=50, n_bands=40, band_width=1,
dcttype=2, fbtype='mel', usecmp=False, modelorder=0, broaden=0, excitation=[]):
winpts = int(np.round(np.multiply(win_time, fs)))
nfft = int(np.power(2, np.ceil(np.divide(np.log(winpts), np.log(2)))))
cep = lifter(cep, lift=lifterexp, invs=True)
pspc, _ = cep2spec(cep, nfreq=int(n_bands + 2 * broaden), dcttype=dcttype)
if usecmp == True:
aspc, _ = invpostaud(pspc, fmax=max_freq, fbtype=fbtype, broaden=broaden)
else:
aspc = pspc
spec, _, _ = invaudspec(aspc, fs=fs, nfft=nfft, fbtype=fbtype, min_freq=min_freq,
max_freq=max_freq, sumpower=sumpower, band_width=band_width)
x = invpowspec(spec, fs, win_time=win_time, hop_time=hop_time, excit=excitation)
if preemph != 0:
b = [1, -preemph]
a = 1
x = signal.lfilter(b, a, x)
return x, aspc, spec, pspc
# reference
# https://labrosa.ee.columbia.edu/matlab/rastamat/
if __name__ == '__main__':
d,fs=librosa.load("ATC00035.wav",16000)
plt.figure()
plt.imshow(rastaplp(d))
plt.show()
CesiumCesium 是一款面向三维地球和地图的,世界级的JavaScript开源产品。它提供了基于JavaScript语言的开发包,方便用户快速搭建一款零插件的虚拟地球Web应用,并在性能,精度,渲染质量以及多平台,易用性上都有高质量的保证。上一篇文章介绍了雷达产品的动图展示,这节我们用同样的原理实现卫星动图。效果图卫星云图也是一种重要的气象观测资料,与雷达产品不同的是,..._cesium云图效果
heartbeat单独提供高可用服务本文目录:1.简介2.安装heartbeat 2.1 编译安装Heartbeat3.heartbeat相关配置文件 3.1 配置文件ha.cf 3.2 配置文件authkeys 3.3 配置文件haresources4.示例:heartbeat为httpd提供高可用服务 1.简介heartbeat是人所众知高可用软件。但是在以前,heartbeat是Linux...
一、设计模式的目的:编写软件过程中,程序员面临来自耦合性,内聚性,可维护性,可扩展性,重用性,灵活性等多方面的挑战,设计模式是为了让程序(软件),具有更好的: 代码重用性 (即:相同功能的代码,不用多次编写) 可读性 (即:编程规范性, 便于其他程序员的阅读和理解) 可扩展性 (即:当需要增加新的功能时,非常的方便,称为可维护) 可靠性 (即:当我们增加..._违反单一职责原则
WampServer是什么WampServer是一款功能强大的PHP集成安装环境。有了这款wampserver,省去了你手动配置apache、php、mysql,只需要一键便可以完成php环境的安装。而且还为您节省 了时间和配置中出现的错误。Apache1.同个局域网内的人(通过IP地址访问)访问自己本机上的页面第一步:找到"httpd.conf"文件;点击WampServer,找到Ap..._wampserver3.3环境要求
来源 今天启动Android studio的时候出现“adb not responding. you can wait more, or kill "adb.exe" process manually and click 'Restart' ”这个错误: 尝试了点Wait more,Restart和Cancel按钮,都无法解决问题,重启也不行,后来在网上查了下解决的方法,说是adb
论文链接:https://web.eecs.umich.edu/~justincj/papers/eccv16/JohnsonECCV16.pdfPerceptual Losses for Real-Time Style Transfer and Super-Resolution摘要1 引言2 相关工作前馈式图像变换感知优化风格转移图像超分辨率3 方法3.1 图像变换网络输入和输出下采样和上采样残差连接3.2 感知损失函数特征重建损失风格重构损失3.3 简单损失函数像素损失总变化正则化4 实验4.1 风格_才能在大小为c×h×w的输入上使用c个3×3滤波器进行卷积。而这与在形状为dc×h/d
使用JQuery表单选择器,实现全选/全不选/和反选的效果<!DOCTYPE html><html> <head> <meta charset="UTF-8"> <script type="text/javascript" src="js/jquery-1.8.3.js"&
expire_logs_day 设置binlog老化日期触发时机是binlog发生切换:binlog大小超过max_binlog_size手动执行flush logs重新启动时(mysql将会new一个新文件用于记录binlog)该参数可以在线修改mysql> show variables like '%expire%';+------------------+-------+| Variable_name | Value |+-----------------_expire_logs_days
前提是navicat连接到了mysql第一步左边区域你想导入的表中点击鼠标右键,出现“运行SQL文件”,点击进去,然后找到路径,导入。第二步完成_mysql怎么在navicat里导入文件
新版iTools AirPlayer应广大兔粉们的要求,终于实现了历史性的突破,实现通过电脑上的镜像逆向控制苹果设备,植物大战僵尸2一秒变pc版,天天爱消除鼠标操作更快更爽!下面给出反向控制教程,需要下面几个条件1. 您的设备为A5以上芯片,支持AirPlay镜像,并使用镜像的方式投影到电脑显示器(不能是投射照片或者AirPlay视频投射)。2. 您的设备已经越狱3. 安装越狱插件ve
C语言基本并发操作简介引言实验环境利用fork函数创建进程创建实例进程之间的竞争竞争实例创建线程等待线程注意事项线程实例线程共享数据共享数据实例总结引言在处理一些数据集比较繁杂且解题思路比较单一的问题时,可以通过同一个算法计算多次循环操作解决,但当对效率要求比较高的时候,如果电脑的配置比较好,通常倾向于采用并发编程,其中,线程之间由于切换比较复杂,上下文切换时间较长,并且进程间数据共享比较麻烦,这里我们通常倾向于利用多线程的方法来解决问题。这里我们简要介绍一下多进程、线程思想的一些基本操作。实验环境_c语言并发
相较于其他领域,图像领域的对抗样本生成有以下优势:1)真实图像与虚假图像于观察者是直观的;2)图像数据与图像分类器的结构相对简单。本文以全连接网络和卷积神经网络为例,以MNIST、CIFAR10,以及ImageNet为基础样本,研究基于逃避对抗,包括白盒、黑盒、灰盒,以及物理攻击的图像对抗样本生成。_图像特征提取的对抗攻击