RASTA-PLP 听觉谱 应用到 语音分类_东方佑的博客-程序员宅基地

技术标签: 语音识别  日常  

https://blog.csdn.net/weixin_42485817/article/details/107590846
在这里插入图片描述
如图所示,产出如此优美的语音图谱代码如下

import numpy as np
import wave
# import matplotlib.pyplot as plt
import librosa
# -----先定义了两个公式,bark_change为线性频率坐标转换为Bark坐标,equal_loudness为等响度曲线--------
def bark_change(x):
    return 6*np.log10(x/(1200*np.pi)+((x/(1200*np.pi))**2+1)**0.5)
def equal_loudness(x):
    return ((x**2+56.8e6)*x**4)/((x**2+6.3e6)**2*(x**2+3.8e8))

def get_plp_lpc(filename):
    f = wave.open(filename,"rb")#将.wav放在pycharm工程目录下,rb为只读
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes =params[:4]
    # print(params[:4])#返回声道数、量化位数、采样频率、采样点数
    str_data = f.readframes(nframes)#readframes返回的是字符串类型的数据
    signal =np.fromstring(str_data,dtype=np.int16)#将字符串转换为int类型的数组,我的sampwidth为2,所以转换为int16
    signal_len=len(signal)
    signal=signal/(max(abs(signal)))#归一化
    # ------预加重并画时域波形----------
    # signal_add=np.append(signal[0],signal[1:]-0.97*signal[:-1])
    # time=np.arange(0,nframes)/1.0*framerate #画时间轴
    # plt.figure(figsize=(20,10))
    # plt.subplot(2,1,1)
    # plt.plot(time[0:len(signal)],signal)#可能出现时间轴和信号长度不相等的情况,我这里选取了与singala长度相等的time
    # plt.xlabel('time')
    # plt.subplot(2,1,2)
    # plt.plot(time[0:len(signal_add)],signal_add)
    # plt.xlabel('time')
    # plt.show()
    #


    # ----分帧------
    wlen=1024#每帧信号的帧数
    inc=256#帧移
    # N=512#每帧信号帧数的一半
    N=1024#每帧信号帧数的一半
    nf = int(np.ceil((1.0 * signal_len - wlen +
    inc) / inc))#计算帧数nf
    pad_len=int((nf-1)*inc+wlen)
    zeros=np.zeros(pad_len-signal_len)#不够的点用0填补
    pad_signal=np.concatenate((signal,zeros))
    indices=np.tile(np.arange(0,wlen),(nf,1))+np.tile(np.arange(0,nf*inc,inc),(wlen,1)).T
    indices=np.array(indices,dtype=np.int32)
    frames=pad_signal[indices]#最终得到帧数*帧长形式的信号数据
    win=np.hamming(wlen)#先调用窗函数
    # x=frames[10:] #我选取其中某一帧的数据提取PLP系数,即为frames的某一行
    # y=win*x[0]#得到加窗后的信号
    y=win*frames




    # a=np.fft.fft(y)#做fft,默认为信号的长度即1024点
    # b=np.square(abs(a[0:N])) #求fft变换结果的模的平方,只取a的一半
    b=np.square(abs(np.fft.fft(y)))
    df=framerate/N  #频率分辨率
    i=np.arange(N)#只取大于0部分的频率
    freq_hz=i*df#得到实际频率坐标
    # plt.plot(freq_hz,b)#得到该帧信号的功率谱
    # plt.show()
    freq_w=2*np.pi*np.array(freq_hz)#转换为角频率
    freq_bark=bark_change(freq_w)#再转换为bark频率
    point_hz= [250, 350, 450, 570, 700, 840,1000, 1170, 1370, 1600, 1850, 2150,2500,2900,3400]
    #选取的临界频带数量一般要大于10,覆盖常用频率范围,这里我选取了15个中心频点
    point_w=2*np.pi*np.array(point_hz)#转换为角频率
    point_bark =bark_change(point_w)#转换为bark频率
    bank=np.zeros((15,N))#构造15行512列的矩阵,每一行为一个滤波器向量
    # filter_data=np.zeros(15)#构造15维频带能量向量
    filter_data_list=[]
    # -------构造滤波器组,这一段注意缩进---------
    for j in range(15):
       for k in range(N):
           omg= freq_bark[k]- point_bark[j]
           if -1.3<omg<-0.5:
              bank[j,k]=10**(2.5*(omg+0.5))
           elif -0.5<omg<0.5:
              bank[j,k]=1
           elif 0.5<omg<2.5:
              bank[j,k] = 10**(-1.0*(omg-0.5))
           else:
              bank[j,k]=0
       # filter_data[j] = np.sum(b * bank[j],-1) #滤波后将该段信号相加,最终得到15维的频带能量
       filter_data_list.append( np.sum(b * bank[j],-1))
    #    plt.plot(freq_hz,bank[j])
    #    plt.xlim(0,20000)
    #    plt.xlabel('hz')
    # plt.show()#画滤波器组


    equal_data=np.stack(filter_data_list).T*equal_loudness(point_w)
    cubic_data=equal_data**0.33
    # ---做30点的ifft,得到30维PLP向量------------
    plp_data=np.fft.ifft(cubic_data,30)
    # print(plp_data)


    # ---用librosa直接求传统LPC系数,需要0.7以上版本---
    plp_list=[]
    for plpone in plp_data:
        plp=librosa.lpc(abs(plpone), 15)#要求输入参数为浮点型,经过ifft得到的plp_data有复数,因此要取abs
        plp_list.append(plp)
    h1=1.0/np.fft.fft(np.stack(plp_list),1024)
    # spec_envelope_plp=10*np.log10(abs(h1[0:512]))
    spec_envelope_plp=10*np.log10(abs(h1))
    plt.figure()
    plt.imshow(spec_envelope_plp)
    plt.show()
    # lpc=librosa.lpc(y,15)
    # h2=1.0/np.fft.fft(lpc,1024)
    # spec_envelope_lpc=10*np.log10(abs(h2[0:512]))
    # spec_envelope_lpc=10*np.log10(abs(h2))
    # plt.plot(spec_envelope_plp,'b',spec_envelope_lpc,'r')
    # plt.show()
    return spec_envelope_plp

python 实现PLP代码和语音分类项目
https://github.com/linhndt/spoken_language_classification/blob/master/feature_extractor/python_speech_features/rasta.py

在这里插入图片描述

import librosa
import librosa.filters
import scipy.fftpack as fft
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import spectrum


def rastaplp(x, fs=16000, win_time=0.040, hop_time=0.020, dorasta=True, modelorder=8):
    # first compute power spectrum
    p_spectrum, _ = powspec(x, fs, win_time, hop_time)
    # next group to critical bands
    aspectrum = audspec(p_spectrum, fs)
    nbands = aspectrum.shape[0]

    if dorasta:
        # put in log domain
        nl_aspectrum = np.log(aspectrum)
        # next do rasta filtering
        ras_nl_aspectrum = rastafilt(nl_aspectrum)
        # do inverse log
        aspectrum = np.exp(ras_nl_aspectrum)

    postspectrum, _ = postaud(aspectrum, fs / 2)

    lpcas = dolpc(postspectrum, modelorder)
    cepstra = lpc2cep(lpcas, modelorder + 1)

    if modelorder > 0:
        lpcas = dolpc(postspectrum, modelorder)
        cepstra = lpc2cep(lpcas, modelorder + 1)
        spectra, F, pM = lpc2spec(lpcas, nbands)
    else:
        spectra = postspectrum
        cepstra = spec2cep(spectra)

    cepstra = lifter(cepstra, 0.6)

    return cepstra


def powspec(x, fs=16000, window_time=0.040, hop_time=0.020, dither=1):
    win_length = int(np.round(window_time * fs))
    hop_length = int(np.round(hop_time * fs))
    fft_length = int(np.power(2, np.ceil(np.log2(window_time * fs))))

    X = librosa.stft(np.multiply(32768, x.astype(float)), n_fft=fft_length, hop_length=hop_length,
                     win_length=win_length, window='hann', center=False)
    pow_X = np.power(np.abs(X), 2)
    if dither:
        pow_X = np.add(pow_X, win_length)
    e = np.log(np.sum(pow_X, axis=0))
    return pow_X, e


def hz2bark(f):
    z = np.multiply(6, np.arcsinh(np.divide(f, 600)))
    return z


def bark2hz(z):
    hz = np.multiply(600, np.sinh(np.divide(z, 6)))
    return hz


def fft2barkmx(fft_length, fs, nfilts=0, band_width=1, min_freq=0, max_freq=0):
    if max_freq == 0:
        max_freq = fs / 2

    min_bark = hz2bark(min_freq)
    nyqbark = hz2bark(max_freq) - min_bark

    if nfilts == 0:
        nfilts = np.add(np.ceil(nyqbark), 1)

    wts = np.zeros((int(nfilts), int(fft_length)))
    step_barks = np.divide(nyqbark, np.subtract(nfilts, 1))
    binbarks = hz2bark(np.multiply(np.arange(0, np.add(np.divide(fft_length, 2), 1)), np.divide(fs, fft_length)))

    for i in range(int(nfilts)):
        f_bark_mid = min_bark + np.multiply(i, step_barks)
        lof = np.subtract(np.subtract(binbarks, f_bark_mid), 0.5)
        hif = np.add(np.subtract(binbarks, f_bark_mid), 0.5)
        wts[i, 0: int(fft_length / 2) + 1] = np.power(10, np.minimum(0,
                                                                     np.divide(np.minimum(hif, np.multiply(-2.5, lof)),
                                                                               band_width)))
    return wts


def rastafilt(x):
    numer = np.arange(-2, 3)
    numer = np.divide(-numer, np.sum(np.multiply(numer, numer)))
    denom = np.array([1, -0.94])

    zi = signal.lfilter_zi(numer, 1)
    y = np.zeros((x.shape))
    for i in range(x.shape[0]):
        y1, zi = signal.lfilter(numer, 1, x[i, 0:4], axis=0, zi=zi * x[i, 0])
        y1 = y1 * 0
        y2, _ = signal.lfilter(numer, denom, x[i, 4:x.shape[1]], axis=0, zi=zi)
        y[i, :] = np.append(y1, y2)
    return y


def dolpc(x, modelorder=12):
    nbands, nframes = x.shape
    ncorr = 2 * (nbands - 1)
    R = np.zeros((ncorr, nframes))

    R[0:nbands, :] = x
    for i in range(nbands - 1):
        R[i + nbands - 1, :] = x[nbands - (i + 1), :]

    r = fft.ifft(R.T).real.T
    r = r[0:nbands, :]

    y = np.ones((nframes, modelorder + 1))
    e = np.zeros((nframes, 1))

    if modelorder == 0:
        for i in range(nframes):
            _, e_tmp, _ = spectrum.LEVINSON(r[:, i], modelorder, allow_singularity=True)
            e[i, 0] = e_tmp
    else:
        for i in range(nframes):
            y_tmp, e_tmp, _ = spectrum.LEVINSON(r[:, i], modelorder, allow_singularity=True)
            y[i, 1:modelorder + 1] = y_tmp
            e[i, 0] = e_tmp

    y = np.divide(y.T, np.add(np.tile(e.T, (modelorder + 1, 1)), 1e-8))

    return y


def lpc2cep(a, nout=0):
    nin, ncol = a.shape

    order = nin - 1

    if nout == 0:
        nout = order + 1

    cep = np.zeros((nout, ncol))
    cep[0, :] = -np.log(a[0, :])

    norm_a = np.divide(a, np.add(np.tile(a[0, :], (nin, 1)), 1e-8))

    for n in range(1, nout):
        sum = 0
        for m in range(1, n):
            sum = np.add(sum, np.multiply(np.multiply((n - m), norm_a[m, :]), cep[(n - m), :]))

        cep[n, :] = -np.add(norm_a[n, :], np.divide(sum, n))

    return cep


def lifter(x, lift=0.6, invs=False):
    ncep = x.shape[0]

    if lift == 0:
        y = x
    else:
        if lift < 0:
            warnings.warn('HTK liftering does not support yet; default liftering')
            lift = 0.6
        liftwts = np.power(np.arange(1, ncep), lift)
        liftwts = np.append(1, liftwts)

        if (invs):
            liftwts = np.divide(1, liftwts)

        y = np.matmul(np.diag(liftwts), x)

    return y


def melfcc(x, fs=16000, min_freq=50, max_freq=6500, n_mfcc=13, n_bands=40, lifterexp=0.6,
           fbtype='fcmel', dcttype=1, usecmp=True, window_time=0.040, hop_time=0.020,
           preemph=0.97, dither=1, sumpower=1, band_width=1, modelorder=0,
           broaden=0, useenergy=False):
    if preemph != 0:
        b = [1, -preemph]
        a = 1
        x = signal.lfilter(b, a, x)

    pspectrum, logE = powspec(x, fs=fs, window_time=window_time, hop_time=hop_time, dither=dither)
    aspectrum = audspec(pspectrum, fs=fs, nfilts=n_bands, fbtype=fbtype,
                        min_freq=min_freq, max_freq=max_freq)

    if usecmp:
        aspectrum, _ = postaud(aspectrum, fmax=max_freq, fbtype=fbtype)

    if modelorder > 0:
        lpcas = dolpc(aspectrum, modelorder)
        cepstra = lpc2cep(lpcas, nout=n_mfcc)

    else:
        cepstra, _ = spec2cep(aspectrum, ncep=n_mfcc, dcttype=dcttype)

    cepstra = lifter(cepstra, lift=lifterexp)

    if useenergy == True:
        cepstra[0, :] = logE

    return cepstra


def hz2mel(f, htk=False):
    if htk:
        z = np.multiply(2595, np.log10(np.add(1, np.divide(f, 700))))
    else:
        f_0 = 0.0
        f_sp = 200 / 3
        brkfrq = 1000
        brkpt = (brkfrq - f_0) / f_sp
        logstep = np.exp(np.log(6.4) / 27.0)

        f = np.array(f, ndmin=1)
        z = np.zeros((f.shape[0],))

        for i in range(f.shape[0]):
            if f[i] < brkpt:
                z[i] = (f[i] - f_0) / f_sp
            else:
                z[i] = brkpt + (np.log(f[i] / brkfrq) / np.log(logstep))
    return z


def mel2hz(z, htk=False):
    if htk:
        f = np.multiply(700, np.subtract(np.power(10, np.divide(z, 2595)), 1))
    else:
        f_0 = 0
        f_sp = 200 / 3
        brkfrq = 1000
        brkpt = (brkfrq - f_0) / f_sp
        logstep = np.exp(np.log(6.4) / 27.0)

        z = np.array(z, ndmin=1)
        f = np.zeros((z.shape[0],))

        for i in range(z.shape[0]):
            if z[i] < brkpt:
                f[i] = f_0 + f_sp * z[i]
            else:
                f[i] = brkfrq * np.exp(np.log(logstep) * (z[i] - brkpt))
    return f


def fft2melmx(fft_length, fs, nfilts=0, band_width=1, min_freq=0, max_freq=0, htk=False, constamp=False):
    if nfilts == 0:
        nfilts = np.ceil(hz2mel(max_freq, htk) / 2)
    if max_freq == 0:
        max_freq = fs / 2

    wts = np.zeros((int(nfilts), int(fft_length)))
    fftfrqs = np.multiply(np.divide(np.arange(0, fft_length / 2 + 1), fft_length), fs)

    min_mel = hz2mel(min_freq, htk)
    max_mel = hz2mel(max_freq, htk)
    binfrqs = mel2hz(np.add(min_mel, np.multiply(np.arange(0, nfilts + 2),
                                                 (max_mel - min_mel) / (nfilts + 1))), htk)

    for i in range(int(nfilts)):
        fs_tmp = binfrqs[np.add(np.arange(0, 3), i)]
        fs_tmp = np.add(fs_tmp[1], np.multiply(band_width, np.subtract(fs_tmp, fs_tmp[1])))
        loslope = np.divide(np.subtract(fftfrqs, fs_tmp[0]), np.subtract(fs_tmp[1], fs_tmp[0]))
        hislope = np.divide(np.subtract(fs_tmp[2], fftfrqs), np.subtract(fs_tmp[2], fs_tmp[1]))
        wts[i, 0: int(fft_length / 2) + 1] = np.maximum(0, np.minimum(loslope, hislope))

    if constamp == False:
        wts = np.matmul(np.diag(np.divide(2, np.subtract(binfrqs[2: int(nfilts) + 2],
                                                         binfrqs[0: int(nfilts)]))), wts)

    return wts


def audspec(p_spectrum, fs=16000, nfilts=0, fbtype='bark', min_freq=0, max_freq=0, sumpower=1, band_width=1):
    if nfilts == 0:
        np.add(np.ceil(hz2bark(fs / 2)), 1)
    if max_freq == 0:
        max_freq = fs / 2
    nfreqs = p_spectrum.shape[0]
    nfft = (int(nfreqs) - 1) * 2

    if fbtype == 'bark':
        wts = fft2barkmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
    elif fbtype == 'mel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
    elif fbtype == 'htkmel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=True)
    elif fbtype == 'fcmel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=False)

    wts = wts[:, 0: nfreqs]

    if sumpower:
        aspectrum = np.matmul(wts, p_spectrum)
    else:
        aspectrum = np.power(np.matmul(wts, np.sqrt(p_spectrum)), 2)
    return aspectrum


def postaud(x, fmax, fbtype='bark', broaden=0):
    nbands, nframes = x.shape
    nfpts = int(nbands + 2 * broaden)

    if fbtype == 'bark':
        bandcfhz = bark2hz(np.linspace(0, hz2bark(fmax), nfpts))
    elif fbtype == 'mel':
        bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax), nfpts))
    elif fbtype == 'htkmel' or fbtype == 'fcmel':
        bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax, htk=True), nfpts), htk=True)

    bandcfhz = bandcfhz[broaden: (nfpts - broaden)]

    fsq = np.power(bandcfhz, 2)
    ftmp = np.add(fsq, 1.6e5)
    eql = np.multiply(np.power(np.divide(fsq, ftmp), 2), np.divide(np.add(fsq, 1.44e6), np.add(fsq, 9.61e6)))

    z = np.multiply(np.tile(eql, (nframes, 1)).T, x)
    z = np.power(z, 0.33)

    if broaden:
        y = np.zeros((z.shape[0] + 2, z.shape[1]))
        y[0, :] = z[0, :]
        y[1:nbands + 1, :] = z
        y[nbands + 1, :] = z[z.shape[0] - 1, :]
    else:
        y = np.zeros((z.shape[0], z.shape[1]))
        y[0, :] = z[1, :]
        y[1:nbands - 1, :] = z[1:z.shape[0] - 1, :]
        y[nbands - 1, :] = z[z.shape[0] - 2, :]

    return y, eql


def spec2cep(spec, ncep, dcttype):
    nrow, ncol = spec.shape[0], spec.shape[1]
    dctm = np.zeros((ncep, nrow))

    if dcttype == 2 or dcttype == 3:
        for i in range(ncep):
            dctm[i, :] = np.multiply(
                np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, 2 * nrow, 2)), (2 * nrow)), np.pi)),
                np.sqrt(2 / nrow))

        if dcttype == 2:
            dctm[0, :] = np.divide(dctm[0, :], np.sqrt(2))

    elif dcttype == 4:
        for i in range(ncep):
            dctm[i, :] = np.multiply(
                np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, nrow + 1)), (nrow + 1)), np.pi)), 2)
            dctm[i, 0] = np.add(dctm[i, 0], 1)
            dctm[i, int(nrow - 1)] = np.multiply(dctm[i, int(nrow - 1)], np.power(-1, i))
        dctm = np.divide(dctm, 2 * (nrow + 1))

    else:
        for i in range(ncep):
            dctm[i, :] = np.divide(
                np.multiply(np.cos(np.multiply(np.divide(np.multiply(i, np.arange(0, nrow)), (nrow - 1)), np.pi)), 2),
                2 * (nrow - 1))
        dctm[:, 0] = np.divide(dctm[:, 0], 2)
        dctm[:, int(nrow - 1)] = np.divide(dctm[:, int(nrow - 1)], 2)

    cep = np.matmul(dctm, np.log(np.add(spec, 1e-8)))

    return cep, dctm


def lpc2spec(lpcas, nout=17, FMout=False):
    rows, cols = lpcas.shape
    order = rows - 1

    gg = lpcas[0, :]
    aa = np.divide(lpcas, np.tile(gg, (rows, 1)))

    #    Calculate the actual z-plane polyvals: nout points around unit circle
    tmp_1 = np.array(np.arange(0, nout), ndmin=2).T
    tmp_1 = np.divide(np.multiply(-1j, np.multiply(tmp_1, np.pi)), (nout - 1))
    tmp_2 = np.array(np.arange(0, order + 1), ndmin=2)
    zz = np.exp(np.matmul(tmp_1, tmp_2))
    #    Actual polyvals, in power (mag^2)
    features = np.divide(np.power(np.divide(1, np.abs(np.matmul(zz, aa))), 2), np.tile(gg, (nout, 1)))
    F = np.zeros((cols, int(np.ceil(rows / 2))))
    M = F

    if FMout == True:
        for c in range(cols):
            aaa = aa[:, c]
            rr = np.roots(aaa)
            ff_tmp = np.angle(rr)
            ff = np.array(ff_tmp, ndmin=2).T
            zz = np.exp(np.multiply(1j, np.matmul(ff, np.array(np.arange(0, aaa.shape[0]), ndmin=2))))
            mags = np.sqrt(np.divide(np.power(np.divide(1, np.abs(np.matmul(zz, np.array(aaa, ndmin=2).T))), 2), gg[c]))

            ix = np.argsort(ff_tmp)
            dummy = np.sort(ff_tmp)
            mp_F_list = []
            tmp_M_list = []

            for i in range(ff.shape[0]):
                if dummy[i] > 0:
                    tmp_F_list = np.append(tmp_F_list, dummy[i])
                    tmp_M_list = np.append(tmp_M_list, mags[ix[i]])

            M[c, 0: tmp_M_list.shape[0]] = tmp_M_list
            F[c, 0: tmp_F_list.shape[0]] = tmp_F_list

    return features, F, M


def deltas(x, w=9):
    rows, cols = x.shape
    hlen = np.floor(w / 2)
    win = np.arange(hlen, -(hlen + 1), -1, dtype='float32')

    xx = np.append(np.append(np.tile(x[:, 0], (int(hlen), 1)).T, x, axis=1),
                   np.tile(x[:, cols - 1], (int(hlen), 1)).T, axis=1)

    d = signal.lfilter(win, 1, xx, axis=1)
    d = d[:, int(2 * hlen): int(2 * hlen + cols)]
    return d


def cep2spec(cep, nfreq, dcttype=2):
    ncep, ncol = cep.shape

    dctm = np.zeros((ncep, nfreq))
    idctm = np.zeros((nfreq, ncep))

    if dcttype == 2 or dcttype == 3:
        for i in range(ncep):
            dctm[i, :] = np.multiply(np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, 2 * nfreq, 2)),
                                                                  (2 * nfreq)), np.pi)), np.sqrt(2 / nfreq))

        if dcttype == 2:
            dctm[0, :] = np.divide(dctm[0, :], np.sqrt(2))
        else:
            dctm[0, :] = np.divide(dctm[0, :], 2)

        idctm = dctm.T

    elif dcttype == 4:
        for i in range(ncep):
            idctm[:, i] = np.multiply(
                np.cos(np.multiply(np.divide(np.multiply(i, np.arange(1, nfreq + 1).T), (nfreq + 1)), np.pi)), 2)

        idctm[:, 0:ncep] = np.divide(idctm[:, 0:ncep], 2)

    else:
        for i in range(ncep):
            idctm[:, i] = np.multiply(
                np.cos(np.multiply(np.divide(np.multiply(i, np.arange(0, nfreq).T), (nfreq - 1)), np.pi)), 2)

        idctm[:, [0, -1]] = np.divide(idctm[:, [0, -1]], 2)

    spec = np.exp(np.matmul(idctm, cep))

    return spec, idctm


def invpostaud(y, fmax, fbtype='bark', broaden=0):
    nbands, nframes = y.shape

    if fbtype == 'bark':
        bandcfhz = bark2hz(np.linspace(0, hz2bark(fmax), nbands))
    elif fbtype == 'mel':
        bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax), nbands))
    elif fbtype == 'htkmel' or fbtype == 'fcmel':
        bandcfhz = mel2hz(np.linspace(0, hz2mel(fmax, htk=True), nbands), htk=True)

    bandcfhz = bandcfhz[broaden: (nbands - broaden)]

    fsq = np.power(bandcfhz, 2)
    ftmp = np.add(fsq, 1.6e5)
    eql = np.multiply(np.power(np.divide(fsq, ftmp), 2),
                      np.divide(np.add(fsq, 1.44e6), np.add(fsq, 9.61e6)))

    x = np.power(y, np.divide(1, 0.33))

    if eql[0] == 0:
        eql[0] = eql[1]
        eql[-1] = eql[-2]

    x = np.divide(x[broaden: (nbands - broaden + 1), :], np.add(np.tile(eql.T, (nframes, 1)).T, 1e-8))

    return x, eql


def invpowspec(y, fs, win_time, hop_time, excit=[]):
    nrow, ncol = y.shape
    r = excit

    winpts = int(np.round(np.multiply(win_time, fs)))
    steppts = int(np.round(np.multiply(hop_time, fs)))
    nfft = int(np.power(2, np.ceil(np.divide(np.log(winpts), np.log(2)))))

    # Can't predict librosa stft length...
    tmp = librosa.istft(y, hop_length=steppts, win_length=winpts,
                        window='hann', center=False)
    xlen = len(tmp)
    # xlen = int(np.add(winpts, np.multiply(steppts, np.subtract(ncol, 1))))
    # xlen = int(np.multiply(steppts, np.subtract(ncol, 1)))

    if len(r) == 0:
        r = np.squeeze(np.random.randn(xlen, 1))
    r = r[0:xlen]

    R = librosa.stft(np.divide(r, 32768 * 12), n_fft=nfft, hop_length=steppts,
                     win_length=winpts, window='hann', center=False)

    R = np.multiply(R, np.sqrt(y))
    x = librosa.istft(R, hop_length=steppts, win_length=winpts,
                      window='hann', center=False)

    return x


def invaudspec(aspectrum, fs=16000, nfft=512, fbtype='bark',
               min_freq=0, max_freq=0, sumpower=True, band_width=1):
    if max_freq == 0:
        max_freq = fs / 2
    nfilts, nframes = aspectrum.shape

    if fbtype == 'bark':
        wts = fft2barkmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
    elif fbtype == 'mel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq)
    elif fbtype == 'htkmel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=True)
    elif fbtype == 'fcmel':
        wts = fft2melmx(nfft, fs, nfilts, band_width, min_freq, max_freq, htk=True, constamp=False)

    wts = wts[:, 0:int(nfft / 2 + 1)]

    ww = np.matmul(wts.T, wts)
    itws = np.divide(wts.T, np.tile(np.maximum(np.divide(np.mean(np.diag(ww)), 100),
                                               np.sum(ww, axis=0)), (nfilts, 1)).T)
    if sumpower == True:
        spec = np.matmul(itws, aspectrum)
    else:
        spec = np.power(np.matmul(itws, np.sqrt(aspectrum)))

    return spec, wts, itws


def invmelfcc(cep, fs, win_time=0.040, hop_time=0.020, lifterexp=0.6, sumpower=True,
              preemph=0.97, max_freq=6500, min_freq=50, n_bands=40, band_width=1,
              dcttype=2, fbtype='mel', usecmp=False, modelorder=0, broaden=0, excitation=[]):
    winpts = int(np.round(np.multiply(win_time, fs)))
    nfft = int(np.power(2, np.ceil(np.divide(np.log(winpts), np.log(2)))))

    cep = lifter(cep, lift=lifterexp, invs=True)

    pspc, _ = cep2spec(cep, nfreq=int(n_bands + 2 * broaden), dcttype=dcttype)

    if usecmp == True:
        aspc, _ = invpostaud(pspc, fmax=max_freq, fbtype=fbtype, broaden=broaden)
    else:
        aspc = pspc

    spec, _, _ = invaudspec(aspc, fs=fs, nfft=nfft, fbtype=fbtype, min_freq=min_freq,
                            max_freq=max_freq, sumpower=sumpower, band_width=band_width)

    x = invpowspec(spec, fs, win_time=win_time, hop_time=hop_time, excit=excitation)

    if preemph != 0:
        b = [1, -preemph]
        a = 1
        x = signal.lfilter(b, a, x)

    return x, aspc, spec, pspc
# reference
# https://labrosa.ee.columbia.edu/matlab/rastamat/
if __name__ == '__main__':
    d,fs=librosa.load("ATC00035.wav",16000)
    plt.figure()
    plt.imshow(rastaplp(d))
    plt.show()
版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/weixin_32759777/article/details/117999297

智能推荐

Cesium专栏-气象卫星云图动图(附源码下载)-程序员宅基地

CesiumCesium 是一款面向三维地球和地图的,世界级的JavaScript开源产品。它提供了基于JavaScript语言的开发包,方便用户快速搭建一款零插件的虚拟地球Web应用,并在性能,精度,渲染质量以及多平台,易用性上都有高质量的保证。上一篇文章介绍了雷达产品的动图展示,这节我们用同样的原理实现卫星动图。效果图卫星云图也是一种重要的气象观测资料,与雷达产品不同的是,..._cesium云图效果

heartbeat单独提供高可用服务_Kim_Weir的博客-程序员宅基地

heartbeat单独提供高可用服务本文目录:1.简介2.安装heartbeat 2.1 编译安装Heartbeat3.heartbeat相关配置文件 3.1 配置文件ha.cf 3.2 配置文件authkeys 3.3 配置文件haresources4.示例:heartbeat为httpd提供高可用服务 1.简介heartbeat是人所众知高可用软件。但是在以前,heartbeat是Linux...

设计模式七大原则之单一职责原则_违反单一职责原则_彩色小王的博客-程序员宅基地

一、设计模式的目的:编写软件过程中,程序员面临来自耦合性,内聚性,可维护性,可扩展性,重用性,灵活性等多方面的挑战,设计模式是为了让程序(软件),具有更好的: 代码重用性 (即:相同功能的代码,不用多次编写) 可读性 (即:编程规范性, 便于其他程序员的阅读和理解) 可扩展性 (即:当需要增加新的功能时,非常的方便,称为可维护) 可靠性 (即:当我们增加..._违反单一职责原则

WampServer服务器环境配置_wampserver3.3环境要求_风致﹏的博客-程序员宅基地

WampServer是什么WampServer是一款功能强大的PHP集成安装环境。有了这款wampserver,省去了你手动配置apache、php、mysql,只需要一键便可以完成php环境的安装。而且还为您节省 了时间和配置中出现的错误。Apache1.同个局域网内的人(通过IP地址访问)访问自己本机上的页面第一步:找到"httpd.conf"文件;点击WampServer,找到Ap..._wampserver3.3环境要求

解决Android Studio的ADB not responding错误_薛瑄的博客-程序员宅基地

来源 今天启动Android studio的时候出现“adb not responding. you can wait more, or kill "adb.exe" process manually and click 'Restart' ”这个错误: 尝试了点Wait more,Restart和Cancel按钮,都无法解决问题,重启也不行,后来在网上查了下解决的方法,说是adb

【图像超分辨率】Perceptual Losses for Real-Time Style Transfer and Super-Resolution_才能在大小为c×h×w的输入上使用c个3×3滤波器进行卷积。而这与在形状为dc×h/d_jaeden_xu的博客-程序员宅基地

论文链接:https://web.eecs.umich.edu/~justincj/papers/eccv16/JohnsonECCV16.pdfPerceptual Losses for Real-Time Style Transfer and Super-Resolution摘要1 引言2 相关工作前馈式图像变换感知优化风格转移图像超分辨率3 方法3.1 图像变换网络输入和输出下采样和上采样残差连接3.2 感知损失函数特征重建损失风格重构损失3.3 简单损失函数像素损失总变化正则化4 实验4.1 风格_才能在大小为c×h×w的输入上使用c个3×3滤波器进行卷积。而这与在形状为dc×h/d

随便推点

使用JQuery表单选择器,实现全选/全不选/和反选的效果_wFF11517的博客-程序员宅基地

使用JQuery表单选择器,实现全选/全不选/和反选的效果&lt;!DOCTYPE html&gt;&lt;html&gt; &lt;head&gt; &lt;meta charset="UTF-8"&gt; &lt;script type="text/javascript" src="js/jquery-1.8.3.js"&

expire_logs_day binlog自动过期清理binlog_expire_logs_days_进击云原生的博客-程序员宅基地

expire_logs_day 设置binlog老化日期触发时机是binlog发生切换:binlog大小超过max_binlog_size手动执行flush logs重新启动时(mysql将会new一个新文件用于记录binlog)该参数可以在线修改mysql> show variables like '%expire%';+------------------+-------+| Variable_name | Value |+-----------------_expire_logs_days

navicat导入mysql文件_mysql怎么在navicat里导入文件_有点难!的博客-程序员宅基地

前提是navicat连接到了mysql第一步左边区域你想导入的表中点击鼠标右键,出现“运行SQL文件”,点击进去,然后找到路径,导入。第二步完成_mysql怎么在navicat里导入文件

新版Airplayer--新功能介绍_jws0599的博客-程序员宅基地

新版iTools AirPlayer应广大兔粉们的要求,终于实现了历史性的突破,实现通过电脑上的镜像逆向控制苹果设备,植物大战僵尸2一秒变pc版,天天爱消除鼠标操作更快更爽!下面给出反向控制教程,需要下面几个条件1. 您的设备为A5以上芯片,支持AirPlay镜像,并使用镜像的方式投影到电脑显示器(不能是投射照片或者AirPlay视频投射)。2. 您的设备已经越狱3. 安装越狱插件ve

C语言基本并发操作简介_c语言并发_从日至臻的博客-程序员宅基地

C语言基本并发操作简介引言实验环境利用fork函数创建进程创建实例进程之间的竞争竞争实例创建线程等待线程注意事项线程实例线程共享数据共享数据实例总结引言在处理一些数据集比较繁杂且解题思路比较单一的问题时,可以通过同一个算法计算多次循环操作解决,但当对效率要求比较高的时候,如果电脑的配置比较好,通常倾向于采用并发编程,其中,线程之间由于切换比较复杂,上下文切换时间较长,并且进程间数据共享比较麻烦,这里我们通常倾向于利用多线程的方法来解决问题。这里我们简要介绍一下多进程、线程思想的一些基本操作。实验环境_c语言并发

对抗攻击与防御 (1):图像领域的对抗样本生成_图像特征提取的对抗攻击_因吉的博客-程序员宅基地

相较于其他领域,图像领域的对抗样本生成有以下优势:1)真实图像与虚假图像于观察者是直观的;2)图像数据与图像分类器的结构相对简单。本文以全连接网络和卷积神经网络为例,以MNIST、CIFAR10,以及ImageNet为基础样本,研究基于逃避对抗,包括白盒、黑盒、灰盒,以及物理攻击的图像对抗样本生成。_图像特征提取的对抗攻击

推荐文章

热门文章

相关标签