package com.sogou.speech.utils;

import com.lejent.zuoyeshenqi.afanti.sdk.imagesearch.ErrorCode;

/* loaded from: classes2.dex */
/**
 * Frame-based speech / non-speech detector working on 16-bit PCM samples.
 *
 * <p>Each call to {@link #detect_speech(short[], int, int)} appends samples to an internal
 * analysis buffer and processes every complete frame in it. Per frame the code:
 * <ol>
 *   <li>applies the analysis window and computes the frame level
 *       ({@code 10 * log10(mean square)});</li>
 *   <li>runs a radix-2 FFT and forms the power spectrum;</li>
 *   <li>smooths the power spectrum along frequency with a triangular window;</li>
 *   <li>updates two recursively smoothed spectra ({@code m_sp_ff}, {@code m_sp_sf}) and a
 *       smoothed per-bin ratio between them ({@code m_sp_snr});</li>
 *   <li>counts, in four bands split at the 2 kHz / 4 kHz / 6 kHz bins, the bins whose ratio
 *       reaches the band threshold and classifies the frame from those counts and the
 *       frame level.</li>
 * </ol>
 * The outcome is written into {@code res} of the {@link Detectwav} passed to the constructor.
 *
 * <p>Instances keep mutable state between calls and contain no synchronization.
 */
public class Vad {
    private static final String TAG = "VadNew";
    /** Small constant added before divisions and logarithms to avoid zero arguments. */
    private static final double eps = 2.2204E-16d;
    private static final double pi = 3.1415926d;
    /**
     * Frequency (Hz) of the third band edge. The decompiled source referenced an unrelated
     * SDK error-code constant here; the literal follows the sibling 2000 / 4000 edges.
     */
    private static final int BAND_EDGE_6K_HZ = 6000;
    /** Frames whose level is below this value are always counted as non-speech. */
    private static final double MIN_SPEECH_LEVEL_DB = 55.0d;
    /** A band "votes" for speech when at least this fraction of its bins reach the threshold. */
    private static final double BAND_RATIO_THRESHOLD = 0.3d;
    /** Fraction of bins from the 4 kHz bin upward that also marks a frame as speech. */
    private static final double HIGH_BAND_RATIO_THRESHOLD = 0.5d;

    // Recursive smoothing factors.
    private double m_alfa_ff;
    private double m_alfa_sf;
    private double m_alfa_snr;
    private double m_beta_sf;

    // Per-band thresholds applied to m_sp_snr.
    private double m_thres_02;
    private double m_thres_24;
    private double m_thres_46;
    private double m_thres_68;

    // Framing / FFT geometry.
    private int m_fs;
    private int m_win_size;
    private int m_shift_size;
    private int m_fft_size;
    private int m_log_fft_size;
    private int m_sp_size;
    private int m_ind_2k;
    private int m_ind_4k;
    private int m_ind_6k;
    private int m_freq_win_len;

    // Precomputed tables.
    private double[] m_ana_win;
    private double[] m_freq_win;
    private double[] m_cos_fft;
    private double[] m_sin_fft;
    private int[] m_rev;

    // Per-frame work buffers and running spectral estimates.
    private double[] m_win_wav;
    private double[] m_v_re;
    private double[] m_v_im;
    private double[] m_sp;
    private double[] m_sp_smooth;
    private double[] m_sp_ff;
    private double[] m_sp_ff_pre;
    private double[] m_sp_sf;
    private double[] m_sp_snr;

    // Sample buffers.
    private int m_max_wav_len;
    private int m_pre_reserve_len;
    private short[] m_raw_wav;
    private short[] m_out_wav;
    private short[] m_out_wav_pre;

    // Results of the most recent detect_sp_ratio run.
    private int m_speech_frame_num;
    private int m_non_speech_frame_num;

    /** Sink for the detection result; its {@code res} member is read and written. */
    private Detectwav m_deteDetectwavNew;

    private int m_reserve_len = 0;
    private int m_wav_len = 0;
    private int m_frame_sum = 0;
    private int m_out_wav_len = 0;
    private int m_out_wav_pre_len = 0;

    /**
     * Creates a detector and precomputes the analysis window, the frequency smoothing window
     * and the FFT tables.
     *
     * @param i   sampling rate in Hz; used to convert the 2/4/6 kHz band edges to FFT bins
     * @param i2  analysis window length in samples; frames are written into a buffer of
     *            {@code i6} entries, so it must not exceed {@code i6}
     * @param i3  frame shift in samples; must be positive
     * @param i4  extra capacity of the analysis buffer beyond one window (the buffer holds
     *            {@code i2 + i4} samples)
     * @param i5  capacity, in samples, of the pre-speech buffer
     * @param d   smoothing factor for {@code m_sp_ff}
     * @param d2  smoothing factor for {@code m_sp_sf}
     * @param d3  weight of the previous {@code m_sp_ff} in the {@code m_sp_sf} update
     * @param d4  smoothing factor for {@code m_sp_snr}
     * @param d5  threshold for bins below the 2 kHz bin
     * @param d6  threshold for bins between the 2 kHz and 4 kHz bins
     * @param d7  threshold for bins between the 4 kHz and 6 kHz bins
     * @param d8  threshold for bins from the 6 kHz bin upward
     * @param i6  FFT size; must be a positive power of two
     * @param i7  half-length of the triangular frequency smoothing window
     * @param detectwav object whose {@code res} receives the detection state
     * @throws IllegalArgumentException if {@code i3} is not positive or {@code i6} is not a
     *         positive power of two (either would make processing loop forever)
     */
    public Vad(int i, int i2, int i3, int i4, int i5, double d, double d2, double d3, double d4, double d5, double d6, double d7, double d8, int i6, int i7, Detectwav detectwav) {
        if (i3 <= 0) {
            // A non-positive shift would never advance the frame loop.
            throw new IllegalArgumentException("frame shift must be positive: " + i3);
        }
        if (i6 <= 0 || (i6 & (i6 - 1)) != 0) {
            // The radix-2 FFT below only supports power-of-two sizes.
            throw new IllegalArgumentException("FFT size must be a positive power of two: " + i6);
        }
        this.m_fs = i;
        this.m_win_size = i2;
        this.m_shift_size = i3;
        this.m_max_wav_len = this.m_win_size + i4;
        this.m_pre_reserve_len = i5;
        this.m_alfa_ff = d;
        this.m_alfa_sf = d2;
        this.m_beta_sf = d3;
        this.m_alfa_snr = d4;

        this.m_raw_wav = new short[this.m_max_wav_len];
        this.m_out_wav = new short[this.m_max_wav_len];
        this.m_out_wav_pre = new short[this.m_pre_reserve_len];

        // Hamming-shaped analysis window sampled at half-sample offsets.
        this.m_ana_win = new double[this.m_win_size];
        for (int n = 0; n < this.m_win_size; n++) {
            this.m_ana_win[n] = 0.54d - (0.46d * Math.cos((((n * 2) + 1) * pi) / this.m_win_size));
        }

        this.m_fft_size = i6;
        this.m_sp_size = (this.m_fft_size / 2) + 1;
        // NOTE(review): the band loops index m_sp_snr up to m_ind_6k, so the sampling rate
        // must be high enough for the 6 kHz bin to lie inside the half spectrum.
        this.m_ind_2k = (this.m_fft_size * 2000) / this.m_fs;
        this.m_ind_4k = (this.m_fft_size * 4000) / this.m_fs;
        this.m_ind_6k = (this.m_fft_size * BAND_EDGE_6K_HZ) / this.m_fs;
        this.m_thres_02 = d5;
        this.m_thres_24 = d6;
        this.m_thres_46 = d7;
        this.m_thres_68 = d8;

        this.m_rev = new int[this.m_fft_size];
        this.m_sin_fft = new double[this.m_fft_size / 2];
        this.m_cos_fft = new double[this.m_fft_size / 2];
        this.m_win_wav = new double[this.m_fft_size];
        this.m_v_re = new double[this.m_fft_size];
        this.m_v_im = new double[this.m_fft_size];
        this.m_sp = new double[this.m_sp_size];
        this.m_sp_smooth = new double[this.m_sp_size];
        this.m_sp_ff = new double[this.m_sp_size];
        this.m_sp_sf = new double[this.m_sp_size];
        this.m_sp_ff_pre = new double[this.m_sp_size];
        this.m_sp_snr = new double[this.m_sp_size];
        for (int k = 0; k < this.m_sp_size; k++) {
            this.m_sp_snr[k] = 1.0d;
        }

        // Symmetric triangular window of length 2 * i7 + 1 with a peak of 1.0 in the middle.
        this.m_freq_win_len = i7;
        this.m_freq_win = new double[(this.m_freq_win_len * 2) + 1];
        double step = 1.0d / (this.m_freq_win_len + 1);
        for (int k = 0; k < this.m_freq_win_len; k++) {
            this.m_freq_win[k] = (k + 1) * step;
            this.m_freq_win[(this.m_freq_win_len * 2) - k] = (k + 1) * step;
        }
        this.m_freq_win[this.m_freq_win_len] = 1.0d;

        this.m_deteDetectwavNew = detectwav;
        initial_fft();
    }

    /**
     * Processes every complete frame currently held in {@code m_raw_wav} and counts speech and
     * non-speech frames into {@code m_speech_frame_num} / {@code m_non_speech_frame_num}.
     * Afterwards {@code m_reserve_len} holds the number of trailing samples that were not
     * consumed by the frame shift.
     *
     * @param i when equal to 1, the frame at offset 0 only seeds the spectral estimates and is
     *          not classified
     */
    private void detect_sp_ratio(int i) {
        this.m_non_speech_frame_num = 0;
        this.m_speech_frame_num = 0;
        int offset = 0;
        // Strict '<': a frame that ends exactly at the buffer end waits for more samples.
        while (this.m_win_size + offset < this.m_wav_len) {
            this.m_frame_sum++;
            double frameLevelDb = window_frame(offset);
            fft_dit(this.m_win_wav, this.m_v_re, this.m_v_im);
            compute_power_spectrum();
            smooth_spectrum();
            if (i == 1 && offset == 0) {
                seed_spectrum_estimates();
            } else {
                update_spectrum_estimates();
                if (is_speech_frame(frameLevelDb)) {
                    this.m_speech_frame_num++;
                } else {
                    this.m_non_speech_frame_num++;
                }
                System.arraycopy(this.m_sp_ff, 0, this.m_sp_ff_pre, 0, this.m_sp_size);
            }
            offset += this.m_shift_size;
        }
        this.m_reserve_len = this.m_wav_len - offset;
    }

    /**
     * Writes the windowed frame starting at {@code offset} into {@code m_win_wav} and returns
     * {@code 10 * log10} of the frame's mean squared sample value.
     */
    private double window_frame(int offset) {
        double energy = 0.0d;
        for (int n = 0; n < this.m_win_size; n++) {
            short sample = this.m_raw_wav[n + offset];
            this.m_win_wav[n] = sample * this.m_ana_win[n];
            energy += sample * sample;
        }
        return 10.0d * Math.log10((energy / this.m_win_size) + eps);
    }

    /** Fills {@code m_sp} with the squared FFT magnitude; bin 0 is forced to zero. */
    private void compute_power_spectrum() {
        this.m_sp[0] = 0.0d;
        for (int k = 1; k < this.m_sp_size; k++) {
            this.m_sp[k] = (this.m_v_re[k] * this.m_v_re[k]) + (this.m_v_im[k] * this.m_v_im[k]);
        }
    }

    /**
     * Smooths {@code m_sp} along frequency into {@code m_sp_smooth}, truncating the window at
     * both spectrum edges. The first and the last bin are left untouched.
     */
    private void smooth_spectrum() {
        final int half = this.m_freq_win_len;
        final int lastBin = this.m_sp_size - 1;
        // Low edge: the window is cut off at bin 0.
        for (int bin = 1; bin < half; bin++) {
            smooth_bin(bin, 0, half + bin);
        }
        // Interior: the full window fits.
        for (int bin = half; bin < lastBin - half; bin++) {
            smooth_bin(bin, bin - half, bin + half);
        }
        // High edge: the window is cut off at the last bin.
        for (int bin = lastBin - half; bin < lastBin; bin++) {
            smooth_bin(bin, bin - half, lastBin);
        }
    }

    /**
     * Stores in {@code m_sp_smooth[bin]} the window-weighted mean of {@code m_sp} over the
     * inclusive bin range {@code [from, to]}, with the window centred on {@code bin}.
     */
    private void smooth_bin(int bin, int from, int to) {
        double weighted = 0.0d;
        double weightSum = 0.0d;
        for (int k = from; k <= to; k++) {
            double w = this.m_freq_win[(k - bin) + this.m_freq_win_len];
            weighted = weighted + (this.m_sp[k] * w);
            weightSum += w;
        }
        this.m_sp_smooth[bin] = weighted / weightSum;
    }

    /** Adds one twentieth of the smoothed spectrum to each of the three running estimates. */
    private void seed_spectrum_estimates() {
        for (int k = 0; k < this.m_sp_size; k++) {
            double share = this.m_sp_smooth[k] / 20;
            this.m_sp_ff[k] = this.m_sp_ff[k] + share;
            this.m_sp_sf[k] = this.m_sp_sf[k] + share;
            this.m_sp_ff_pre[k] = this.m_sp_ff_pre[k] + share;
        }
    }

    /**
     * Updates {@code m_sp_ff}, {@code m_sp_sf} and {@code m_sp_snr} from the current smoothed
     * spectrum. Every bin is independent of the others, so the three updates share one loop.
     */
    private void update_spectrum_estimates() {
        for (int k = 0; k < this.m_sp_size; k++) {
            this.m_sp_ff[k] = (this.m_alfa_ff * this.m_sp_ff[k]) + ((1.0d - this.m_alfa_ff) * this.m_sp_smooth[k]);
            if (this.m_sp_sf[k] < this.m_sp_ff[k]) {
                this.m_sp_sf[k] = (this.m_alfa_sf * this.m_sp_sf[k]) + (((1.0d - this.m_alfa_sf) * (this.m_sp_ff[k] - (this.m_beta_sf * this.m_sp_ff_pre[k]))) / (1.0d - this.m_beta_sf));
            } else {
                // The slow estimate never stays above the fast one.
                this.m_sp_sf[k] = this.m_sp_ff[k];
            }
            this.m_sp_snr[k] = (this.m_alfa_snr * this.m_sp_snr[k]) + ((1.0d - this.m_alfa_snr) * (this.m_sp_ff[k] / (this.m_sp_sf[k] + eps)));
        }
    }

    /**
     * Classifies the current frame from the per-band share of bins whose {@code m_sp_snr}
     * reaches the band threshold, gated by the frame level.
     *
     * @param frameLevelDb value returned by {@link #window_frame(int)} for this frame
     * @return {@code true} if the frame counts as speech
     */
    private boolean is_speech_frame(double frameLevelDb) {
        final int lastBin = this.m_sp_size - 1;
        double hits02 = count_bins_reaching(1, this.m_ind_2k, this.m_thres_02);
        double hits24 = count_bins_reaching(this.m_ind_2k, this.m_ind_4k, this.m_thres_24);
        double hits46 = count_bins_reaching(this.m_ind_4k, this.m_ind_6k, this.m_thres_46);
        double hits68 = count_bins_reaching(this.m_ind_6k, lastBin, this.m_thres_68);

        boolean highBandActive = (hits46 + hits68) / ((double) (lastBin - this.m_ind_4k)) >= HIGH_BAND_RATIO_THRESHOLD;

        double[] bandRatio = {
            hits02 / (this.m_ind_2k - 1),
            hits24 / (this.m_ind_4k - this.m_ind_2k),
            hits46 / (this.m_ind_6k - this.m_ind_4k),
            // NOTE(review): this divides by the bin count from m_ind_4k upward although the
            // hits were counted from m_ind_6k upward; kept as in the original - confirm intent.
            hits68 / (lastBin - this.m_ind_4k)
        };
        int activeBands = 0;
        for (int b = 0; b < bandRatio.length; b++) {
            if (bandRatio[b] >= BAND_RATIO_THRESHOLD) {
                activeBands++;
            }
        }

        if (frameLevelDb < MIN_SPEECH_LEVEL_DB) {
            return false;
        }
        return activeBands >= 1 || highBandActive;
    }

    /**
     * Counts the bins in {@code [from, toExclusive)} whose {@code m_sp_snr} value is at least
     * {@code threshold}.
     */
    private double count_bins_reaching(int from, int toExclusive, double threshold) {
        double hits = 0.0d;
        for (int k = from; k < toExclusive; k++) {
            if (this.m_sp_snr[k] >= threshold) {
                hits = hits + 1.0d;
            }
        }
        return hits;
    }

    /**
     * In-place radix-2 decimation-in-time FFT of a real input.
     *
     * @param dArr  real input of {@code m_fft_size} samples
     * @param dArr2 receives the real part of the spectrum
     * @param dArr3 receives the imaginary part of the spectrum
     */
    private void fft_dit(double[] dArr, double[] dArr2, double[] dArr3) {
        // Scatter the input into bit-reversed order; the imaginary part starts at zero.
        for (int n = 0; n < this.m_fft_size; n++) {
            dArr2[this.m_rev[n]] = dArr[n];
            dArr3[this.m_rev[n]] = 0.0d;
        }
        int groups = this.m_fft_size / 2; // butterfly groups in this stage, also the twiddle stride
        int half = 1;                     // distance between the two inputs of one butterfly
        for (int stage = 1; stage <= this.m_log_fft_size; stage++) {
            for (int group = 0; group < groups; group++) {
                int base = group * half * 2;
                for (int j = 0; j < half; j++) {
                    int top = base + j;
                    int bottom = top + half;
                    double cos = this.m_cos_fft[j * groups];
                    double sin = this.m_sin_fft[j * groups];
                    double tRe = (dArr2[bottom] * cos) + (dArr3[bottom] * sin);
                    double tIm = (dArr3[bottom] * cos) - (dArr2[bottom] * sin);
                    dArr2[bottom] = dArr2[top] - tRe;
                    dArr3[bottom] = dArr3[top] - tIm;
                    dArr2[top] = tRe + dArr2[top];
                    dArr3[top] = dArr3[top] + tIm;
                }
            }
            groups >>= 1;
            half <<= 1;
        }
    }

    /**
     * Precomputes {@code log2(m_fft_size)}, the bit-reversal permutation and the sine/cosine
     * twiddle tables. Relies on the constructor having verified that the FFT size is a
     * positive power of two.
     */
    private void initial_fft() {
        // For a power of two, the number of trailing zero bits equals log2.
        this.m_log_fft_size = Integer.numberOfTrailingZeros(this.m_fft_size);
        for (int n = 0; n < this.m_fft_size; n++) {
            int reversed = 0;
            int remaining = n;
            for (int bit = 0; bit < this.m_log_fft_size; bit++) {
                reversed = (reversed << 1) | (remaining & 1);
                remaining >>= 1;
            }
            this.m_rev[n] = reversed;
        }
        for (int k = 0; k < this.m_fft_size / 2; k++) {
            this.m_sin_fft[k] = Math.sin((k * 6.2831852d) / this.m_fft_size);
            this.m_cos_fft[k] = Math.cos((k * 6.2831852d) / this.m_fft_size);
        }
    }

    /**
     * Appends the current contents of {@code m_out_wav} to the pre-speech buffer, discarding
     * the oldest samples when the buffer capacity {@code m_pre_reserve_len} would be exceeded.
     */
    private void reserve_pre_speech() {
        final int capacity = this.m_pre_reserve_len;
        final int incoming = this.m_out_wav_len;
        if (this.m_out_wav_pre_len + incoming <= capacity) {
            System.arraycopy(this.m_out_wav, 0, this.m_out_wav_pre, this.m_out_wav_pre_len, incoming);
            this.m_out_wav_pre_len += incoming;
            return;
        }
        if (incoming >= capacity) {
            // The new chunk alone fills the buffer: keep only its newest samples. The previous
            // code computed a negative temporary array size in this situation.
            System.arraycopy(this.m_out_wav, incoming - capacity, this.m_out_wav_pre, 0, capacity);
        } else {
            int dropped = (this.m_out_wav_pre_len + incoming) - capacity;
            // System.arraycopy handles overlapping ranges within one array, so no temporary
            // copy is needed to shift the retained samples to the front.
            System.arraycopy(this.m_out_wav_pre, dropped, this.m_out_wav_pre, 0, this.m_out_wav_pre_len - dropped);
            System.arraycopy(this.m_out_wav, 0, this.m_out_wav_pre, capacity - incoming, incoming);
        }
        this.m_out_wav_pre_len = capacity;
    }

    /**
     * Feeds new samples to the detector and updates the {@code res} state of the
     * {@link Detectwav} given at construction.
     *
     * <p>The call is ignored when {@code sArr} is {@code null}, when {@code i} is not
     * positive, or when the new samples plus the samples kept from the previous call would
     * not fit into the analysis buffer.
     *
     * @param sArr buffer holding the new samples, starting at index 0
     * @param i    number of samples to read from {@code sArr}
     * @param i2   forwarded to the frame analysis; the value 1 makes the first frame seed the
     *             spectral estimates instead of being classified (presumably the index of the
     *             first packet of an utterance - confirm against the caller)
     */
    public void detect_speech(short[] sArr, int i, int i2) {
        if (sArr == null || i <= 0) {
            return;
        }
        this.m_wav_len = this.m_reserve_len + i;
        if (this.m_wav_len > this.m_max_wav_len) {
            return;
        }
        System.arraycopy(sArr, 0, this.m_raw_wav, this.m_reserve_len, i);
        this.m_speech_frame_num = 0;
        this.m_non_speech_frame_num = 0;
        detect_sp_ratio(i2);

        // Until speech has been found, keep the previous call's output as pre-speech audio.
        if (!this.m_deteDetectwavNew.res.m_is_speech_found) {
            reserve_pre_speech();
        }

        // Publish the consumed samples and move the unconsumed tail to the buffer start.
        this.m_out_wav_len = this.m_wav_len - this.m_reserve_len;
        System.arraycopy(this.m_raw_wav, 0, this.m_out_wav, 0, this.m_out_wav_len);
        // Overlapping copy within m_raw_wav; System.arraycopy is specified to handle it.
        System.arraycopy(this.m_raw_wav, this.m_out_wav_len, this.m_raw_wav, 0, this.m_reserve_len);

        boolean speechNow = this.m_speech_frame_num > 0;
        if (!speechNow) {
            // NOTE(review): the factor 0.0d makes both updates contribute zero, so the wait
            // time is reset or left unchanged. Kept as in the original - confirm whether a
            // per-frame duration was intended here.
            if (this.m_deteDetectwavNew.res.m_is_speech) {
                this.m_deteDetectwavNew.res.m_end_wait_time = (this.m_speech_frame_num + this.m_non_speech_frame_num) * 0.0d;
            } else {
                this.m_deteDetectwavNew.res.m_end_wait_time += (this.m_speech_frame_num + this.m_non_speech_frame_num) * 0.0d;
            }
        }
        this.m_deteDetectwavNew.res.m_is_speech = speechNow;
        if (speechNow) {
            if (!this.m_deteDetectwavNew.res.m_is_speech_found) {
                this.m_deteDetectwavNew.res.m_is_first_found = true;
                this.m_deteDetectwavNew.res.m_is_speech_found = true;
            } else {
                this.m_deteDetectwavNew.res.m_is_first_found = false;
            }
        }
    }

    /**
     * Copies the buffered pre-speech samples into {@code sArr}.
     *
     * @param sArr destination buffer
     * @param i    index in {@code sArr} at which copying starts
     * @return number of samples copied, or 0 if {@code sArr} is {@code null} or nothing is
     *         buffered
     */
    public int output_pre_speech(short[] sArr, int i) {
        if (sArr == null || this.m_out_wav_pre_len <= 0) {
            return 0;
        }
        System.arraycopy(this.m_out_wav_pre, 0, sArr, i, this.m_out_wav_pre_len);
        return this.m_out_wav_pre_len;
    }

    /**
     * Copies the samples published by the most recent accepted
     * {@link #detect_speech(short[], int, int)} call into {@code sArr}.
     *
     * @param sArr destination buffer
     * @param i    index in {@code sArr} at which copying starts
     * @return number of samples copied, or 0 if {@code sArr} is {@code null} or no output is
     *         available
     */
    public int output_speech(short[] sArr, int i) {
        if (sArr == null || this.m_out_wav_len <= 0) {
            return 0;
        }
        System.arraycopy(this.m_out_wav, 0, sArr, i, this.m_out_wav_len);
        return this.m_out_wav_len;
    }
}
