/************************************************************************ * AEC.C - C program simulation of acoustic echo canceler using * 1. mode detector (with near-end & far-end speech detectors) * 2. on-line adaptation using leaky normalized LMS algorithm * * 3. residual error of AEC is used for near-end speech detector * The first 2 seconds of receive signal is considered in the * receive mode with additonal 12 dB residual echo reduction * 4. residual echo suppressor using center clipper when echo * return loss enhancement (ERLE) > 18 dB * 5. comfort noise is injected when center clipper is enabled * 6. Attenuator on transmit is used for receive mode when * ERLE < 18 dB and for double-talk mode * ************************************************************************* * Real-time system configuration: * * ______________ * farEndIn | | speakerOut * ---------->| |-------------> * | | * | | * | Speakphone | * | System | * | | * farEndOut | | microphoneIn * <----------| |<------------- * |____________| * where * farEndIn is the received signal from far-end by telephone interface * farEndOut is the signal transmit to far-end by telephone interface * speakerOut is the signal send to drive loudspeaker in room * microphoneIn is the signal pickup by the microphone in room ************************************************************************* * Synopsis * * This program is executed by keying in the following command: * * aec3 micFile farFile txFile * * where * micFile - file name of microphoneIn signal * farFile - file name of signal receive from far-end, farEndIn * txFile - file name of signal transmit to far-end, farEndOut * *-------------------------------------------------------------------------- * Functions called by this program * * fir, shift, uran * *************************************************************************** * Specify auxiliary routines and parameter values **************************************************************************/ #include #include #include #define MAX 1024 // maximum order of arrays void main(int argc, char *argv[]) { void shift(); // function to update signal vector float fir(); // function to perform FIR filtering float uran(); // function to generate white noise /************************************************************************** * Define variable arrays, define and clear variables **************************************************************************/ int i,k; // integers for indexes int AECorder = 512; // order of AEC filter int shortWindow = 32; // length of short window, 4 ms int mediumWindow = 128; // length of medium window, 16 ms int longWindow = 16000; // length of long window, 2 second int nearFlag = 0; // flag of near-end speech detector int farFlag = 0; // flag of far-end speech detector int ms5 = 80; // 10 ms for ramp up int ms50 = 800; // 100 ms for ramp down int hangoverTime = 1600; // hangover time is 200 ms int trainTime = 16000; // training time is 2 second int farHangCount = 1600; // hangover counter for far-end speech int nearHangCount = 1600; // hangover counter for near-end speech float nfNear = 0.; // near-end noise floor float nfFar = 0.; // far-end noise floor float saveMargin = 150.; // safety margin for speech detector float thresNear = 0.; // threshold for near-end speech detector float thresFar = 0.; // threshold for far-end speech detector float farEndIn = 0.; // new data from the far-end float AECout = 0.; // output from AEC filter float speakerOut = 0.; // signal send to loudspeaker float microphoneIn = 0.; // signal pick up by microphone float farEndOut = 0.; // signal transmit to far-end float temp = 0.; // temporary storage float errorAEC = 0.; // error signal of AEC float comfortNoise = 0.; // comfort noise for transmission float clipThres = 0.; // clipping threshold for center clipper float farInPowS = 0.; // power of farEndIn by short window float farInPowM = 0.; // power of farEndIn by medium window float micInPowS = 0.; // power of microphoneIn by short window float micInPowM = 0.; // power of microphoneIn by medium window float spkOutPowS = 0.; // power of speakerOut by short window float spkOutPowM = 0.; // power of speakerOut by medium window float errorAECpowS = 0.; // power of errorAEC by short window float errorAECpowM = 0.; // power of errorAEC by medium window float alphaShort = 0.; // 1/short window length float alphaMedium = 0.; // 1/medium window length float alphaLong = 0.; // 1/long window length float alpha1Short = 0.; // 1 - alphaShort float alpha1Medium = 0.; // 1 - alphaMedium float alpha1Long = 0.; // 1 - alphaLong float rampUp = 0.; // ramp up step size float rampDown = 0.; // ramp down step size float txGain = 1.; // attenuator value for transmit signal float AECcoef[MAX]; // coefficient vector of AEC filter float AECbuf[MAX]; // signal buffer for AEC float mu = (float)(0.032/32767.);// step size for LMS update float leaky = (float)0.999969482;// leaky factor is 1. - 2**-15 /************************************************************************** * Declare file pointers **************************************************************************/ FILE *farin; // file pointer of farEndIn signal FILE *micin; // file pointer of microphoneIn signal FILE *txout; // file pointer of farEndOut signal /************************************************************************** * Define usage of executable file **************************************************************************/ if ( argc < 3 ) { fprintf(stderr,"%s\n*** Usage ***\n",argv[0]); fprintf(stderr,"aec3 micFile farFile txFile\n"); fprintf(stderr," micFile is file name of microphoneIn speech\n"); fprintf(stderr," farFile is file name of farEndIn speech\n"); fprintf(stderr," txFile is file name of farEndOut signal\n"); exit(0); } /************************************************************************** * Input data and parameters at execution time **************************************************************************/ if (( micin = fopen(argv[1],"r")) == NULL ) // open microphone signal file { fprintf(stderr,"Can't open the microphoneIn data file %s!\n",argv[1]); exit(0); } if (( farin = fopen(argv[2],"r")) == NULL ) // open far-end speech file { fprintf(stderr,"Can't open the farEndOut data file %s!\n",argv[2]); exit(0); } txout = fopen(argv[3],"w"); // open farEndOut data file for evaluation /* Calculate parameters for power estimators using different window length */ alphaShort = (float)(1./shortWindow); // alpha = 1/window length alphaMedium = (float)(1./mediumWindow); // alpha for medium window alphaLong = (float)(1./longWindow); // alpha for long window alpha1Short = (float)(1. - alphaShort); // alpha1 = 1 - alpha alpha1Medium = (float)(1. - alphaMedium);// for medium window alpha1Long = (float)(1. - alphaLong); // for long window rampUp = (float)(1./ms5); // ramp up take 10 ms rampDown = (float)(1./ms50); // ramp down take 100 ms /************************************************************************** * Clear arrays **************************************************************************/ for (i=0; i|-|RX_processing|-|---------------> * | ----------------| * | | * | Speakphone | * | System | * | | * | | * <------------| |<-------------- * farEndOut |_________________| microphoneIn * * In this program, RX_processing = 1, thus speakerOut = farEndIn * In the future, we will add following techniques as RX_processing: * (1) spectral distribution detector * (2) automatic gain control * (3) loudness enhancement * *************************************************************************** * Generate microphoneIn for speakerphone simulation **************************************************************************/ while( (fscanf(farin,"%f",&farEndIn)) != EOF) // read farEndIn data file { fscanf(micin,"%f",µphoneIn); // read microphoneIn from data file speakerOut = farEndIn; // insert RX_processing here later temp = (float)fabs(speakerOut); // temp = |speakerOut| spkOutPowS = (float)(alpha1Short*spkOutPowS + alphaShort*temp); // estimate speakerOut power by short window spkOutPowM = (float)(alpha1Medium*spkOutPowM + alphaMedium*temp); // estimate farEndIn power by medium window /************************************************************************** * Real-time speakerphone simulation: * * _______________ * farEndIn | | speakerOut to loudspeaker * --------->|RX_processing|-----|----------|-----------------> * |_____________| | | * __|__ __|__ * --------------------- | | | | * | Other processings | |NLLMS|--->| AEC | * --------------------- |_____| |_____| * | | * _____________ | |AECout * | | | -| + * <---------|TX_processing|<----|--------(sum)<-------------- * farEndOut |_____________| errorAEC micropjoneIn * * * AEC FIR filter is updated by leaky normalized LMS (LNLMS) algorithm * TX_processing: * (1) residual echo suppressor using center clipper and inject * comfort noise if there is 18 dB ERLE in receive mode * (2) attenuator with -12 dB if ERLE < 18 dB in receive mode * (3) attenuator with -6 dB in double-talk mode * Other processings: none at this program, in the future, they are: * (1) analog loudspeake volume control and detection * (2) performance monitoring to detect environment changes * (3) noise reduction * (4) automatic gain control * *************************************************************************/ /* Update far-end noise floor estimate of receiving far-end signal */ temp = (float)fabs(farEndIn); // temp = |farEndIn| farInPowS = (float)(alpha1Short*farInPowS + alphaShort*temp); // estimate farEndIn power by short window farInPowM = (float)(alpha1Medium*farInPowM + alphaMedium*temp); // estimate farEndIn power by medium window if (nfFar < farInPowS){ nfFar = (float)(alpha1Long*nfFar + alphaLong*temp); } // onset of speech, slow update using long window else{ nfFar = (float)(alpha1Medium*nfFar + alphaMedium*temp); } // offset of speech, fast decay using medium window /* Update near-endnoise floor estimate of signal pickup by microphone */ temp = (float)fabs(microphoneIn); // temp = |microphoneIn| micInPowS = (float)(alpha1Short*micInPowS + alphaShort*temp); // estimate microphone signal power by short window micInPowM = (float)(alpha1Medium*micInPowM + alphaMedium*temp); // estimate microphone signal power by short window if (nfNear < micInPowS){ nfNear = (float)(alpha1Long*nfNear + alphaLong*temp); } // onset of speech, slow update using long window else{ nfNear = (float)(alpha1Medium*nfNear + alphaMedium*temp); } // offset of speech, fast decay using medium window /* AEC filtering to obtain echo mimic and error of AEC */ shift(AECbuf,AECorder,speakerOut); // update AEC signal buffer AECout = fir(AECbuf,AECcoef,AECorder); // output of AEC, echo mimic errorAEC = microphoneIn - AECout; // errorAEC=microphoneIn-AECout errorAECpowS = (float)(alpha1Short*errorAECpowS + alphaShort*fabs(errorAEC)); // estimate power of AEC error by short window errorAECpowM = (float)(alpha1Medium*errorAECpowM + alphaMedium*fabs(errorAEC)); // estimate power of AEC error by medium window /************************************************************************ * Speech detection algorithm: * * Far-end speech detector: * if (farInPowS > thresFar) * there is far-end speech * * Near-end speech detector: * if (micInPowS > acousticLoss*spkOutPowS || erroAECpowS > thresNear) * there is near-end speech * ************************************************************************/ thresFar = (float)(1.414*nfFar + saveMargin); // threshold for far-end detector thresNear = (float)(1.414*nfNear + saveMargin); // threshold for near-end detector if (farInPowS>thresFar) { farFlag = 1; // declare far-end speech farHangCount = hangoverTime; // set hangover time counter } else { if (farHangCount > 0) // counter no expire yet farHangCount--; // decrement hangover counter else farFlag = 0; // hangover counter expired } if ((micInPowS>thresNear) && (errorAECpowS>thresNear)) { nearFlag = 1; // declare near-end speech nearHangCount = hangoverTime;// set hangover time counter } else { if (nearHangCount > 0) // counter no expire yet nearHangCount--; // decrement hangover counter else nearFlag = 0; // hangover counter expired } /************************************************************************ * Mode detector: * Transmit - near-end speech only, nearFlag = 1, farFlag = 0 * Receive - far-end speech only, farFlag = 1, nearFalg = 0 * Double-talk - both near-end and far-end speeches are present, * nearFlag = farFlag = 1 * Idle - no activity, nearFlag = farFlag = 0 * * Mode detection algorithm: * * if (farFlag == 1) there is far-end speech * if ((nearFlag == 0)||(trainTime > 0)) first 2 seconds of far-end signal * receive mode * else * double-talk mode * else there is no far-end speech * if (narFlag == 1) there is near-end speech * transmit mode * else there is n near-end speech * idle mode * * Note: we assume the first 2 seconds of farEndIn signal is in receive * mode to allow initial adaptation of AEC * * Operations at different Mode: * * Mode | TX_processing * ------------|----------------------------------------------------- * Idle | farEndOut = microphoneIn * | * Transmit | farEndOut = microphoneIn * | * Receive | farEndOut = comfortNoise if ERLE > 18 dB * | farEndOut = 0.25*errorAEC if ERLE < 18 dB (-12 dB) * | update AEC coefficients * | * double-talk | farEndOut = 0.5*errorAEC (-6 dB loss) * ***********************************************************************/ comfortNoise = (float)(50.*(uran()-0.5)); // generate comfort noise to match background noise clipThres = (float)(0.0625*micInPowM); // clipping threshold is 18 dB ERLE if (farFlag == 1) // there is far-end speech { if ((nearFlag == 0) || (trainTime > 0)) // Receive mode { /* Receive mode operations */ if (trainTime > 0) // counter is no expire yet { trainTime--; // decrement the counter if (txGain > 0.25 { txGain -= rampDown; // ramp down } farEndOut = (float)(txGain*errorAEC);// attenuate by 12 dB } if (errorAECpowM 18 dB { // enable center clipper and farEndOut = comfortNoise; // inject comfort noise } else // if ERLE < 18 dB { if (txGain > 0.25) { txGain -= rampDown; // ramp down } farEndOut = (float)(txGain*errorAEC);// disable center clipper } // attenuated by 12 dB if (farInPowM < 16000.) // signal farEndIn is in reasonable { // range, update AEC coefficients, otherwise skip adaptation temp = (float)((mu*errorAEC)/(spkOutPowM+saveMargin)); // normalized step size for (k=0; k 0.5) { txGain -= rampDown; // ramp down } if (txGain < 0.5) { txGain += rampUp; // ramp up } farEndOut = (float)(txGain*errorAEC); // attenuate 6 dB } } else // there is no far-end speech { if (nearFlag == 1) // transmit mode operation { if (txGain < 1) { txGain += rampUp; } farEndOut = txGain*microphoneIn; // full gain at transmit path } else // idle mode operation { if (txGain > 0.5) { txGain -= rampDown; // ramp down } if (txGain < 0.5) { txGain += rampUp; // ramp up } farEndOut = (float)(txGain*microphoneIn); // attenuate 6 dB } } fprintf(txout,"%d\n",(int)(farEndOut+0.5)); // write farEndOut to file } fcloseall(); } /************************************************************************** * URAN - This function generates pseudo-random numbers * **************************************************************************/ static long n = (long)12357;// seed I(0) = 12357 float uran() { float ran; // random noise r(n) n = (long)2045 * n + 1L; // I(n) = 2045 * I(n-1) + 1 n -= (n / 1048576L) * 1048576L; // I(n) = I(n) - INT[I(n)/1048576] * 1048576 ran = (float)(n + 1L) / (float)1048577; // r(n) = FLOAT[I(n) + 1] / 1048577 return(ran); // return r(n) to main function } /************************************************************************** * SHIFT - This function updates signal vector * * data stored as [x(n) x(n-1) ... x(n-N+1)] * **************************************************************************/ void shift(x, N, new) float *x; int N; float new; { int i; // index for (i=N-1; i>0; --i) { x[i] = x[i-1]; // shift old data x(n-i), i = 1, 2, ... N-1 } x[0] = (float)new; // insert new data x(n) } /************************************************************************** * FIR - This function performs FIR filtering * * ntap-1 * * y(n) = sum wi * x(n-i) * * i=0 * **************************************************************************/ float fir(x, w, ntap) float *x, *w; int ntap; { float yn; // output of FIR filter int i; // index yn = 0.0; // y(n) = 0. for (i=0; i