kiss_fft.c

00001 /*
00002 Copyright (c) 2003-2004, Mark Borgerding
00003 
00004 All rights reserved.
00005 
00006 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
00007 
00008     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
00009     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
00010     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
00011 
00012 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00013 */
00014 
00015 
00016 #include "_kiss_fft_guts.h"
00017 /* The guts header contains all the multiplication and addition macros that are defined for
00018  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
00019  */
00020 
00021 static kiss_fft_cpx *scratchbuf=NULL;
00022 static size_t nscratchbuf=0;
00023 static kiss_fft_cpx *tmpbuf=NULL;
00024 static size_t ntmpbuf=0;
00025 
00026 #define CHECKBUF(buf,nbuf,n) \
00027     do { \
00028         if ( nbuf < (size_t)(n) ) {\
00029             free(buf); \
00030             buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
00031             nbuf = (size_t)(n); \
00032         } \
00033    }while(0)
00034 
00035 
00036 static void kf_bfly2(
00037         kiss_fft_cpx * Fout,
00038         const size_t fstride,
00039         const kiss_fft_cfg st,
00040         int m
00041         )
00042 {
00043     kiss_fft_cpx * Fout2;
00044     kiss_fft_cpx * tw1 = st->twiddles;
00045     kiss_fft_cpx t;
00046     Fout2 = Fout + m;
00047     do{
00048         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
00049 
00050         C_MUL (t,  *Fout2 , *tw1);
00051         tw1 += fstride;
00052         C_SUB( *Fout2 ,  *Fout , t );
00053         C_ADDTO( *Fout ,  t );
00054         ++Fout2;
00055         ++Fout;
00056     }while (--m);
00057 }
00058 
00059 static void kf_bfly4(
00060         kiss_fft_cpx * Fout,
00061         const size_t fstride,
00062         const kiss_fft_cfg st,
00063         const size_t m
00064         )
00065 {
00066     kiss_fft_cpx *tw1,*tw2,*tw3;
00067     kiss_fft_cpx scratch[6];
00068     size_t k=m;
00069     const size_t m2=2*m;
00070     const size_t m3=3*m;
00071 
00072     tw3 = tw2 = tw1 = st->twiddles;
00073 
00074     do {
00075         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
00076 
00077         C_MUL(scratch[0],Fout[m] , *tw1 );
00078         C_MUL(scratch[1],Fout[m2] , *tw2 );
00079         C_MUL(scratch[2],Fout[m3] , *tw3 );
00080 
00081         C_SUB( scratch[5] , *Fout, scratch[1] );
00082         C_ADDTO(*Fout, scratch[1]);
00083         C_ADD( scratch[3] , scratch[0] , scratch[2] );
00084         C_SUB( scratch[4] , scratch[0] , scratch[2] );
00085         C_SUB( Fout[m2], *Fout, scratch[3] );
00086         tw1 += fstride;
00087         tw2 += fstride*2;
00088         tw3 += fstride*3;
00089         C_ADDTO( *Fout , scratch[3] );
00090 
00091         if(st->inverse) {
00092             Fout[m].r = scratch[5].r - scratch[4].i;
00093             Fout[m].i = scratch[5].i + scratch[4].r;
00094             Fout[m3].r = scratch[5].r + scratch[4].i;
00095             Fout[m3].i = scratch[5].i - scratch[4].r;
00096         }else{
00097             Fout[m].r = scratch[5].r + scratch[4].i;
00098             Fout[m].i = scratch[5].i - scratch[4].r;
00099             Fout[m3].r = scratch[5].r - scratch[4].i;
00100             Fout[m3].i = scratch[5].i + scratch[4].r;
00101         }
00102         ++Fout;
00103     }while(--k);
00104 }
00105 
00106 static void kf_bfly3(
00107          kiss_fft_cpx * Fout,
00108          const size_t fstride,
00109          const kiss_fft_cfg st,
00110          size_t m
00111          )
00112 {
00113      size_t k=m;
00114      const size_t m2 = 2*m;
00115      kiss_fft_cpx *tw1,*tw2;
00116      kiss_fft_cpx scratch[5];
00117      kiss_fft_cpx epi3;
00118      epi3 = st->twiddles[fstride*m];
00119 
00120      tw1=tw2=st->twiddles;
00121 
00122      do{
00123          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
00124 
00125          C_MUL(scratch[1],Fout[m] , *tw1);
00126          C_MUL(scratch[2],Fout[m2] , *tw2);
00127 
00128          C_ADD(scratch[3],scratch[1],scratch[2]);
00129          C_SUB(scratch[0],scratch[1],scratch[2]);
00130          tw1 += fstride;
00131          tw2 += fstride*2;
00132 
00133          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
00134          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
00135 
00136          C_MULBYSCALAR( scratch[0] , epi3.i );
00137 
00138          C_ADDTO(*Fout,scratch[3]);
00139 
00140          Fout[m2].r = Fout[m].r + scratch[0].i;
00141          Fout[m2].i = Fout[m].i - scratch[0].r;
00142 
00143          Fout[m].r -= scratch[0].i;
00144          Fout[m].i += scratch[0].r;
00145 
00146          ++Fout;
00147      }while(--k);
00148 }
00149 
00150 static void kf_bfly5(
00151         kiss_fft_cpx * Fout,
00152         const size_t fstride,
00153         const kiss_fft_cfg st,
00154         int m
00155         )
00156 {
00157     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
00158     int u;
00159     kiss_fft_cpx scratch[13];
00160     kiss_fft_cpx * twiddles = st->twiddles;
00161     kiss_fft_cpx *tw;
00162     kiss_fft_cpx ya,yb;
00163     ya = twiddles[fstride*m];
00164     yb = twiddles[fstride*2*m];
00165 
00166     Fout0=Fout;
00167     Fout1=Fout0+m;
00168     Fout2=Fout0+2*m;
00169     Fout3=Fout0+3*m;
00170     Fout4=Fout0+4*m;
00171 
00172     tw=st->twiddles;
00173     for ( u=0; u<m; ++u ) {
00174         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
00175         scratch[0] = *Fout0;
00176 
00177         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
00178         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
00179         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
00180         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
00181 
00182         C_ADD( scratch[7],scratch[1],scratch[4]);
00183         C_SUB( scratch[10],scratch[1],scratch[4]);
00184         C_ADD( scratch[8],scratch[2],scratch[3]);
00185         C_SUB( scratch[9],scratch[2],scratch[3]);
00186 
00187         Fout0->r += scratch[7].r + scratch[8].r;
00188         Fout0->i += scratch[7].i + scratch[8].i;
00189 
00190         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
00191         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
00192 
00193         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
00194         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
00195 
00196         C_SUB(*Fout1,scratch[5],scratch[6]);
00197         C_ADD(*Fout4,scratch[5],scratch[6]);
00198 
00199         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
00200         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
00201         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
00202         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
00203 
00204         C_ADD(*Fout2,scratch[11],scratch[12]);
00205         C_SUB(*Fout3,scratch[11],scratch[12]);
00206 
00207         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
00208     }
00209 }
00210 
00211 /* perform the butterfly for one stage of a mixed radix FFT */
00212 static void kf_bfly_generic(
00213         kiss_fft_cpx * Fout,
00214         const size_t fstride,
00215         const kiss_fft_cfg st,
00216         int m,
00217         int p
00218         )
00219 {
00220     int u,k,q1,q;
00221     kiss_fft_cpx * twiddles = st->twiddles;
00222     kiss_fft_cpx t;
00223     int Norig = st->nfft;
00224 
00225     CHECKBUF(scratchbuf,nscratchbuf,p);
00226 
00227     for ( u=0; u<m; ++u ) {
00228         k=u;
00229         for ( q1=0 ; q1<p ; ++q1 ) {
00230             scratchbuf[q1] = Fout[ k  ];
00231             C_FIXDIV(scratchbuf[q1],p);
00232             k += m;
00233         }
00234 
00235         k=u;
00236         for ( q1=0 ; q1<p ; ++q1 ) {
00237             int twidx=0;
00238             Fout[ k ] = scratchbuf[0];
00239             for (q=1;q<p;++q ) {
00240                 twidx += (int)fstride * k;
00241                 if (twidx>=Norig) twidx-=Norig;
00242                 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
00243                 C_ADDTO( Fout[ k ] ,t);
00244             }
00245             k += m;
00246         }
00247     }
00248 }
00249 
00250 static
00251 void kf_work(
00252         kiss_fft_cpx * Fout,
00253         const kiss_fft_cpx * f,
00254         const size_t fstride,
00255         int in_stride,
00256         int * factors,
00257         const kiss_fft_cfg st
00258         )
00259 {
00260     kiss_fft_cpx * Fout_beg=Fout;
00261     const int p=*factors++; /* the radix  */
00262     const int m=*factors++; /* stage's fft length/p */
00263     const kiss_fft_cpx * Fout_end = Fout + p*m;
00264 
00265     if (m==1) {
00266         do{
00267             *Fout = *f;
00268             f += fstride*in_stride;
00269         }while(++Fout != Fout_end );
00270     }else{
00271         do{
00272             kf_work( Fout , f, fstride*p, in_stride, factors,st);
00273             f += fstride*in_stride;
00274         }while( (Fout += m) != Fout_end );
00275     }
00276 
00277     Fout=Fout_beg;
00278 
00279     switch (p) {
00280         case 2: kf_bfly2(Fout,fstride,st,m); break;
00281         case 3: kf_bfly3(Fout,fstride,st,m); break; 
00282         case 4: kf_bfly4(Fout,fstride,st,m); break;
00283         case 5: kf_bfly5(Fout,fstride,st,m); break; 
00284         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
00285     }
00286 }
00287 
00288 /*  facbuf is populated by p1,m1,p2,m2, ...
00289     where 
00290     p[i] * m[i] = m[i-1]
00291     m0 = n                  */
00292 static 
00293 void kf_factor(int n,int * facbuf)
00294 {
00295     int p=4;
00296     double floor_sqrt;
00297     floor_sqrt = floor( sqrt((double)n) );
00298 
00299     /*factor out powers of 4, powers of 2, then any remaining primes */
00300     do {
00301         while (n % p) {
00302             switch (p) {
00303                 case 4: p = 2; break;
00304                 case 2: p = 3; break;
00305                 default: p += 2; break;
00306             }
00307             if (p > floor_sqrt)
00308                 p = n;          /* no more factors, skip to end */
00309         }
00310         n /= p;
00311         *facbuf++ = p;
00312         *facbuf++ = n;
00313     } while (n > 1);
00314 }
00315 
00316 /*
00317  *
00318  * User-callable function to allocate all necessary storage space for the fft.
00319  *
00320  * The return value is a contiguous block of memory, allocated with malloc.  As such,
00321  * It can be freed with free(), rather than a kiss_fft-specific function.
00322  * */
00323 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
00324 {
00325     kiss_fft_cfg st=NULL;
00326     size_t memneeded = sizeof(struct kiss_fft_state)
00327         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
00328 
00329     if ( lenmem==NULL ) {
00330         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
00331     }else{
00332         if (mem != NULL && *lenmem >= memneeded)
00333             st = (kiss_fft_cfg)mem;
00334         *lenmem = memneeded;
00335     }
00336     if (st) {
00337         int i;
00338         st->nfft=nfft;
00339         st->inverse = inverse_fft;
00340 
00341         for (i=0;i<nfft;++i) {
00342             const double pi=3.141592653589793238462643383279502884197169399375105820974944;
00343             double phase = -2*pi*i / nfft;
00344             if (st->inverse)
00345                 phase *= -1;
00346             kf_cexp(st->twiddles+i, phase );
00347         }
00348 
00349         kf_factor(nfft,st->factors);
00350     }
00351     return st;
00352 }
00353 
00354 
00355 
00356     
00357 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
00358 {
00359     if (fin == fout) {
00360         CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
00361         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
00362         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
00363     }else{
00364         kf_work( fout, fin, 1,in_stride, st->factors,st );
00365     }
00366 }
00367 
00368 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
00369 {
00370     kiss_fft_stride(cfg,fin,fout,1);
00371 }
00372 
00373 
00374 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the 
00375    buffers from CHECKBUF
00376  */ 
00377 void kiss_fft_cleanup(void)
00378 {
00379     free(scratchbuf);
00380     scratchbuf = NULL;
00381     nscratchbuf=0;
00382     free(tmpbuf);
00383     tmpbuf=NULL;
00384     ntmpbuf=0;
00385 }
00386 
00387 int kiss_fft_next_fast_size(int n)
00388 {
00389     while(1) {
00390         int m=n;
00391         while ( (m%2) == 0 ) m/=2;
00392         while ( (m%3) == 0 ) m/=3;
00393         while ( (m%5) == 0 ) m/=5;
00394         if (m<=1)
00395             break; /* n is completely factorable by twos, threes, and fives */
00396         n++;
00397     }
00398     return n;
00399 }

Generated on Sun Feb 8 15:31:09 2009 for The Zenautics Matrix Project by  doxygen 1.5.4