/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1998--2019 The R Core Team * Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #include /* for INT_MAX */ #include /* for size_t */ #include /* for abs */ #include #include /* for imax2(.),..*/ /* Fast Fourier Transform * * These routines are based on code by Richard Singleton in the * book "Programs for Digital Signal Processing" put out by IEEE. * * I have translated them to C and moved the memory allocation * so that it takes place under the control of the algorithm * which calls these; for R, see ./fourier.c ~~~~~~~~~~~ * * void fft_factor(int n, int *maxf, int *maxp) * * This factorizes the series length and computes the values of * maxf and maxp which determine the amount of scratch storage * required by the algorithm. * * If maxf is zero on return, an error occured during factorization. * The nature of the error can be determined from the value of maxp. * If maxp is zero, an invalid (zero) parameter was passed and * if maxp is one, the internal nfac array was too small. This can only * happen for series lengths which exceed 12,754,584. * * PROBLEM (see fftmx below): nfac[] is overwritten by fftmx() in fft_work() * ------- Consequence: fft_factor() must be called way too often, * at least from do_mvfft() [ ../main/fourier.c ] * * The following arrays need to be allocated following the call to * fft_factor and preceding the call to fft_work. * * work double[4*maxf] * iwork int[maxp] * * int fft_work(double *a, double *b, int nseg, int n, int nspn, * int isn, double *work, int *iwork) * * The routine returns 1 if the transform was completed successfully and * 0 if invalid values of the parameters were supplied. * * Ross Ihaka * University of Auckland * February 1997 * ========================================================================== * * Header from the original Singleton algorithm: * * -------------------------------------------------------------- * subroutine: fft * multivariate complex fourier transform, computed in place * using mixed-radix fast fourier transform algorithm. * -------------------------------------------------------------- * * arrays a and b originally hold the real and imaginary * components of the data, and return the real and * imaginary components of the resulting fourier coefficients. * multivariate data is indexed according to the fortran * array element successor function, without limit * on the number of implied multiple subscripts. * the subroutine is called once for each variate. * the calls for a multivariate transform may be in any order. * * n is the dimension of the current variable. * nspn is the spacing of consecutive data values * while indexing the current variable. * nseg nseg*n*nspn is the total number of complex data values. * isn the sign of isn determines the sign of the complex * exponential, and the magnitude of isn is normally one. * the magnitude of isn determines the indexing increment for a&b. * * if fft is called twice, with opposite signs on isn, an * identity transformation is done...calls can be in either order. * the results are scaled by 1/n when the sign of isn is positive. * * a tri-variate transform with a(n1,n2,n3), b(n1,n2,n3) * is computed by * call fft(a,b,n2*n3,n1, 1, -1) * call fft(a,b,n3 ,n2,n1, -1) * call fft(a,b,1, n3,n1*n2,-1) * * a single-variate transform of n complex data values is computed by * call fft(a,b,1,n,1,-1) * * the data may alternatively be stored in a single complex * array a, then the magnitude of isn changed to two to * give the correct indexing increment and a(2) used to * pass the initial address for the sequence of imaginary * values, e.g. * call fft(a,a(2),nseg,n,nspn,-2) * * nfac[15] (array) is working storage for factoring n. the smallest * number exceeding the 15 locations provided is 12,754,584. * * Update in R 3.1.0: nfac[20], increased array size. It is now possible to * factor any positive int n, up to 2^31 - 1. */ static void fftmx(double *a, double *b, int ntot, int n, int nspan, int isn, int m, int kt, double *at, double *ck, double *bt, double *sk, int *np, int *nfac) { /* called from fft_work() */ /* Design BUG: One purpose of fft_factor() would be to compute * ---------- nfac[] once and for all; and fft_work() [i.e. fftmx ] * could reuse the factorization. * However: nfac[] is `destroyed' currently in the code below */ double aa, aj, ajm, ajp, ak, akm, akp; double bb, bj, bjm, bjp, bk, bkm, bkp; double c1, c2=0, c3=0, c72, cd; double dr, rad; double s1, s120, s2=0, s3=0, s72, sd; int i, inc, j, jc, jf, jj; int k, k1, k2, k3=0, k4, kk, klim, ks, kspan, kspnn; int lim, maxf, mm, nn, nt; // converted from Fortran, so 1-based indexing a--; b--; at--; ck--; bt--; sk--; np--; // nfac has had indexing converted to avoid compiler warnings inc = abs(isn); nt = inc*ntot; ks = inc*nspan; rad = M_PI_4;/* = pi/4 =^= 45 degrees */ s72 = rad/0.625;/* 72 = 45 / .625 degrees */ c72 = cos(s72); s72 = sin(s72); s120 = 0.5*M_SQRT_3;/* sin(120) = sqrt(3)/2 */ if(isn <= 0) { s72 = -s72; s120 = -s120; rad = -rad; } else { #ifdef SCALING /* scale by 1/n for isn > 0 */ ak = 1.0/n; for(j = 1 ; j <= nt ; j += inc) { a[j] *= ak; b[j] *= ak; } #endif } kspan = ks; nn = nt - inc; jc = ks/n; /* sin, cos values are re-initialized each lim steps */ lim = 32; klim = lim*jc; i = 0; jf = 0; maxf = nfac[m - kt - 1]; if(kt > 0) maxf = imax2(nfac[kt-1], maxf); /* compute fourier transform */ L_start: dr = (8.0*jc)/kspan; cd = sin(0.5*dr*rad); cd = 2.0*cd*cd; sd = sin(dr*rad); kk = 1; i++; if( nfac[i-1] != 2) goto L110; /* transform for factor of 2 (including rotation factor) */ kspan /= 2; k1 = kspan + 2; do { do { k2 = kk + kspan; ak = a[k2]; bk = b[k2]; a[k2] = a[kk] - ak; b[k2] = b[kk] - bk; a[kk] += ak; b[kk] += bk; kk = k2 + kspan; } while(kk <= nn); kk -= nn; } while(kk <= jc); if(kk > kspan) goto L_fin; L60: c1 = 1.0 - cd; s1 = sd; mm = imin2(k1/2,klim); goto L80; L70: ak = c1 - (cd*c1+sd*s1); s1 = (sd*c1-cd*s1) + s1; /* the following three statements compensate for truncation error. */ /* if rounded arithmetic is used (nowadays always ?!), substitute c1=ak */ #ifdef TRUNCATED_ARITHMETIC c1 = 0.5/(ak*ak+s1*s1) + 0.5; s1 = c1*s1; c1 = c1*ak; #else c1 = ak; #endif L80: do { k2 = kk + kspan; ak = a[kk] - a[k2]; bk = b[kk] - b[k2]; a[kk] += a[k2]; b[kk] += b[k2]; a[k2] = c1*ak - s1*bk; b[k2] = s1*ak + c1*bk; kk = k2 + kspan; } while(kk < nt); k2 = kk - nt; c1 = -c1; kk = k1 - k2; if( kk > k2) goto L80; kk += jc; if(kk <= mm) goto L70; if(kk >= k2) { k1 = k1 + inc + inc; kk = (k1-kspan)/2 + jc; if( kk <= jc+jc) goto L60; goto L_start; } s1 = ((kk-1)/jc)*dr*rad; c1 = cos(s1); s1 = sin(s1); mm = imin2(k1/2,mm+klim); goto L80; /* transform for factor of 3 (optional code) */ L100: k1 = kk + kspan; k2 = k1 + kspan; ak = a[kk]; bk = b[kk]; aj = a[k1] + a[k2]; bj = b[k1] + b[k2]; a[kk] = ak + aj; b[kk] = bk + bj; ak = -0.5*aj + ak; bk = -0.5*bj + bk; aj = (a[k1]-a[k2])*s120; bj = (b[k1]-b[k2])*s120; a[k1] = ak - bj; b[k1] = bk + aj; a[k2] = ak + bj; b[k2] = bk - aj; kk = k2 + kspan; if( kk < nn) goto L100; kk = kk - nn; if( kk <= kspan) goto L100; goto L290; /* transform for factor of 4 */ L110: if( nfac[i-1] != 4) goto L_f_odd; kspnn = kspan; kspan /= 4; L120: c1 = 1.0; s1 = 0; mm = imin2(kspan,klim); goto L150; L130: c2 = c1 - (cd*c1+sd*s1); s1 = (sd*c1-cd*s1) + s1; /* the following three statements compensate for truncation error. */ /* if rounded arithmetic is used (nowadays always ?!), substitute c1=c2 */ #ifdef TRUNCATED_ARITHMETIC c1 = 0.5/(c2*c2+s1*s1) + 0.5; s1 = c1*s1; c1 = c1*c2; #else c1 = c2; #endif L140: c2 = c1*c1 - s1*s1; s2 = c1*s1*2.0; c3 = c2*c1 - s2*s1; s3 = c2*s1 + s2*c1; L150: k1 = kk + kspan; k2 = k1 + kspan; k3 = k2 + kspan; akp = a[kk] + a[k2]; akm = a[kk] - a[k2]; ajp = a[k1] + a[k3]; ajm = a[k1] - a[k3]; a[kk] = akp + ajp; ajp = akp - ajp; bkp = b[kk] + b[k2]; bkm = b[kk] - b[k2]; bjp = b[k1] + b[k3]; bjm = b[k1] - b[k3]; b[kk] = bkp + bjp; bjp = bkp - bjp; if( isn < 0) goto L180; akp = akm - bjm; akm = akm + bjm; bkp = bkm + ajm; bkm = bkm - ajm; if( s1 == 0.0) goto L190; L160: a[k1] = akp*c1 - bkp*s1; b[k1] = akp*s1 + bkp*c1; a[k2] = ajp*c2 - bjp*s2; b[k2] = ajp*s2 + bjp*c2; a[k3] = akm*c3 - bkm*s3; b[k3] = akm*s3 + bkm*c3; kk = k3 + kspan; if( kk <= nt) goto L150; L170: kk = kk - nt + jc; if( kk <= mm) goto L130; if( kk < kspan) goto L200; kk = kk - kspan + inc; if(kk <= jc) goto L120; if(kspan == jc) goto L_fin; goto L_start; L180: akp = akm + bjm; akm = akm - bjm; bkp = bkm - ajm; bkm = bkm + ajm; if( s1 != 0.0) goto L160; L190: a[k1] = akp; b[k1] = bkp; a[k2] = ajp; b[k2] = bjp; a[k3] = akm; b[k3] = bkm; kk = k3 + kspan; if( kk <= nt) goto L150; goto L170; L200: s1 = ((kk-1)/jc)*dr*rad; c1 = cos(s1); s1 = sin(s1); mm = imin2(kspan,mm+klim); goto L140; /* transform for factor of 5 (optional code) */ L_f5: c2 = c72*c72 - s72*s72; s2 = 2.0*c72*s72; L220: k1 = kk + kspan; k2 = k1 + kspan; k3 = k2 + kspan; k4 = k3 + kspan; akp = a[k1] + a[k4]; akm = a[k1] - a[k4]; bkp = b[k1] + b[k4]; bkm = b[k1] - b[k4]; ajp = a[k2] + a[k3]; ajm = a[k2] - a[k3]; bjp = b[k2] + b[k3]; bjm = b[k2] - b[k3]; aa = a[kk]; bb = b[kk]; a[kk] = aa + akp + ajp; b[kk] = bb + bkp + bjp; ak = akp*c72 + ajp*c2 + aa; bk = bkp*c72 + bjp*c2 + bb; aj = akm*s72 + ajm*s2; bj = bkm*s72 + bjm*s2; a[k1] = ak - bj; a[k4] = ak + bj; b[k1] = bk + aj; b[k4] = bk - aj; ak = akp*c2 + ajp*c72 + aa; bk = bkp*c2 + bjp*c72 + bb; aj = akm*s2 - ajm*s72; bj = bkm*s2 - bjm*s72; a[k2] = ak - bj; a[k3] = ak + bj; b[k2] = bk + aj; b[k3] = bk - aj; kk = k4 + kspan; if( kk < nn) goto L220; kk = kk - nn; if( kk <= kspan) goto L220; goto L290; /* transform for odd factors */ L_f_odd: k = nfac[i-1]; kspnn = kspan; kspan /= k; if(k == 3) goto L100; if(k == 5) goto L_f5; if(k == jf) goto L250; jf = k; s1 = rad/(k/8.0); c1 = cos(s1); s1 = sin(s1); ck[jf] = 1.0; sk[jf] = 0.0; for(j = 1; j < k; j++) { /* k is changing as well */ ck[j] = ck[k]*c1 + sk[k]*s1; sk[j] = ck[k]*s1 - sk[k]*c1; k--; ck[k] = ck[j]; sk[k] = -sk[j]; } L250: k1 = kk; k2 = kk + kspnn; aa = a[kk]; bb = b[kk]; ak = aa; bk = bb; j = 1; k1 = k1 + kspan; L260: k2 = k2 - kspan; j++; at[j] = a[k1] + a[k2]; ak = at[j] + ak; bt[j] = b[k1] + b[k2]; bk = bt[j] + bk; j++; at[j] = a[k1] - a[k2]; bt[j] = b[k1] - b[k2]; k1 = k1 + kspan; if( k1 < k2) goto L260; a[kk] = ak; b[kk] = bk; k1 = kk; k2 = kk + kspnn; j = 1; L270: k1 += kspan; k2 -= kspan; jj = j; ak = aa; bk = bb; aj = 0.0; bj = 0.0; k = 1; for(k = 2; k < jf; k++) { ak += at[k]*ck[jj]; bk += bt[k]*ck[jj]; k++; aj += at[k]*sk[jj]; bj += bt[k]*sk[jj]; jj += j; if(jj > jf) jj -= jf; } k = jf - j; a[k1] = ak - bj; b[k1] = bk + aj; a[k2] = ak + bj; b[k2] = bk - aj; j++; if( j < k) goto L270; kk = kk + kspnn; if( kk <= nn) goto L250; kk = kk - nn; if( kk <= kspan) goto L250; /* multiply by rotation factor (except for factors of 2 and 4) */ L290: if(i == m) goto L_fin; kk = jc + 1; L300: c2 = 1.0 - cd; s1 = sd; mm = imin2(kspan,klim); do { /* L320: */ c1 = c2; s2 = s1; kk += kspan; do { /* L330: */ do { ak = a[kk]; a[kk] = c2*ak - s2*b[kk]; b[kk] = s2*ak + c2*b[kk]; kk += kspnn; } while(kk <= nt); ak = s1*s2; s2 = s1*c2 + c1*s2; c2 = c1*c2 - ak; kk += -nt + kspan; } while(kk <= kspnn); kk += -kspnn + jc; if(kk <= mm) { /* L310: */ c2 = c1 - (cd*c1+sd*s1); s1 = s1 + (sd*c1-cd*s1); /* the following three statements compensate for truncation error.*/ /* if rounded arithmetic is used (nowadays always ?!), they may be deleted. */ #ifdef TRUNCATED_ARITHMETIC c1 = 0.5/(c2*c2+s1*s1) + 0.5; s1 = c1*s1; c2 = c1*c2; #endif continue/* goto L320*/; } if(kk >= kspan) { kk = kk - kspan + jc + inc; if( kk <= jc+jc) goto L300; goto L_start; } s1 = ((kk-1)/jc)*dr*rad; c2 = cos(s1); s1 = sin(s1); mm = imin2(kspan,mm+klim); } while(1); /*------------------------------------------------------------*/ /* permute the results to normal order---done in two stages */ /* permutation for square factors of n */ L_fin: np[1] = ks; if( kt == 0) goto L440; k = kt + kt + 1; if( m < k) k--; np[k+1] = jc; for(j = 1; j < k; j++, k--) { np[j+1] = np[j]/nfac[j-1]; np[k] = np[k+1]*nfac[j-1]; } k3 = np[k+1]; kspan = np[2]; kk = jc + 1; k2 = kspan + 1; j = 1; if(n == ntot) { /* permutation for single-variate transform (optional code) */ L370: do { ak = a[kk]; a[kk] = a[k2]; a[k2] = ak; bk = b[kk]; b[kk] = b[k2]; b[k2] = bk; kk += inc; k2 += kspan; } while(k2 < ks); L380: do { k2 -= np[j]; j++; k2 += np[j+1]; } while(k2 > np[j]); j = 1; do { if(kk < k2) goto L370; kk += inc; k2 += kspan; } while(k2 < ks); if( kk < ks) goto L380; jc = k3; } else { /* permutation for multivariate transform */ L400: k = kk + jc; do { ak = a[kk]; a[kk] = a[k2]; a[k2] = ak; bk = b[kk]; b[kk] = b[k2]; b[k2] = bk; kk += inc; k2 += inc; } while( kk < k); kk += ks - jc; k2 += ks - jc; if(kk < nt) goto L400; k2 += - nt + kspan; kk += - nt + jc; if( k2 < ks) goto L400; do { do { k2 -= np[j]; j++; k2 += np[j+1]; } while(k2 > np[j]); j = 1; do { if(kk < k2) goto L400; kk += jc; k2 += kspan; } while(k2 < ks); } while(kk < ks); jc = k3; } L440: if( 2*kt+1 >= m) return; kspnn = np[kt+1]; /* permutation for square-free factors of n */ /* Here, nfac[] is overwritten... -- now CUMULATIVE ("cumprod") factors */ nn = m - kt; nfac[nn] = 1; for(j = nn; j > kt; j--) nfac[j-1] *= nfac[j]; kt++; nn = nfac[kt-1] - 1; jj = 0; j = 0; goto L480; L460: jj -= k2; k2 = kk; k++; kk = nfac[k-1]; L470: jj += kk; if( jj >= k2) goto L460; np[j] = jj; L480: k2 = nfac[kt-1]; k = kt + 1; kk = nfac[k-1]; j++; if( j <= nn) goto L470; /* determine the permutation cycles of length greater than 1 */ j = 0; goto L500; do { do { k = kk; kk = np[k]; np[k] = -kk; } while(kk != j); k3 = kk; L500: do { j++; kk = np[j]; } while(kk < 0); } while(kk != j); np[j] = -j; if( j != nn) goto L500; maxf *= inc; goto L570; /* reorder a and b, following the permutation cycles */ L_ord: do j--; while(np[j] < 0); jj = jc; L520: kspan = imin2(jj,maxf); jj -= kspan; k = np[j]; kk = jc*k + i + jj; for(k1 = kk + kspan, k2 = 1; k1 != kk; k1 -= inc, k2++) { at[k2] = a[k1]; bt[k2] = b[k1]; } do { k1 = kk + kspan; k2 = k1 - jc*(k+np[k]); k = -np[k]; do { a[k1] = a[k2]; b[k1] = b[k2]; k1 -= inc; k2 -= inc; } while( k1 != kk); kk = k2; } while(k != j); for(k1 = kk + kspan, k2 = 1; k1 > kk; k1 -= inc, k2++) { a[k1] = at[k2]; b[k1] = bt[k2]; } if(jj != 0) goto L520; if( j != 1) goto L_ord; L570: j = k3 + 1; nt = nt - kspnn; i = nt - inc + 1; if( nt >= 0) goto L_ord; } /* fftmx */ static int old_n = 0; static int nfac[20]; static int m_fac; static int kt; static int maxf; static int maxp; /* At the end of factorization, * nfac[] contains the factors, * m_fac contains the number of factors and * kt contains the number of square factors */ /* non-API, but used by package RandomFields */ void fft_factor(int n, int *pmaxf, int *pmaxp) { /* fft_factor - factorization check and determination of memory * requirements for the fft. * * On return, *pmaxf will give the maximum factor size * and *pmaxp will give the amount of integer scratch storage required. * * If *pmaxf == 0, there was an error, the error type is indicated by *pmaxp: * * If *pmaxp == 0 There was an illegal zero parameter among nseg, n, and nspn. * If *pmaxp == 1 There were more than 20 factors to ntot. */ int j, jj, k, sqrtk, kchanged; /* check series length */ if (n <= 0) { old_n = 0; *pmaxf = 0; *pmaxp = 0; return; } else old_n = n; /* determine the factors of n */ m_fac = 0; k = n;/* k := remaining unfactored factor of n */ if (k == 1) return; /* extract square factors first ------------------ */ /* extract 4^2 = 16 separately * ==> at most one remaining factor 2^2 = 4, done below */ while(k % 16 == 0) { nfac[m_fac++] = 4; k /= 16; } /* extract 3^2, 5^2, ... */ kchanged = 0; sqrtk = (int)sqrt(k); for(j = 3; j <= sqrtk; j += 2) { jj = j * j; while(k % jj == 0) { nfac[m_fac++] = j; k /= jj; kchanged = 1; } if (kchanged) { kchanged = 0; sqrtk = (int)sqrt(k); } } if(k <= 4) { kt = m_fac; nfac[m_fac] = k; if(k != 1) m_fac++; } else { if(k % 4 == 0) { nfac[m_fac++] = 2; k /= 4; } /* all square factors out now, but k >= 5 still */ kt = m_fac; maxp = imax2(kt+kt+2, k-1); j = 2; do { if (k % j == 0) { nfac[m_fac++] = j; k /= j; } if (j > INT_MAX - 2) break; j = ((j+1)/2)*2 + 1; } while(j <= k); } if (m_fac <= kt+1) maxp = m_fac+kt+1; if (m_fac+kt > 20) { /* error - too many factors */ old_n = 0; *pmaxf = 0; *pmaxp = 0; return; } else { if (kt != 0) { j = kt; while(j != 0) nfac[m_fac++] = nfac[--j]; } maxf = nfac[m_fac-kt-1]; /* The last squared factor is not necessarily the largest PR#1429 */ if (kt > 0) maxf = imax2(nfac[kt-1], maxf); if (kt > 1) maxf = imax2(nfac[kt-2], maxf); if (kt > 2) maxf = imax2(nfac[kt-3], maxf); } *pmaxf = maxf; *pmaxp = maxp; } Rboolean fft_work(double *a, double *b, int nseg, int n, int nspn, int isn, double *work, int *iwork) { /* check that factorization was successful */ if(old_n == 0) return FALSE; /* check that the parameters match those of the factorization call */ if(n != old_n || nseg <= 0 || nspn <= 0 || isn == 0) return FALSE; /* perform the transform */ size_t mf = maxf; int nspan = n * nspn, ntot = nspan * nseg; fftmx(a, b, ntot, n, nspan, isn, m_fac, kt, work, work+mf, work+2*mf, work+3*mf, iwork, nfac); return TRUE; }