/*
 *  R : A Computer Language for Statistical Data Analysis
 *  Copyright (C) 1998--2019  The R Core Team
 *  Copyright (C) 1995, 1996, 1997  Robert Gentleman and Ross Ihaka
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  https://www.R-project.org/Licenses/
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <limits.h> /* for INT_MAX */
#include <stddef.h> /* for size_t */
#include <stdlib.h> /* for abs */
#include <math.h>
#include <Rmath.h> /* for imax2(.),..*/

/*  Fast Fourier Transform
 *
 *  These routines are based on code by Richard Singleton in the
 *  book "Programs for Digital Signal Processing" put out by IEEE.
 *
 *  I have translated them to C and moved the memory allocation
 *  so that it takes place under the control of the algorithm
 *  which calls these; for R, see ./fourier.c
                                  ~~~~~~~~~~~
 *
 *  void fft_factor(int n, int *maxf, int *maxp)
 *
 *	This factorizes the series length and computes the values of
 *	maxf and maxp which determine the amount of scratch storage
 *	required by the algorithm.
 *
 *	If maxf is zero on return, an error occured during factorization.
 *	The nature of the error can be determined from the value of maxp.
 *	If maxp is zero, an invalid (zero) parameter was passed and
 *	if maxp is one,	 the internal nfac array was too small.	 This can only
 *	happen for series lengths which exceed 12,754,584.
 *
 * PROBLEM (see fftmx  below):	nfac[] is overwritten by fftmx() in fft_work()
 * -------  Consequence:  fft_factor() must be called way too often,
 * at least from  do_mvfft() [ ../main/fourier.c ]
 *
 *	The following arrays need to be allocated following the call to
 *	fft_factor and preceding the call to fft_work.
 *
 *		work	double[4*maxf]
 *		iwork	int[maxp]
 *
 *  int fft_work(double *a, double *b, int nseg, int n, int nspn,
 *		 int isn, double *work, int *iwork)
 *
 *	The routine returns 1 if the transform was completed successfully and
 *			    0 if invalid values of the parameters were supplied.
 *
 *  Ross Ihaka
 *  University of Auckland
 *  February 1997
 *  ==========================================================================
 *
 *  Header from the original Singleton algorithm:
 *
 * --------------------------------------------------------------
 * subroutine:	 fft
 * multivariate complex fourier transform, computed in place
 * using mixed-radix fast fourier transform algorithm.
 * --------------------------------------------------------------
 *
 * arrays a and b originally hold the real and imaginary
 *	components of the data, and return the real and
 *	imaginary components of the resulting fourier coefficients.
 * multivariate data is indexed according to the fortran
 *	array element successor function, without limit
 *	on the number of implied multiple subscripts.
 *	the subroutine is called once for each variate.
 *	the calls for a multivariate transform may be in any order.
 *
 * n	is the dimension of the current variable.
 * nspn is the spacing of consecutive data values
 *	while indexing the current variable.
 * nseg nseg*n*nspn is the total number of complex data values.
 * isn	the sign of isn determines the sign of the complex
 *	exponential, and the magnitude of isn is normally one.
 *	the magnitude of isn determines the indexing increment for a&b.
 *
 * if fft is called twice, with opposite signs on isn, an
 *	identity transformation is done...calls can be in either order.
 *	the results are scaled by 1/n when the sign of isn is positive.
 *
 * a tri-variate transform with a(n1,n2,n3), b(n1,n2,n3)
 * is computed by
 *	call fft(a,b,n2*n3,n1, 1,   -1)
 *	call fft(a,b,n3	  ,n2,n1,   -1)
 *	call fft(a,b,1,	   n3,n1*n2,-1)
 *
 * a single-variate transform of n complex data values is computed by
 *	call fft(a,b,1,n,1,-1)
 *
 * the data may alternatively be stored in a single complex
 *	array a, then the magnitude of isn changed to two to
 *	give the correct indexing increment and a(2) used to
 *	pass the initial address for the sequence of imaginary
 *	values, e.g.
 *	   call fft(a,a(2),nseg,n,nspn,-2)
 *
 * nfac[15] (array) is working storage for factoring n.	 the smallest
 *	number exceeding the 15 locations provided is 12,754,584.
 *
 * Update in R 3.1.0: nfac[20], increased array size. It is now possible to
 * factor any positive int n, up to 2^31 - 1.
 */

static void fftmx(double *a, double *b, int ntot, int n, int nspan, int isn,
		  int m, int kt, double *at, double *ck, double *bt, double *sk,
		  int *np, int *nfac)
{
/* called from	fft_work() */

/* Design BUG:	One purpose of fft_factor() would be to compute
 * ----------	nfac[] once and for all; and fft_work() [i.e. fftmx ]
 *		could reuse the factorization.
 * However: nfac[] is `destroyed' currently in the code below
 */
    double aa, aj, ajm, ajp, ak, akm, akp;
    double bb, bj, bjm, bjp, bk, bkm, bkp;
    double c1, c2=0, c3=0, c72, cd;
    double dr, rad;
    double s1, s120, s2=0, s3=0, s72, sd;
    int i, inc, j, jc, jf, jj;
    int k, k1, k2, k3=0, k4, kk, klim, ks, kspan, kspnn;
    int lim, maxf, mm, nn, nt;

    // converted from Fortran, so 1-based indexing
    a--; b--; at--; ck--; bt--; sk--; np--;
    // nfac has had indexing converted to avoid compiler warnings

    inc = abs(isn);
    nt = inc*ntot;
    ks = inc*nspan;
    rad = M_PI_4;/* = pi/4 =^= 45 degrees */
    s72 = rad/0.625;/* 72 = 45 / .625  degrees */
    c72 = cos(s72);
    s72 = sin(s72);
    s120 = 0.5*M_SQRT_3;/* sin(120) = sqrt(3)/2 */
    if(isn <= 0) {
	s72 = -s72;
	s120 = -s120;
	rad = -rad;
    } else {
#ifdef SCALING
	/* scale by 1/n for isn > 0 */
	ak = 1.0/n;
	for(j = 1 ; j <= nt ; j += inc) {
	    a[j] *= ak;
	    b[j] *= ak;
	}
#endif
    }

    kspan = ks;
    nn = nt - inc;
    jc = ks/n;

	/* sin, cos values are re-initialized each lim steps */

    lim = 32;
    klim = lim*jc;
    i = 0;
    jf = 0;
    maxf = nfac[m - kt - 1];
    if(kt > 0) maxf = imax2(nfac[kt-1], maxf);

	/* compute fourier transform */

L_start:
    dr = (8.0*jc)/kspan;
    cd = sin(0.5*dr*rad);
    cd = 2.0*cd*cd;
    sd = sin(dr*rad);
    kk = 1;
    i++;
    if( nfac[i-1] != 2) goto L110;

/* transform for factor of 2 (including rotation factor) */

    kspan /= 2;
    k1 = kspan + 2;
    do {
	do {
	    k2 = kk + kspan;
	    ak = a[k2];
	    bk = b[k2];
	    a[k2] = a[kk] - ak;
	    b[k2] = b[kk] - bk;
	    a[kk] += ak;
	    b[kk] += bk;
	    kk = k2 + kspan;
	} while(kk <= nn);
	kk -= nn;
    } while(kk <= jc);

    if(kk > kspan) goto L_fin;
L60:
    c1 = 1.0 - cd;
    s1 = sd;
    mm = imin2(k1/2,klim);
    goto L80;

L70:
    ak = c1 - (cd*c1+sd*s1);
    s1 = (sd*c1-cd*s1) + s1;

/* the following three statements compensate for truncation error. */
/* if rounded arithmetic is used (nowadays always ?!), substitute  c1=ak */
#ifdef TRUNCATED_ARITHMETIC
    c1 = 0.5/(ak*ak+s1*s1) + 0.5;
    s1 = c1*s1;
    c1 = c1*ak;
#else
    c1 = ak;
#endif

L80:
    do {
	k2 = kk + kspan;
	ak = a[kk] - a[k2];
	bk = b[kk] - b[k2];
	a[kk] += a[k2];
	b[kk] += b[k2];
	a[k2] = c1*ak - s1*bk;
	b[k2] = s1*ak + c1*bk;
	kk = k2 + kspan;
    } while(kk < nt);
    k2 = kk - nt;
    c1 = -c1;
    kk = k1 - k2;
    if( kk > k2) goto L80;
    kk += jc;
    if(kk <= mm) goto L70;
    if(kk >= k2) {
	k1 = k1 + inc + inc;
	kk = (k1-kspan)/2 + jc;
	if( kk <= jc+jc) goto L60;
	goto L_start;
    }

    s1 = ((kk-1)/jc)*dr*rad;
    c1 = cos(s1);
    s1 = sin(s1);
    mm = imin2(k1/2,mm+klim);
    goto L80;

/* transform for factor of 3 (optional code) */

L100:
    k1 = kk + kspan;
    k2 = k1 + kspan;
    ak = a[kk];
    bk = b[kk];
    aj = a[k1] + a[k2];
    bj = b[k1] + b[k2];
    a[kk] = ak + aj;
    b[kk] = bk + bj;
    ak = -0.5*aj + ak;
    bk = -0.5*bj + bk;
    aj = (a[k1]-a[k2])*s120;
    bj = (b[k1]-b[k2])*s120;
    a[k1] = ak - bj;
    b[k1] = bk + aj;
    a[k2] = ak + bj;
    b[k2] = bk - aj;
    kk = k2 + kspan;
    if( kk < nn) goto L100;
    kk = kk - nn;
    if( kk <= kspan) goto L100;
    goto L290;

/* transform for factor of 4 */

L110:
    if( nfac[i-1] != 4) goto L_f_odd;
    kspnn = kspan;
    kspan /= 4;
L120:
    c1 = 1.0;
    s1 = 0;
    mm = imin2(kspan,klim);
    goto L150;
L130:
    c2 = c1 - (cd*c1+sd*s1);
    s1 = (sd*c1-cd*s1) + s1;

/* the following three statements compensate for truncation error. */
/* if rounded arithmetic is used (nowadays always ?!), substitute  c1=c2 */
#ifdef TRUNCATED_ARITHMETIC
    c1 = 0.5/(c2*c2+s1*s1) + 0.5;
    s1 = c1*s1;
    c1 = c1*c2;
#else
    c1 = c2;
#endif

L140:
    c2 = c1*c1 - s1*s1;
    s2 = c1*s1*2.0;
    c3 = c2*c1 - s2*s1;
    s3 = c2*s1 + s2*c1;

L150:
    k1 = kk + kspan;
    k2 = k1 + kspan;
    k3 = k2 + kspan;
    akp = a[kk] + a[k2];
    akm = a[kk] - a[k2];
    ajp = a[k1] + a[k3];
    ajm = a[k1] - a[k3];
    a[kk] = akp + ajp;
    ajp = akp - ajp;
    bkp = b[kk] + b[k2];
    bkm = b[kk] - b[k2];
    bjp = b[k1] + b[k3];
    bjm = b[k1] - b[k3];
    b[kk] = bkp + bjp;
    bjp = bkp - bjp;
    if( isn < 0) goto L180;
    akp = akm - bjm;
    akm = akm + bjm;
    bkp = bkm + ajm;
    bkm = bkm - ajm;
    if( s1 == 0.0) goto L190;
L160:
    a[k1] = akp*c1 - bkp*s1;
    b[k1] = akp*s1 + bkp*c1;
    a[k2] = ajp*c2 - bjp*s2;
    b[k2] = ajp*s2 + bjp*c2;
    a[k3] = akm*c3 - bkm*s3;
    b[k3] = akm*s3 + bkm*c3;
    kk = k3 + kspan;
    if( kk <= nt) goto L150;
L170:
    kk = kk - nt + jc;
    if( kk <= mm) goto L130;
    if( kk < kspan) goto L200;
    kk = kk - kspan + inc;
    if(kk <= jc) goto L120;
    if(kspan == jc) goto L_fin;
    goto L_start;
L180:
    akp = akm + bjm;
    akm = akm - bjm;
    bkp = bkm - ajm;
    bkm = bkm + ajm;
    if( s1 != 0.0) goto L160;
L190:
    a[k1] = akp;
    b[k1] = bkp;
    a[k2] = ajp;
    b[k2] = bjp;
    a[k3] = akm;
    b[k3] = bkm;
    kk = k3 + kspan;
    if( kk <= nt) goto L150;
    goto L170;
L200:
    s1 = ((kk-1)/jc)*dr*rad;
    c1 = cos(s1);
    s1 = sin(s1);
    mm = imin2(kspan,mm+klim);
    goto L140;

/* transform for factor of 5 (optional code) */

L_f5:
    c2 = c72*c72 - s72*s72;
    s2 = 2.0*c72*s72;
L220:
    k1 = kk + kspan;
    k2 = k1 + kspan;
    k3 = k2 + kspan;
    k4 = k3 + kspan;
    akp = a[k1] + a[k4];
    akm = a[k1] - a[k4];
    bkp = b[k1] + b[k4];
    bkm = b[k1] - b[k4];
    ajp = a[k2] + a[k3];
    ajm = a[k2] - a[k3];
    bjp = b[k2] + b[k3];
    bjm = b[k2] - b[k3];
    aa = a[kk];
    bb = b[kk];
    a[kk] = aa + akp + ajp;
    b[kk] = bb + bkp + bjp;
    ak = akp*c72 + ajp*c2 + aa;
    bk = bkp*c72 + bjp*c2 + bb;
    aj = akm*s72 + ajm*s2;
    bj = bkm*s72 + bjm*s2;
    a[k1] = ak - bj;
    a[k4] = ak + bj;
    b[k1] = bk + aj;
    b[k4] = bk - aj;
    ak = akp*c2 + ajp*c72 + aa;
    bk = bkp*c2 + bjp*c72 + bb;
    aj = akm*s2 - ajm*s72;
    bj = bkm*s2 - bjm*s72;
    a[k2] = ak - bj;
    a[k3] = ak + bj;
    b[k2] = bk + aj;
    b[k3] = bk - aj;
    kk = k4 + kspan;
    if( kk < nn) goto L220;
    kk = kk - nn;
    if( kk <= kspan) goto L220;
    goto L290;

/* transform for odd factors */

L_f_odd:
    k = nfac[i-1];
    kspnn = kspan;
    kspan /= k;
    if(k == 3) goto L100;
    if(k == 5) goto L_f5;
    if(k == jf) goto L250;
    jf = k;
    s1 = rad/(k/8.0);
    c1 = cos(s1);
    s1 = sin(s1);
    ck[jf] = 1.0;
    sk[jf] = 0.0;

    for(j = 1; j < k; j++) { /* k is changing as well */
	ck[j] = ck[k]*c1 + sk[k]*s1;
	sk[j] = ck[k]*s1 - sk[k]*c1;
	k--;
	ck[k] = ck[j];
	sk[k] = -sk[j];
    }

L250:
    k1 = kk;
    k2 = kk + kspnn;
    aa = a[kk];
    bb = b[kk];
    ak = aa;
    bk = bb;
    j = 1;
    k1 = k1 + kspan;
L260:
    k2 = k2 - kspan;
    j++;
    at[j] = a[k1] + a[k2];
    ak = at[j] + ak;
    bt[j] = b[k1] + b[k2];
    bk = bt[j] + bk;
    j++;
    at[j] = a[k1] - a[k2];
    bt[j] = b[k1] - b[k2];
    k1 = k1 + kspan;
    if( k1 < k2) goto L260;
    a[kk] = ak;
    b[kk] = bk;
    k1 = kk;
    k2 = kk + kspnn;
    j = 1;
L270:
    k1 += kspan;
    k2 -= kspan;
    jj = j;
    ak = aa;
    bk = bb;
    aj = 0.0;
    bj = 0.0;
    k = 1;
    for(k = 2; k < jf; k++) {
	ak += at[k]*ck[jj];
	bk += bt[k]*ck[jj];
	k++;
	aj += at[k]*sk[jj];
	bj += bt[k]*sk[jj];
	jj += j;
	if(jj > jf) jj -= jf;
    }
    k = jf - j;
    a[k1] = ak - bj;
    b[k1] = bk + aj;
    a[k2] = ak + bj;
    b[k2] = bk - aj;
    j++;
    if( j < k) goto L270;
    kk = kk + kspnn;
    if( kk <= nn) goto L250;
    kk = kk - nn;
    if( kk <= kspan) goto L250;

/* multiply by rotation factor (except for factors of 2 and 4) */

L290:
    if(i == m) goto L_fin;
    kk = jc + 1;
L300:
    c2 = 1.0 - cd;
    s1 = sd;
    mm = imin2(kspan,klim);

    do { /* L320: */
	c1 = c2;
	s2 = s1;
	kk += kspan;
	do { /* L330: */
	    do {
		ak = a[kk];
		a[kk] = c2*ak - s2*b[kk];
		b[kk] = s2*ak + c2*b[kk];
		kk += kspnn;
	    } while(kk <= nt);
	    ak = s1*s2;
	    s2 = s1*c2 + c1*s2;
	    c2 = c1*c2 - ak;
	    kk += -nt + kspan;
	} while(kk <= kspnn);
	kk += -kspnn + jc;
	if(kk <= mm) { /* L310: */
	    c2 = c1 - (cd*c1+sd*s1);
	    s1 = s1 + (sd*c1-cd*s1);
/* the following three statements compensate for truncation error.*/
/* if rounded arithmetic is used (nowadays always ?!), they may be deleted. */
#ifdef TRUNCATED_ARITHMETIC
	    c1 = 0.5/(c2*c2+s1*s1) + 0.5;
	    s1 = c1*s1;
	    c2 = c1*c2;
#endif
	    continue/* goto L320*/;
	}
	if(kk >= kspan) {
	    kk = kk - kspan + jc + inc;
	    if( kk <= jc+jc) goto L300;
	    goto L_start;
	}
	s1 = ((kk-1)/jc)*dr*rad;
	c2 = cos(s1);
	s1 = sin(s1);
	mm = imin2(kspan,mm+klim);
    } while(1);

/*------------------------------------------------------------*/


/* permute the results to normal order---done in two stages */
/* permutation for square factors of n */

L_fin:
    np[1] = ks;
    if( kt == 0) goto L440;
    k = kt + kt + 1;
    if( m < k) k--;
    np[k+1] = jc;
    for(j = 1; j < k; j++, k--) {
	np[j+1] = np[j]/nfac[j-1];
	np[k] = np[k+1]*nfac[j-1];
    }
    k3 = np[k+1];
    kspan = np[2];
    kk = jc + 1;
    k2 = kspan + 1;
    j = 1;

    if(n == ntot) {

	/* permutation for single-variate transform (optional code) */

      L370:
	do {
	    ak = a[kk];	   a[kk] = a[k2];    a[k2] = ak;
	    bk = b[kk];	   b[kk] = b[k2];    b[k2] = bk;
	    kk += inc;
	    k2 += kspan;
	} while(k2 < ks);
      L380:
	do { k2 -= np[j]; j++; k2 += np[j+1]; } while(k2 > np[j]);
	j = 1;
	do {
	    if(kk < k2) goto L370;
	    kk += inc;
	    k2 += kspan;
	} while(k2 < ks);
	if( kk < ks) goto L380;
	jc = k3;

    } else {

	/* permutation for multivariate transform */

      L400:
	k = kk + jc;
	do {
	    ak = a[kk]; a[kk] = a[k2]; a[k2] = ak;
	    bk = b[kk]; b[kk] = b[k2]; b[k2] = bk;
	    kk += inc;
	    k2 += inc;
	} while( kk < k);
	kk += ks - jc;
	k2 += ks - jc;
	if(kk < nt) goto L400;
	k2 += - nt + kspan;
	kk += - nt + jc;
	if( k2 < ks) goto L400;

	do {
	    do { k2 -= np[j]; j++; k2 += np[j+1]; } while(k2 > np[j]);
	    j = 1;
	    do {
		if(kk < k2) goto L400;
		kk += jc;
		k2 += kspan;
	    } while(k2 < ks);
	} while(kk < ks);
	jc = k3;
    }

L440:
    if( 2*kt+1 >= m) return;
    kspnn = np[kt+1];

/* permutation for square-free factors of n */

    /* Here, nfac[] is overwritten... -- now CUMULATIVE ("cumprod") factors */
    nn = m - kt;
    nfac[nn] = 1;
    for(j = nn; j > kt; j--)
	nfac[j-1] *= nfac[j];
    kt++;
    nn = nfac[kt-1] - 1;
    jj = 0;
    j = 0;
    goto L480;
L460:
    jj -= k2;
    k2 = kk;
    k++;
    kk = nfac[k-1];
L470:
    jj += kk;
    if( jj >= k2) goto L460;
    np[j] = jj;
L480:
    k2 = nfac[kt-1];
    k = kt + 1;
    kk = nfac[k-1];
    j++;
    if( j <= nn) goto L470;

/* determine the permutation cycles of length greater than 1 */

    j = 0;
    goto L500;

    do {
	do { k = kk; kk = np[k]; np[k] = -kk; } while(kk != j);
	k3 = kk;
      L500:
	do { j++; kk = np[j]; } while(kk < 0);
    } while(kk != j);
    np[j] = -j;
    if( j != nn) goto L500;
    maxf *= inc;
    goto L570;

/* reorder a and b, following the permutation cycles */

L_ord:
    do j--; while(np[j] < 0);
    jj = jc;

L520:
    kspan = imin2(jj,maxf);
    jj -= kspan;
    k = np[j];
    kk = jc*k + i + jj;

    for(k1 = kk + kspan, k2 = 1; k1 != kk;
	k1 -= inc, k2++) {
	at[k2] = a[k1];
	bt[k2] = b[k1];
    }

    do {
	k1 = kk + kspan;
	k2 = k1 - jc*(k+np[k]);
	k = -np[k];
	do {
	    a[k1] = a[k2];
	    b[k1] = b[k2];
	    k1 -= inc;
	    k2 -= inc;
	} while( k1 != kk);
	kk = k2;
    } while(k != j);

    for(k1 = kk + kspan, k2 = 1; k1 > kk;
	k1 -= inc, k2++) {
	a[k1] = at[k2];
	b[k1] = bt[k2];
    }

    if(jj != 0) goto L520;
    if( j != 1) goto L_ord;

L570:
    j = k3 + 1;
    nt = nt - kspnn;
    i = nt - inc + 1;
    if( nt >= 0) goto L_ord;
} /* fftmx */

static int old_n = 0;

static int nfac[20];
static int m_fac;
static int kt;
static int maxf;
static int maxp;

/* At the end of factorization,
 *	nfac[]	contains the factors,
 *	m_fac	contains the number of factors and
 *	kt	contains the number of square factors  */

/* non-API, but used by package RandomFields */
void fft_factor(int n, int *pmaxf, int *pmaxp)
{
/* fft_factor - factorization check and determination of memory
 *		requirements for the fft.
 *
 * On return,	*pmaxf will give the maximum factor size
 * and		*pmaxp will give the amount of integer scratch storage required.
 *
 * If *pmaxf == 0, there was an error, the error type is indicated by *pmaxp:
 *
 *  If *pmaxp == 0  There was an illegal zero parameter among nseg, n, and nspn.
 *  If *pmaxp == 1  There were more than 20 factors to ntot.  */

    int j, jj, k, sqrtk, kchanged;

	/* check series length */

    if (n <= 0) {
	old_n = 0; *pmaxf = 0; *pmaxp = 0;
	return;
    }
    else old_n = n;

	/* determine the factors of n */

    m_fac = 0;
    k = n;/* k := remaining unfactored factor of n */
    if (k == 1)
	return;

	/* extract square factors first ------------------ */

    /* extract 4^2 = 16 separately
     * ==> at most one remaining factor 2^2 = 4, done below */
    while(k % 16 == 0) {
	nfac[m_fac++] = 4;
	k /= 16;
    }

    /* extract 3^2, 5^2, ... */
    kchanged = 0;
    sqrtk = (int)sqrt(k);
    for(j = 3; j <= sqrtk; j += 2) {
	jj = j * j;
	while(k % jj == 0) {
	    nfac[m_fac++] = j;
	    k /= jj;
	    kchanged = 1;
	}
	if (kchanged) {
	    kchanged = 0;
	    sqrtk = (int)sqrt(k);
	}
    }

    if(k <= 4) {
	kt = m_fac;
	nfac[m_fac] = k;
	if(k != 1) m_fac++;
    }
    else {
	if(k % 4 == 0) {
	    nfac[m_fac++] = 2;
	    k /= 4;
	}

	/* all square factors out now, but k >= 5 still */

	kt = m_fac;
	maxp = imax2(kt+kt+2, k-1);
	j = 2;
	do {
	    if (k % j == 0) {
		nfac[m_fac++] = j;
		k /= j;
	    }
	    if (j > INT_MAX - 2)
		break;
	    j = ((j+1)/2)*2 + 1;
	}
	while(j <= k);
    }

    if (m_fac <= kt+1)
	maxp = m_fac+kt+1;
    if (m_fac+kt > 20) {		/* error - too many factors */
	old_n = 0; *pmaxf = 0; *pmaxp = 0;
	return;
    }
    else {
	if (kt != 0) {
	    j = kt;
	    while(j != 0)
		nfac[m_fac++] = nfac[--j];
	}
	maxf = nfac[m_fac-kt-1];
/* The last squared factor is not necessarily the largest PR#1429 */
	if (kt > 0) maxf = imax2(nfac[kt-1], maxf);
	if (kt > 1) maxf = imax2(nfac[kt-2], maxf);
	if (kt > 2) maxf = imax2(nfac[kt-3], maxf);
    }
    *pmaxf = maxf;
    *pmaxp = maxp;
}


Rboolean fft_work(double *a, double *b, int nseg, int n, int nspn, int isn,
		  double *work, int *iwork)
{
	/* check that factorization was successful */

    if(old_n == 0) return FALSE;

	/* check that the parameters match those of the factorization call */

    if(n != old_n || nseg <= 0 || nspn <= 0 || isn == 0)
	return FALSE;

	/* perform the transform */

    size_t mf = maxf;
    int nspan = n * nspn, ntot = nspan * nseg;

    fftmx(a, b, ntot, n, nspan, isn, m_fac, kt,
	  work, work+mf, work+2*mf, work+3*mf,
	  iwork, nfac);

    return TRUE;
}