/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 2016-2023 The R Core Team * * Based on code donated from the data.table package * (C) 2006-2015 Matt Dowle and Arun Srinivasan. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #include #include /* It would be better to find a way to avoid abusing TRUELENGTH, but in the meantime replace TRUELENGTH/SET_TRUELENGTH with TRLEN/SET_TRLEN that cast to int to avoid warnings. */ #define TRLEN(x) ((int) TRUELENGTH(x)) #define SET_TRLEN(x, v) SET_TRUELENGTH(x, ((int) (v))) // gs = groupsizes e.g.23, 12, 87, 2, 1, 34,... static int *gs[2] = { NULL }; //two vectors flip flopped:flip and 1 - flip static int flip = 0; //allocated stack size static int gsalloc[2] = { 0 }; static int gsngrp[2] = { 0 }; //max grpn so far static int gsmax[2] = { 0 }; //max size of stack, set by do_radixsort to nrows static int gsmaxalloc = 0; //switched off for last arg unless retGrp==TRUE static Rboolean stackgrps = TRUE; // TRUE for setkey, FALSE for by= static Rboolean sortStr = TRUE; // used by do_radixsort and [i|d|c]sort to reorder order. // not needed if narg==1 static int *newo = NULL; // =1, 0, -1 for TRUE, NA, FALSE respectively. // Value rewritten inside do_radixsort(). static int nalast = -1; // =1, -1 for ascending and descending order respectively static int order = 1; //replaced n < 200 with n < N_SMALL.Easier to change later #define N_SMALL 200 // range limit for counting sort. Should be less than INT_MAX // (see setRange for details) #define N_RANGE 100000 static SEXP *saveds = NULL; static R_len_t *savedtl = NULL, nalloc = 0, nsaved = 0; static void savetl_init(void) { if (nsaved || nalloc || saveds || savedtl) error("Internal error: savetl_init checks failed (%d %d %p %p).", nsaved, nalloc, (void *)saveds, (void *)savedtl); nsaved = 0; nalloc = 100; saveds = (SEXP *) malloc(nalloc * sizeof(SEXP)); if (saveds == NULL) error("Could not allocate saveds in savetl_init"); savedtl = (R_len_t *) malloc(nalloc * sizeof(R_len_t)); if (savedtl == NULL) { free(saveds); error("Could not allocate saveds in savetl_init"); } } static void savetl_end(void) { // Can get called if nothing has been saved yet (nsaved == 0), or // even if _init() has not been called yet (pointers NULL). Such as // to clear up before error. Also, it might be that nothing needed // to be saved anyway. for (int i = 0; i < nsaved; i++) SET_TRLEN(saveds[i], savedtl[i]); free(saveds); // does nothing on NULL input free(savedtl); nsaved = nalloc = 0; saveds = NULL; savedtl = NULL; } static void savetl(SEXP s) { if (nsaved >= nalloc) { nalloc *= 2; char *tmp; tmp = (char *) realloc(saveds, nalloc * sizeof(SEXP)); if (tmp == NULL) { savetl_end(); error("Could not realloc saveds in savetl"); } saveds = (SEXP *) tmp; tmp = (char *) realloc(savedtl, nalloc * sizeof(R_len_t)); if (tmp == NULL) { savetl_end(); error("Could not realloc savedtl in savetl"); } savedtl = (R_len_t *) tmp; } saveds[nsaved] = s; savedtl[nsaved] = TRLEN(s); nsaved++; } // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon #define Error(...) do {savetl_end(); error(__VA_ARGS__);} while(0) #undef warning // since it can be turned to error via warn = 2 #define warning(...) Do not use warning in this file /* use malloc/realloc (not Calloc/Realloc) so we can trap errors and call savetl_end() before the error(). */ static void growstack(uint64_t newlen) { // no link to icount range restriction, // just 100,000 seems a good minimum at 0.4MB if (newlen == 0) newlen = 100000; if (newlen > gsmaxalloc) newlen = gsmaxalloc; gs[flip] = realloc(gs[flip], newlen * sizeof(int)); if (gs[flip] == NULL) Error("Failed to realloc working memory stack to %d*4bytes (flip=%d)", (int)newlen /* no bigger than gsmaxalloc */, flip); gsalloc[flip] = (int)newlen; } static void push(int x) { if (!stackgrps || x == 0) return; if (gsalloc[flip] == gsngrp[flip]) growstack((uint64_t)(gsngrp[flip]) * 2); gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void mpush(int x, int n) { if (!stackgrps || x == 0) return; if (gsalloc[flip] < gsngrp[flip] + n) growstack(((uint64_t)(gsngrp[flip]) + n) * 2); for (int i = 0; i < n; i++) gs[flip][gsngrp[flip]++] = x; if (x > gsmax[flip]) gsmax[flip] = x; } static void flipflop(void) { flip = 1 - flip; gsngrp[flip] = 0; gsmax[flip] = 0; if (gsalloc[flip] < gsalloc[1 - flip]) growstack((uint64_t)(gsalloc[1 - flip]) * 2); } static void gsfree(void) { free(gs[0]); free(gs[1]); gs[0] = NULL; gs[1] = NULL; flip = 0; gsalloc[0] = gsalloc[1] = 0; gsngrp[0] = gsngrp[1] = 0; gsmax[0] = gsmax[1] = 0; gsmaxalloc = 0; } #ifdef TIMING_ON // many calls to clock() can be expensive, // hence compiled out rather than switch(verbose) #include #define NBLOCK 20 static clock_t tblock[NBLOCK], tstart; static int nblock[NBLOCK]; #define TBEG() tstart = clock(); #define TEND(i) tblock[i] += clock()-tstart; nblock[i]++; tstart = clock(); #else #define TBEG() #define TEND(i) #endif static int range, xmin; // used by both icount and do_radixsort static void setRange(int *x, int n) { xmin = NA_INTEGER; int xmax = NA_INTEGER; double overflow; int i = 0; while(i < n && x[i] == NA_INTEGER) i++; if (i < n) xmax = xmin = x[i]; for (; i < n; i++) { int tmp = x[i]; if (tmp == NA_INTEGER) continue; if (tmp > xmax) xmax = tmp; else if (tmp < xmin) xmin = tmp; } // all NAs, nothing to do if (xmin == NA_INTEGER) { range = NA_INTEGER; return; } // ex: x=c(-2147483647L, NA_integer_, 1L) results in overflowing int range. overflow = (double) xmax - (double) xmin + 1; // detect and force iradix here, since icount is out of the picture if (overflow > INT_MAX) { range = INT_MAX; return; } range = xmax - xmin + 1; return; } // x*order results in integer overflow when -1*NA, // so careful to avoid that here : static inline int icheck(int x) { // if nalast == 1, NAs must go last. return ((nalast != 1) ? ((x != NA_INTEGER) ? x*order : x) : ((x != NA_INTEGER) ? (x*order) - 1 : INT_MAX)); } static void icount(int *x, int *o, int n) /* Counting sort: 1. Places the ordering into o directly, overwriting whatever was there 2. Doesn't change x 3. Pushes group sizes onto stack */ { int napos = range; // NA's always counted in last bin // static is IMPORTANT, counting sort is called repetitively. static unsigned int counts[N_RANGE + 1] = { 0 }; /* counts are set back to 0 at the end efficiently. 1e5 = 0.4MB i.e tiny. We'll only use the front part of it, as large as range. So it's just reserving space, not using it. Have defined N_RANGE to be 100000.*/ if (range > N_RANGE) Error("Internal error: range = %d; isorted cannot handle range > %d", range, N_RANGE); for (int i = 0; i < n; i++) { // For nalast=NA case, we won't remove/skip NAs, rather set 'o' indices // to 0. subset will skip them. We can't know how many NAs to skip // beforehand - i.e. while allocating "ans" vector if (x[i] == NA_INTEGER) counts[napos]++; else counts[x[i] - xmin]++; } int tmp = 0; if (nalast != 1 && counts[napos]) { push(counts[napos]); tmp += counts[napos]; } int w = (order==1) ? 0 : range-1; for (int i = 0; i < range; i++) /* no point in adding tmp < n && i <= range, since range includes max, need to go to max, unlike 256 loops elsewhere in radixsort.c */ { if (counts[w]) { // cumulate but not through 0's. // Helps resetting zeros when n < range, below. push(counts[w]); counts[w] = (tmp += counts[w]); } w += order; // order is +1 or -1 } if (nalast == 1 && counts[napos]) { push(counts[napos]); counts[napos] = (tmp += counts[napos]); } for (int i = n - 1; i >= 0; i--) { // This way na.last=TRUE/FALSE cases will have just a // single if-check overhead. o[--counts[(x[i] == NA_INTEGER) ? napos : x[i] - xmin]] = (int) (i + 1); } // nalast = 1, -1 are both taken care already. if (nalast == 0) // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i < n; i++) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not modifed here. /* counts were cumulated above so leaves non zero. Faster to clear up now ready for next time. */ if (n < range) { /* Many zeros in counts already. Loop through n instead, doesn't matter if we set to 0 several times on any repeats */ counts[napos] = 0; for (int i = 0; i < n; i++) { if (x[i] != NA_INTEGER) counts[x[i] - xmin] = 0; } } else memset(counts, 0, (range + 1) * sizeof(int)); return; } static void iinsert(int *x, int *o, int n) /* orders both x and o by reference in-place. Fast for small vectors, low overhead. don't be tempted to binsearch backwards here, have to shift anyway; many memmove would have overhead and do the same thing. */ /* when nalast == 0, iinsert will be called only from within iradix, where o[.] = 0 for x[.]=NA is already taken care of */ { for (int i = 1; i < n; i++) { int xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; int otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } int tt = 0; for (int i = 1; i < n; i++) if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } push(tt + 1); } /* iradix is a counting sort performed forwards from MSB to LSB, with some tricks and short circuits building on Terdiman and Herf. http://codercorner.com/RadixSortRevisited.htm http://stereopsis.com/radix.html ~ Note they are LSD, but we do MSD here which is more complicated, for efficiency. ~ NAs need no special treatment as NA is the most negative integer in R (checked in init.c once, for efficiency) so NA naturally sort to the front. ~ Using 4-pass 1-byte radix for the following reasons : * 11-bit (Herf) reduces to 3-passes (3*11=33) yes, and LSD need random access to o vector in each pass 1:n so reduction in passes is good, but Terdiman's idea to skip a radix if all values are equal occurs less the wider the radix. A narrower radix benefits more from that. * That's detected here using a single 'if', an improvement on Terdiman's exposition of a single loop to find if any count==n * The pass through counts bites when radix is wider, because we repetitively call this iradix from fastorder forwards. * Herf's parallel histogramming is neat. In 4-pass 1-byte it needs 4*256 storage, that's tiny, and can be static. 4*256 << 3*2048. 4-pass 1-byte is simpler and tighter code than 3-pass 11-bit, giving modern optimizers and modern CPUs a better chance. We may get lucky anyway, if one or two of the 4-passes are skipped. Recall: there are no comparisons at all in counting and radix, there is wide random access in each LSD radix pass, though. */ // 4 are used for iradix, 8 for dradix and i64radix static unsigned int radixcounts[8][257] = { {0} }; static int skip[8]; /* global because iradix and iradix_r interact and are called repetitively. counts are set back to 0 after each use, to benefit from skipped radix. */ static void *radix_xsub = NULL; static size_t radix_xsuballoc = 0; static int *otmp = NULL, otmp_alloc = 0; static void alloc_otmp(int n) { if (otmp_alloc >= n) return; otmp = (int *) realloc(otmp, n * sizeof(int)); if (otmp == NULL) Error("Failed to allocate working memory for otmp. Requested %d * %d bytes", n, (int)sizeof(int)); otmp_alloc = n; } // TO DO: save xtmp if possible, see allocs in do_radixsort static void *xtmp = NULL; static int xtmp_alloc = 0; // TO DO: currently always the largest type (double) but // could be int if that's all that's needed static void alloc_xtmp(int n) { if (xtmp_alloc >= n) return; xtmp = (double *) realloc(xtmp, n * sizeof(double)); if (xtmp == NULL) Error("Failed to allocate working memory for xtmp. Requested %d * %d bytes", n, (int)sizeof(double)); xtmp_alloc = n; } static void iradix_r(int *xsub, int *osub, int n, int radix); static void iradix(int *x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack */ { int nextradix, itmp, thisgrpn, maxgrpn; unsigned int thisx = 0, shift, *thiscounts; for (int i = 0; i < n;i++) { /* parallel histogramming pass; i.e. count occurrences of 0:255 in each byte. Sequential so almost negligible. */ // relies on overflow behaviour. And shouldn't -INT_MIN be up in iradix? thisx = (unsigned int) (icheck(x[i])) - INT_MIN; // unrolled since inside n-loop radixcounts[0][thisx & 0xFF]++; radixcounts[1][thisx >> 8 & 0xFF]++; radixcounts[2][thisx >> 16 & 0xFF]++; radixcounts[3][thisx >> 24 & 0xFF]++; } for (int radix = 0; radix < 4; radix++) { /* any(count == n) => all radix must have been that value => last x (still thisx) was that value */ int i = thisx >> (radix*8) & 0xFF; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } int radix = 3; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; one number repeated n times. if (nalast == 0 && x[0] == NA_INTEGER) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i < n; i++) o[i] = 0; else for (int i = 0; i < n; i++) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); /* clear the counts as we only needed the parallel pass for skip[] and we're going to use radixcounts again below. Can't use parallel lower counts in MSD radix, unlike LSD. */ } thiscounts = radixcounts[radix]; shift = radix * 8; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; i++) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below. if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) (icheck(x[i])) - INT_MIN) >> shift & 0xFF; o[--thiscounts[thisx]] = i + 1; } if (radix_xsuballoc < maxgrpn) { // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) // TO DO: could include extra bits to divide the first radix // up more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (int *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in iradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } // TO DO: can we leave this to do_radixsort and remove these calls?? alloc_otmp(maxgrpn); // TO DO: doesn't need to be sizeof(double) always, see inside alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Internal error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; i++) { if (thiscounts[i] == 0) continue; // undo cumulate; i.e. diff thisgrpn = thiscounts[i] - itmp; if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { for (int j = 0; j < thisgrpn; j++) // this is why this xsub here can't be the same memory as // xsub in do_radixsort. ((int *)radix_xsub)[j] = icheck(x[o[itmp+j]-1]); // changes xsub and o by reference recursively. iradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. // nalast = 0 is dealt with separately as it just sets o to 0 for (int i = 0; i < n; i++) o[i] = (x[o[i] - 1] == NA_INTEGER) ? 0 : o[i]; // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void iradix_r(int *xsub, int *osub, int n, int radix) // xsub is a recursive offset into xsub working memory above in // iradix, reordered by reference. osub is a an offset into the main // answer o, reordered by reference. radix iterates 3,2,1,0 { int j, itmp, thisx, thisgrpn, nextradix, shift; unsigned int *thiscounts; // N_SMALL=200 is guess based on limited testing. Needs // calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 // cummulate + 256 memset + allowance since reverse order is // unlikely. when nalast==0, iinsert will be called only from // within iradix. if (n < N_SMALL) { iinsert(xsub, osub, n); return; } shift = radix * 8; thiscounts = radixcounts[radix]; for (int i = 0; i < n; i++) { thisx = (unsigned int) xsub[i] - INT_MIN; // sequential in xsub thiscounts[thisx >> shift & 0xFF]++; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; i++) // don't cummulate through 0s, important below if (thiscounts[i]) thiscounts[i] = (itmp += thiscounts[i]); for (int i = n - 1; i >= 0; i--) { thisx = ((unsigned int) xsub[i] - INT_MIN) >> shift & 0xFF; j = --thiscounts[thisx]; otmp[j] = osub[i]; ((int *) xtmp)[j] = xsub[i]; } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * sizeof(int)); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; /* TO DO: If nextradix == -1 AND no further args from do_radixsort AND !retGrp, we're done. We have o. Remember to memset thiscounts before returning. */ if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; i++) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { iradix_r(xsub+itmp, osub+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } } // dradix from Arun's fastradixdouble.c // + changed to MSD and hooked into do_radixsort framework here. // + replaced tolerance with rounding s.f. static unsigned long long dmask1; static unsigned long long dmask2; static void setNumericRounding(int dround) { dmask1 = dround ? 1 << (8 * dround - 1) : 0; dmask2 = 0xffffffffffffffff << dround * 8; } static union { double d; unsigned long long ull; } u; static unsigned long long dtwiddle(void *p, int i, int order) { u.d = order * ((double *)p)[i]; // take care of 'order' at the beginning if (R_FINITE(u.d)) { u.ull = (u.d != 0.0) ? u.ull + ((u.ull & dmask1) << 1) : 0; } else if (ISNAN(u.d)) { u.ull = 0; return (nalast == 1 ? ~u.ull : u.ull); } unsigned long long mask = (u.ull & 0x8000000000000000) ? // always flip sign bit and if negative (sign bit was set) // flip other bits too 0xffffffffffffffff : 0x8000000000000000; return ((u.ull ^ mask) & dmask2); } static Rboolean dnan(void *p, int i) { u.d = ((double *) p)[i]; return (ISNAN(u.d)); } static unsigned long long (*twiddle) (void *, int, int); static Rboolean(*is_nan) (void *, int); // the size of the arg type (4 or 8). Just 8 currently until iradix is // merged in. static size_t colSize = 8; static void dradix_r(unsigned char *xsub, int *osub, int n, int radix); #ifdef WORDS_BIGENDIAN #define RADIX_BYTE colSize - radix - 1 #else #define RADIX_BYTE radix #endif static void dradix(unsigned char *x, int *o, int n) { int radix, nextradix, itmp, thisgrpn, maxgrpn; unsigned int *thiscounts; unsigned long long thisx = 0; // see comments in iradix for structure. This follows the same. // TO DO: merge iradix in here (almost ready) for (int i = 0; i < n; i++) { thisx = twiddle(x, i, order); for (radix = 0; radix < colSize; radix++) // if dround == 2 then radix 0 and 1 will be all 0 here and skipped. /* on little endian, 0 is the least significant bits (the right) and 7 is the most including sign (the left); i.e. reversed. */ radixcounts[radix][((unsigned char *)&thisx)[RADIX_BYTE]]++; } for (radix = 0; radix < colSize; radix++) { // thisx is the last x after loop above int i = ((unsigned char *) &thisx)[RADIX_BYTE]; skip[radix] = radixcounts[radix][i] == n; // clear it now, the other counts must be 0 already if (skip[radix]) radixcounts[radix][i] = 0; } radix = (int) colSize - 1; // MSD while (radix >= 0 && skip[radix]) radix--; if (radix == -1) { // All radix are skipped; i.e. one number repeated n times. if (nalast == 0 && is_nan(x, 0)) // all values are identical. return 0 if nalast=0 & all NA // because of 'return', have to take care of it here. for (int i = 0; i < n; i++) o[i] = 0; else for (int i = 0; i < n; i++) o[i] = (i + 1); push(n); return; } for (int i = radix - 1; i >= 0; i--) { // clear the lower radix counts, we only did them to know // skip. will be reused within each group if (!skip[i]) memset(radixcounts[i], 0, 257 * sizeof(unsigned int)); } thiscounts = radixcounts[radix]; itmp = thiscounts[0]; maxgrpn = itmp; for (int i = 1; itmp < n && i < 256; i++) { thisgrpn = thiscounts[i]; if (thisgrpn) { // don't cummulate through 0s, important below if (thisgrpn > maxgrpn) maxgrpn = thisgrpn; thiscounts[i] = (itmp += thisgrpn); } } for (int i = n - 1; i >= 0; i--) { thisx = twiddle(x, i, order); o[ --thiscounts[((unsigned char *)&thisx)[RADIX_BYTE]] ] = i + 1; } if (radix_xsuballoc < maxgrpn) { // TO DO: centralize this alloc // The largest group according to the first non-skipped radix, // so could be big (if radix is needed on first arg) TO DO: // could include extra bits to divide the first radix up // more. Often the MSD has groups in just 0-4 out of 256. // free'd at the end of do_radixsort once we're done calling iradix // repetitively radix_xsub = (double *) realloc(radix_xsub, maxgrpn * sizeof(double)); if (!radix_xsub) Error("Failed to realloc working memory %d*8bytes (xsub in dradix), radix=%d", maxgrpn, radix); radix_xsuballoc = maxgrpn; } alloc_otmp(maxgrpn); // TO DO: leave to do_radixsort and remove these? alloc_xtmp(maxgrpn); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. dradix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; i++) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) { push(thisgrpn); } else { if (colSize == 4) { // ready for merging in iradix ... error("Not yet used, still using iradix instead"); for (int j = 0; j < thisgrpn; j++) ((int *)radix_xsub)[j] = (int)twiddle(x, o[itmp+j]-1, order); // this is why this xsub here can't be the same memory // as xsub in do_radixsort } else for (int j = 0; j < thisgrpn; j++) ((unsigned long long *)radix_xsub)[j] = twiddle(x, o[itmp+j]-1, order); // changes xsub and o by reference recursively. dradix_r(radix_xsub, o+itmp, thisgrpn, nextradix); } itmp = thiscounts[i]; thiscounts[i] = 0; } if (nalast == 0) // nalast = 1, -1 are both taken care already. for (int i = 0; i < n; i++) o[i] = is_nan(x, o[i] - 1) ? 0 : o[i]; // nalast = 0 is dealt with separately as it just sets o to 0 // at those indices where x is NA. x[o[i]-1] because x is not // modified by reference unlike iinsert or iradix_r } static void dinsert(unsigned long long *x, int *o, int n) // orders both x and o by reference in-place. Fast for small vectors, // low overhead. don't be tempted to binsearch backwards here, have // to shift anyway; many memmove would have overhead and do the same // thing 'dinsert' will not be called when nalast = 0 and o[0] = -1. { int otmp, tt; unsigned long long xtmp; for (int i = 1; i < n; i++) { xtmp = x[i]; if (xtmp < x[i - 1]) { int j = i - 1; otmp = o[i]; while (j >= 0 && xtmp < x[j]) { x[j + 1] = x[j]; o[j + 1] = o[j]; j--; } x[j + 1] = xtmp; o[j + 1] = otmp; } } tt = 0; for (int i = 1; i < n; i++) if (x[i] == x[i - 1]) tt++; else { push(tt + 1); tt = 0; } push(tt + 1); } static void dradix_r(unsigned char *xsub, int *osub, int n, int radix) /* xsub is a recursive offset into xsub working memory above in dradix, reordered by reference. osub is a an offset into the main answer o, reordered by reference. dradix iterates 7,6,5,4,3,2,1,0 */ { int itmp, thisgrpn, nextradix; unsigned int *thiscounts; unsigned char *p; if (n < 200) { /* 200 is guess based on limited testing. Needs calibrate(). Was 50 based on sum(1:50)=1275 worst -vs- 256 cummulate + 256 memset + allowance since reverse order is unlikely */ // order=1 here because it's already taken care of in iradix dinsert((void *)xsub, osub, n); return; } thiscounts = radixcounts[radix]; p = xsub + RADIX_BYTE; for (int i = 0; i < n; i++) { thiscounts[*p]++; p += colSize; } itmp = thiscounts[0]; for (int i = 1; itmp < n && i < 256; i++) // don't cummulate through 0s, important below if (thiscounts[i]) thiscounts[i] = (itmp += thiscounts[i]); p = xsub + (n - 1) * colSize; if (colSize == 4) { error("Not yet used, still using iradix instead"); for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((int *) xtmp)[j] = *(int *) p; p -= colSize; } } else { for (int i = n - 1; i >= 0; i--) { int j = --thiscounts[*(p + RADIX_BYTE)]; otmp[j] = osub[i]; ((unsigned long long *) xtmp)[j] = *(unsigned long long *) p; p -= colSize; } } memcpy(osub, otmp, n * sizeof(int)); memcpy(xsub, xtmp, n * colSize); nextradix = radix - 1; while (nextradix >= 0 && skip[nextradix]) nextradix--; // TO DO: If nextradix==-1 and no further args from do_radixsort, // we're done. We have o. Remember to memset thiscounts before // returning. if (thiscounts[0] != 0) Error("Logical error. thiscounts[0]=%d but should have been decremented to 0. radix=%d", thiscounts[0], radix); thiscounts[256] = n; itmp = 0; for (int i = 1; itmp < n && i <= 256; i++) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff if (thisgrpn == 1 || nextradix == -1) push(thisgrpn); else dradix_r(xsub + itmp * colSize, osub + itmp, thisgrpn, nextradix); itmp = thiscounts[i]; thiscounts[i] = 0; } } // TO DO?: dcount. Find step size, then range = (max-min)/step and // proceed as icount. Many fixed precision floats (such as prices) may // be suitable. Fixed precision such as 1.10, 1.15, 1.20, 1.25, 1.30 // ... do use all bits so dradix skipping may not help. static int *cradix_counts = NULL; static int cradix_counts_alloc = 0; static int maxlen = 1; static SEXP *cradix_xtmp = NULL; static int cradix_xtmp_alloc = 0; // same as StrCmp but also takes into account 'decreasing' and 'na.last' args. static int StrCmp2(SEXP x, SEXP y) { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; // if x=NA, nalast=1 ? then x > y else x < y (Note: nalast == 0 is // already taken care of in 'csorted', won't be 0 here) if (x == NA_STRING) return nalast; if (y == NA_STRING) return -nalast; // if y=NA, nalast=1 ? then y > x return order*strcmp(CHAR(x), CHAR(y)); // same as explanation in StrCmp } static int StrCmp(SEXP x, SEXP y) // also used by bmerge and chmatch { // same cached pointer (including NA_STRING == NA_STRING) if (x == y) return 0; if (x == NA_STRING) return -1; // x < y if (y == NA_STRING) return 1; // x > y // assumes strings are in same encoding return strcmp(CHAR(x), CHAR(y)); } #define CHAR_ENCODING(x) (IS_ASCII(x) ? CE_UTF8 : getCharCE(x)) static void checkEncodings(SEXP x) { cetype_t ce; int i; for (i = 0; i < length(x) && STRING_ELT(x, i) == NA_STRING; i++); if (i < length(x)) { ce = CHAR_ENCODING(STRING_ELT(x, i)); if (ce == CE_NATIVE) { error(_("Character encoding must be UTF-8, Latin-1 or bytes")); } } /* Disabled for now -- doubles the time (for already sorted vectors): why? for (int i = 1; i < length(x); i++) { if (ce != CHAR_ENCODING(STRING_ELT(x, i))) { error(_("Mixed character encodings are not supported")); } } */ } static void cradix_r(SEXP * xsub, int n, int radix) // xsub is a unique set of CHARSXP, to be ordered by reference // First time, radix == 0, and xsub == x. Then recursively moves SEXP together // for L1 cache efficiency. // Quite different to iradix because // 1) x is known to be unique so fits in cache // (wide random access not an issue) // 2) they're variable length character strings // 3) no need to maintain o. Just simply reorder x. No grps or push. // Fortunately, UTF sorts in the same order if treated as ASCII, so we // can simplify by doing it by bytes. // TO DO: confirm a forwards (MSD) radix for efficiency, although more // complicated. // This part has nothing to do with truelength. The // truelength stuff is to do with finding the unique strings. We may // be able to improve CHARSXP derefencing by submitting patch to R to // make R's string cache contiguous but would likely be difficult. If // we strxfrm, then it'll then be contiguous and compact then anyway. { int itmp, *thiscounts, thisgrpn=0, thisx=0; SEXP stmp; // TO DO?: chmatch to existing sorted vector, then grow it. // TO DO?: if (n= 0; i--) { thisx = xsub[i] == NA_STRING ? 0 : (radix < LENGTH(xsub[i]) ? (unsigned char) (CHAR(xsub[i])[radix]) : 1); int j = --thiscounts[thisx]; cradix_xtmp[j] = xsub[i]; } memcpy(xsub, cradix_xtmp, n * sizeof(SEXP)); if (radix == maxlen - 1) { memset(thiscounts, 0, 256 * sizeof(int)); return; } if (thiscounts[0] != 0) Error("Logical error. counts[0]=%d in cradix but should have been decremented to 0. radix=%d", thiscounts[0], radix); itmp = 0; for (int i = 1; i < 256; i++) { if (thiscounts[i] == 0) continue; thisgrpn = thiscounts[i] - itmp; // undo cummulate; i.e. diff cradix_r(xsub + itmp, thisgrpn, radix + 1); itmp = thiscounts[i]; // set to 0 now since we're here, saves memset // afterwards. Important to clear! Also more portable for // machines where 0 isn't all bits 0 (?!) thiscounts[i] = 0; } if (itmp < n - 1) cradix_r(xsub + itmp, n - itmp, radix + 1); // final group } static SEXP *ustr = NULL; static int ustr_alloc = 0, ustr_n = 0; static void cgroup(SEXP * x, int *o, int n) // As icount : // Places the ordering into o directly, overwriting whatever was there // Doesn't change x // Pushes group sizes onto stack // Only run when sortStr == FALSE. Basically a counting sort, in first // appearance order, directly. Since it doesn't sort the strings, the // name is cgroup. there is no _pre for this. ustr created and // cleared each time. { // savetl_init() is called once at the start of do_radixsort if (ustr_n != 0) Error ("Internal error. ustr isn't empty when starting cgroup: ustr_n=%d, ustr_alloc=%d", ustr_n, ustr_alloc); for (int i = 0; i < n; i++) { SEXP s = x[i]; if (TRLEN(s) < 0) { // this case first as it's the most frequent SET_TRLEN(s, TRLEN(s) - 1); // use negative counts so as to detect R's own (positive) // usage of tl on CHARSXP continue; } if (TRLEN(s) > 0) { // Save any of R's own usage of tl (assumed positive, so // we can both count and save in one scan), to restore // afterwards. From R 2.14.0, tl is initialized to 0, // prior to that it was random so this step saved too much. savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > n) ustr_alloc = n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Unable to realloc %d * %d bytes in cgroup", ustr_alloc, (int)sizeof(SEXP)); } SET_TRLEN(s, -1); ustr[ustr_n++] = s; } // TO DO: the same string in different encodings will be // considered different here. Sweep through ustr and merge counts // where equal (sort needed therefore, unfortunately?, only if // there are any marked encodings present) int cumsum = 0; for (int i = 0; i < ustr_n; i++) { // 0.000 push(-TRLEN(ustr[i])); SET_TRLEN(ustr[i], cumsum += -TRLEN(ustr[i])); } int *target = (o[0] != -1) ? newo : o; for (int i = n - 1; i >= 0; i--) { SEXP s = x[i]; // 0.400 (page fetches on string cache) int k = TRLEN(s) - 1; SET_TRLEN(s, k); target[k] = i + 1; // 0.800 (random access to o) } // The cummulate meant counts are left non zero, so reset for next // time (0.00s). for (int i = 0; i < ustr_n; i++) SET_TRLEN(ustr[i], 0); ustr_n = 0; } static int *csort_otmp = NULL, csort_otmp_alloc = 0; static void alloc_csort_otmp(int n) { if (csort_otmp_alloc >= n) return; csort_otmp = (int *) realloc(csort_otmp, n * sizeof(int)); if (csort_otmp == NULL) Error ("Failed to allocate working memory for csort_otmp. Requested %d * %d bytes", n, (int)sizeof(int)); csort_otmp_alloc = n; } static void csort(SEXP * x, int *o, int n) /* As icount : Places the ordering into o directly, overwriting whatever was there Doesn't change x Pushes group sizes onto stack Requires csort_pre() to have created and sorted ustr already */ { /* can't use otmp, since iradix might be called here and that uses otmp (and xtmp). alloc_csort_otmp(n) is called from do_radixsort for either n=nrow if 1st arg, or n=maxgrpn if onwards args */ for (int i = 0; i < n; i++) csort_otmp[i] = (x[i] == NA_STRING) ? NA_INTEGER : -TRLEN(x[i]); if (nalast == 0 && n == 2) { // special case for nalast == 0. n == 1 is handled inside // do_radixsort. at least 1 will be NA here else use o from caller // directly (not 1st arg) if (o[0] == -1) for (int i = 0; i < n; i++) o[i] = i + 1; for (int i = 0; i < n; i++) if (csort_otmp[i] == NA_INTEGER) o[i] = 0; push(1); push(1); return; } if (n < N_SMALL && nalast != 0) { // TO DO: calibrate() N_SMALL=200 if (o[0] == -1) for (int i = 0; i < n; i++) o[i] = i + 1; // else use o from caller directly (not 1st arg) for (int i = 0; i < n; i++) csort_otmp[i] = icheck(csort_otmp[i]); iinsert(csort_otmp, o, n); } else { setRange(csort_otmp, n); if (range == NA_INTEGER) Error("Internal error. csort's otmp contains all-NA"); int *target = (o[0] != -1) ? newo : o; if (range <= N_RANGE) // TO DO: calibrate(). radix was faster (9.2s // "range<=10000" instead of 11.6s "range<=N_RANGE && // range 0) { savetl(s); SET_TRLEN(s, 0); } if (ustr_alloc <= ustr_n) { // 10000 = 78k of 8byte pointers. Small initial guess, // negligible time to alloc. ustr_alloc = (ustr_alloc == 0) ? 10000 : ustr_alloc*2; if (ustr_alloc > old_un+n) ustr_alloc = old_un + n; ustr = realloc(ustr, ustr_alloc * sizeof(SEXP)); if (ustr == NULL) Error("Failed to realloc ustr. Requested %d * %d bytes", ustr_alloc, (int)sizeof(SEXP)); } SET_TRLEN(s, -1); // this -1 will become its ordering later below ustr[ustr_n++] = s; // length on CHARSXP is the nchar of char * (excluding \0), // and treats marked encodings as if ascii. if (s != NA_STRING && LENGTH(s) > maxlen) maxlen = LENGTH(s); } new_un = ustr_n; if (new_un == old_un) return; // No new strings observed, seen them all before in previous // arg. ustr already sufficient. If we ever make ustr // permanently held by data.table, we'll just need to make the // final loop to set -i-1 before returning here. sort ustr. // TODO: just sort new ones and merge them in. These allocs are // here, to save them being in the recursive cradix_r() if (cradix_counts_alloc < maxlen) { cradix_counts_alloc = maxlen + 10; // +10 to save too many reallocs cradix_counts = (int *)realloc(cradix_counts, cradix_counts_alloc * 256 * sizeof(int)); if (!cradix_counts) Error("Failed to alloc cradix_counts"); memset(cradix_counts, 0, cradix_counts_alloc * 256 * sizeof(int)); } if (cradix_xtmp_alloc < ustr_n) { cradix_xtmp = (SEXP *) realloc(cradix_xtmp, ustr_n * sizeof(SEXP)); // TO DO: Reuse the one we have in do_radixsort. // Does it need to be n length? if (!cradix_xtmp) Error("Failed to alloc cradix_tmp"); cradix_xtmp_alloc = ustr_n; } // sorts ustr in-place by reference save ordering in the // CHARSXP. negative so as to distinguish with R's own usage. cradix_r(ustr, ustr_n, 0); for (int i = 0; i < ustr_n; i++) SET_TRLEN(ustr[i], -i - 1); } // functions to test vectors for sortedness: isorted, dsorted and csorted // base:is.unsorted returns NA in the presence of any NA, but we need // to consider na.last, and we also return -1 if x is sorted in // _strictly_ reverse order; a common case we optimize. If a vector // is in decreasing order *with ties*, then an in-place reverse (no // sort) would result in instability of ties, so we are strict. We // also save grouping information during the check; that information // is required when sorting by multiple arguments. // TO DO: test in big steps first to return faster if unsortedness is // at the end (a common case of rbind'ing data to end) These are all // sequential access to x, so very quick and cache efficient. // order = 1 is ascending and order=-1 is descending; also takes care // of na.last argument with check through 'icheck' Relies on // NA_INTEGER == INT_MIN, checked in init.c static int isorted(int *x, int n) { int i = 1, j = 0; // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it // to sort routines to replace o's with 0's // no NAs ? continue to check rest of isorted - the same routine as usual if (nalast == 0) { for (int k = 0; k < n; k++) if (x[k] != NA_INTEGER) j++; if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (icheck(x[1]) < icheck(x[0])) { i = 2; while (i < n && icheck(x[i]) < icheck(x[i - 1])) i++; // strictly opposite to expected 'order', no ties; if (i == n) { mpush(1, n); return (-1); } // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i < n; i++) { if (icheck(x[i]) < icheck(x[i - 1])) { gsngrp[flip] = old; return (0); } if (x[i] == x[i - 1]) tt++; else { push(tt); tt = 1; } } push(tt); // same as 'order', NAs at the beginning for order=1, at end for // order=-1, possibly with ties return(1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) (in twiddle) static int dsorted(double *x, int n) { int i = 1, j = 0; unsigned long long prev, this; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines to // replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k < n; k++) if (!is_nan(x, k)) j++; if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } prev = twiddle(x, 0, order); this = twiddle(x, 1, order); if (this < prev) { i = 2; prev = this; while (i < n && (this = twiddle(x, i, order)) < prev) { i++; prev = this; } if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; e.g. no // more than one NA at the beginning/end (for order=-1/1) // TO DO: improve to be stable for ties in reverse else return(0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i < n; i++) { // TO DO: once we get past -Inf, NA and NaN at the bottom, and // +Inf at the top, the middle only need be twiddled // for tolerance (worth it?) this = twiddle(x, i, order); if (this < prev) { gsngrp[flip] = old; return (0); } if (this == prev) tt++; else { push(tt); tt = 1; } prev = this; } push(tt); // exactly as expected in 'order' (1=increasing, -1=decreasing), // possibly with ties return (1); } // order=1 is ascending and -1 is descending // also accounts for nalast=0 (=NA), =1 (TRUE), -1 (FALSE) static int csorted(SEXP *x, int n) { int i = 1, j = 0, tmp; if (nalast == 0) { // when nalast = NA, // all NAs ? return special value to replace all o's values with '0' // any NAs ? return 0 = unsorted and leave it to sort routines // to replace o's with 0's // no NAs ? continue to check the rest of isorted - // the same routine as usual for (int k = 0; k < n; k++) if (x[k] != NA_STRING) j++; if (j == 0) { push(n); return (-2); } if (j != n) return (0); } if (n <= 1) { push(n); return (1); } if (StrCmp2(x[1], x[0]) < 0) { i = 2; while (i < n && StrCmp2(x[i], x[i - 1]) < 0) i++; if (i == n) { mpush(1, n); return (-1); } // strictly opposite of expected 'order', no ties; // e.g. no more than one NA at the beginning/end (for order=-1/1) else return (0); } int old = gsngrp[flip]; int tt = 1; for (int i = 1; i < n; i++) { tmp = StrCmp2(x[i], x[i - 1]); if (tmp < 0) { gsngrp[flip] = old; return (0); } if (tmp == 0) tt++; else { push(tt); tt = 1; } } push(tt); // exactly as expected in 'order', possibly with ties return (1); } static void isort(int *x, int *o, int n) { if (n <= 2) { // nalast = 0 and n == 2 (check bottom of this file for explanation) if (nalast == 0 && n == 2) { if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i < n; i++) if (x[i] == NA_INTEGER) o[i] = 0; push(1); push(1); return; } else Error("Internal error: isort received n=%d. isorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r on N_SMALL=200. /* if not o[0] then can't just populate with 1:n here, since x is changed by ref too (so would need to be copied). */ /* pushes inside too. Changes x and o by reference, so not suitable in first arg when o hasn't been populated yet and x is an actual argument (hence check on o[0]). */ if (order != 1 || nalast != -1) // so that default case, i.e., order=1, nalast=FALSE will // not be affected (ex: `setkey`) for (int i = 0; i < n; i++) x[i] = icheck(x[i]); iinsert(x, o, n); } else { /* Tighter range (e.g. copes better with a few abnormally large values in some groups), but also, when setRange was once at arg level that caused an extra scan of (long) x first. 10,000 calls to setRange takes just 0.04s i.e. negligible. */ setRange(x, n); if (range == NA_INTEGER) Error("Internal error: isort passed all-NA. isorted should have caught this before this point"); int *target = (o[0] != -1) ? newo : o; // was range < 10000 for subgroups, but 1e5 for the first // arg, tried to generalise here. 1e4 rather than 1e5 here // because iterated was (thisgrpn < 200 || range > 20000) then // radix a short vector with large range can bite icount when // iterated (BLOCK 4 and 6) if (range <= N_RANGE && range <= n) { icount(x, target, n); } else { iradix(x, target, n); } } } static void dsort(double *x, int *o, int n) { if (n <= 2) { if (nalast == 0 && n == 2) { // don't have to twiddle here.. at least one will be NA // and 'n' WILL BE 2. if (o[0] == -1) { o[0] = 1; o[1] = 2; } for (int i = 0; i < n; i++) if (is_nan(x, i)) o[i] = 0; push(1); push(1); return; } Error("Internal error: dsort received n=%d. dsorted should have dealt with this (e.g. as a reverse sorted vector) already",n); } if (n < N_SMALL && o[0] != -1 && nalast != 0) { // see comment above in iradix_r re N_SMALL=200, and isort for o[0] for (int i = 0; i < n; i++) ((unsigned long long *)x)[i] = twiddle(x, i, order); // have to twiddle here anyways, can't speed up default case // like in isort dinsert((unsigned long long *)x, o, n); } else { dradix((unsigned char *) x, (o[0] != -1) ? newo : o, n); } } attribute_hidden SEXP do_radixsort(SEXP call, SEXP op, SEXP args, SEXP rho) { int n = -1, narg = 0, ngrp, tmp, *osub, thisgrpn; R_xlen_t nl = n; Rboolean isSorted = TRUE, retGrp; void *xd; int *o = NULL; /* ML: FIXME: Here are just two of the dangerous assumptions here */ if (sizeof(int) != 4) { error("radix sort assumes sizeof(int) == 4"); } if (sizeof(double) != 8) { error("radix sort assumes sizeof(double) == 8"); } nalast = (asLogical(CAR(args)) == NA_LOGICAL) ? 0 : (asLogical(CAR(args)) == TRUE) ? 1 : -1; // 1=TRUE, -1=FALSE, 0=NA args = CDR(args); SEXP decreasing = CAR(args); args = CDR(args); /* If TRUE, return starts of runs of identical values + max group size. */ retGrp = asLogical(CAR(args)); args = CDR(args); /* If FALSE, get order of strings in appearance order. Essentially abuses the CHARSXP table to group strings without hashing them. Only makes sense when retGrp=TRUE. */ sortStr = asLogical(CAR(args)); args = CDR(args); /* When grouping, we round off doubles to account for imprecision */ setNumericRounding(retGrp ? 2 : 0); if (args == R_NilValue) return R_NilValue; if (isVector(CAR(args))) nl = XLENGTH(CAR(args)); for (SEXP ap = args; ap != R_NilValue; ap = CDR(ap), narg++) { if (!isVector(CAR(ap))) error(_("argument %d is not a vector"), narg + 1); //Rprintf("%d, %d\n", XLENGTH(CAR(ap)), nl); if (XLENGTH(CAR(ap)) != nl) error(_("argument lengths differ")); } if (narg != length(decreasing)) error(_("length(decreasing) must match the number of order arguments")); for (int i = 0; i < narg; i++) { if (LOGICAL(decreasing)[i] == NA_LOGICAL) error(_("'decreasing' elements must be TRUE or FALSE")); } order = asLogical(decreasing) ? -1 : 1; SEXP x = CAR(args); args = CDR(args); // (ML) FIXME: need to support long vectors if (nl > INT_MAX) { error(_("long vectors not supported")); } n = (int) nl; // upper limit for stack size (all size 1 groups). We'll detect // and avoid that limit, but if just one non-1 group (say 2), that // can't be avoided. gsmaxalloc = n; // once for the result, needs to be length n. // TO DO: save allocation if NULL is returned (isSorted = =TRUE) so // [i|c|d]sort know they can populate o directly with no working // memory needed to reorder existing order had to repace this from // '0' to '-1' because 'nalast = 0' replace 'o[.]' with 0 values. SEXP ans = PROTECT(allocVector(INTSXP, n)); o = INTEGER(ans); if (n > 0) o[0] = -1; xd = DATAPTR(x); stackgrps = narg > 1 || retGrp; if (TYPEOF(x) == STRSXP) { checkEncodings(x); } savetl_init(); // from now on use Error not error. switch (TYPEOF(x)) { case INTSXP: case LGLSXP: tmp = isorted(xd, n); break; case REALSXP : twiddle = &dtwiddle; is_nan = &dnan; tmp = dsorted(xd, n); break; case STRSXP : tmp = csorted(xd, n); break; default : Error("First arg is type '%s', not yet supported", R_typeToChar(x)); } if (tmp) { // -1 or 1. NEW: or -2 in case of nalast == 0 and all NAs if (tmp == 1) { // same as expected in 'order' (1 = increasing, -1 = decreasing) isSorted = TRUE; for (int i = 0; i < n; i++) o[i] = i + 1; } else if (tmp == -1) { // -1 (or -n for result of strcmp), strictly opposite to // -expected 'order' isSorted = FALSE; for (int i = 0; i < n; i++) o[i] = n - i; } else if (nalast == 0 && tmp == -2) { // happens only when nalast=NA/0. Means all NAs, replace // with 0's therefore! isSorted = FALSE; for (int i = 0; i < n; i++) o[i] = 0; } } else { isSorted = FALSE; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: isort(xd, o, n); break; case REALSXP : dsort(xd, o, n); break; case STRSXP : if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(n); csort(xd, o, n); } else cgroup(xd, o, n); break; default: Error ("Internal error: previous default should have caught unsupported type"); } } int maxgrpn = gsmax[flip]; // biggest group in the first arg void *xsub = NULL; // local // This was not valid C23, and clang 15 warns it was not valid C99 either. // int (*f) (); // called with fn pointer, int // void (*g) (); // called with fn pointer, int *, int int fgtype; if (narg > 1 && gsngrp[flip] < n) { // double is the largest type, 8 xsub = (void *) malloc(maxgrpn * sizeof(double)); if (xsub == NULL) Error("Couldn't allocate xsub in do_radixsort, requested %d * %d bytes.", maxgrpn, (int)sizeof(double)); // global variable, used by isort, dsort, sort and cgroup newo = (int *) malloc(maxgrpn * sizeof(int)); if (newo == NULL) Error("Couldn't allocate newo in do_radixsort, requested %d * %d bytes.", maxgrpn, (int)sizeof(int)); } for (int col = 2; col <= narg; col++) { x = CAR(args); args = CDR(args); xd = DATAPTR(x); ngrp = gsngrp[flip]; if (ngrp == n && nalast != 0) break; flipflop(); stackgrps = col != narg || retGrp; order = LOGICAL(decreasing)[col - 1] ? -1 : 1; switch (TYPEOF(x)) { case INTSXP: case LGLSXP: // f = &isorted; // g = &isort; fgtype = 1; break; case REALSXP: twiddle = &dtwiddle; is_nan = &dnan; fgtype = 2; // f = &dsorted; // g = &dsort; break; case STRSXP: fgtype = 3; // f = &csorted; if (sortStr) { csort_pre(xd, n); alloc_csort_otmp(gsmax[1 - flip]); // g = &csort; } // no increasing/decreasing order required if sortStr = FALSE, // just a dummy argument else { fgtype = 4; // g = &cgroup; } break; default: Error("Arg %d is type '%s', not yet supported", col, R_typeToChar(x)); } int i = 0; for (int grp = 0; grp < ngrp; grp++) { thisgrpn = gs[1 - flip][grp]; if (thisgrpn == 1) { if (nalast == 0) { // this edge case had to be taken care of // here.. (see the bottom of this file for // more explanation) switch (TYPEOF(x)) { case INTSXP: if (INTEGER(x)[o[i] - 1] == NA_INTEGER) { isSorted = FALSE; o[i] = 0; } break; case LGLSXP: if (LOGICAL(x)[o[i] - 1] == NA_LOGICAL) { isSorted = FALSE; o[i] = 0; } break; case REALSXP: if (ISNAN(REAL(x)[o[i] - 1])) { isSorted = FALSE; o[i] = 0; } break; case STRSXP: if (STRING_ELT(x, o[i] - 1) == NA_STRING) { isSorted = FALSE; o[i] = 0; } break; default : Error("Internal error: previous default should have caught unsupported type"); } } i++; push(1); continue; } osub = o+i; // ** TO DO **: if isSorted, we can just point xsub // into x directly. If (*f)() returns 0, // though, will have to copy x at that point // When doing this, xsub could be allocated at // that point for the first time. if (TYPEOF(x) == STRSXP) for (int j = 0; j < thisgrpn; j++) ((SEXP *) xsub)[j] = ((SEXP *) xd)[o[i++] - 1]; else if (TYPEOF(x) == REALSXP) for (int j = 0; j < thisgrpn; j++) ((double *) xsub)[j] = ((double *) xd)[o[i++] - 1]; else for (int j = 0; j < thisgrpn; j++) ((int *) xsub)[j] = ((int *) xd)[o[i++] - 1]; // continue; // BASELINE short circuit timing // point. Up to here is the cost of creating xsub. // [i|d|c]sorted(); very low cost, sequential // tmp = (*f)(xsub, thisgrpn); switch(fgtype) { case 1: tmp = isorted(xsub, thisgrpn); break; case 2: tmp = dsorted(xsub, thisgrpn); break; case 3: case 4: tmp = csorted(xsub, thisgrpn); } if (tmp) { // *sorted will have already push()'d the groups if (tmp == -1) { isSorted = FALSE; for (int k = 0; k < thisgrpn / 2; k++) { // reverse the order in-place using no // function call or working memory // isorted only returns -1 for // _strictly_ decreasing order, // otherwise ties wouldn't be stable tmp = osub[k]; osub[k] = osub[thisgrpn - 1 - k]; osub[thisgrpn - 1 - k] = tmp; } } else if (nalast == 0 && tmp == -2) { // all NAs, replace osub[.] with 0s. isSorted = FALSE; for (int k = 0; k < thisgrpn; k++) osub[k] = 0; } continue; } isSorted = FALSE; // nalast=NA will result in newo[0] = 0. So had to change to -1. newo[0] = -1; // may update osub directly, or if not will put the // result in global newo // (*g)(xsub, osub, thisgrpn); switch(fgtype) { case 1: isort(xsub, osub, thisgrpn); break; case 2: dsort(xsub, osub, thisgrpn); break; case 3: csort(xsub, osub, thisgrpn); break; case 4: cgroup(xsub, osub, thisgrpn); break; } if (newo[0] != -1) { if (nalast != 0) for (int j = 0; j < thisgrpn; j++) // reuse xsub to reorder osub ((int *) xsub)[j] = osub[newo[j] - 1]; else for (int j = 0; j < thisgrpn; j++) // final nalast case to handle! ((int *) xsub)[j] = (newo[j] == 0) ? 0 : osub[newo[j] - 1]; memcpy(osub, xsub, thisgrpn * sizeof(int)); } } } if (!sortStr && ustr_n != 0) Error("Internal error: at the end of do_radixsort sortStr == FALSE but ustr_n !=0 [%d]", ustr_n); for(int i = 0; i < ustr_n; i++) SET_TRLEN(ustr[i], 0); maxlen = 1; // reset global. Minimum needed to count "" and NA ustr_n = 0; savetl_end(); free(ustr); ustr = NULL; ustr_alloc = 0; if (retGrp) { int maxgrpn = NA_INTEGER; ngrp = gsngrp[flip]; SEXP s_ends = install("ends"); setAttrib(ans, s_ends, x = allocVector(INTSXP, ngrp)); if (ngrp > 0) { INTEGER(x)[0] = gs[flip][0]; for (int i = 1; i < ngrp; i++) INTEGER(x)[i] = INTEGER(x)[i - 1] + gs[flip][i]; maxgrpn = gsmax[flip]; } SEXP s_maxgrpn = install("maxgrpn"); setAttrib(ans, s_maxgrpn, ScalarInteger(maxgrpn)); SEXP nms; PROTECT(nms = allocVector(STRSXP, 2)); SET_STRING_ELT(nms, 0, mkChar("grouping")); SET_STRING_ELT(nms, 1, mkChar("integer")); setAttrib(ans, R_ClassSymbol, nms); UNPROTECT(1); } Rboolean dropZeros = !retGrp && !isSorted && nalast == 0; if (dropZeros) { int zeros = 0; for (int i = 0; i < n; i++) { if (o[i] == 0) zeros++; } if (zeros > 0) { PROTECT(ans = allocVector(INTSXP, n - zeros)); int *o2 = INTEGER(ans); for (int i = 0, i2 = 0; i < n; i++) { if (o[i] > 0) o2[i2++] = o[i]; } UNPROTECT(1); } } gsfree(); free(radix_xsub); radix_xsub=NULL; radix_xsuballoc=0; free(xsub); free(newo); xsub=newo=NULL; free(xtmp); xtmp=NULL; xtmp_alloc=0; free(otmp); otmp=NULL; otmp_alloc=0; free(csort_otmp); csort_otmp=NULL; csort_otmp_alloc=0; free(cradix_counts); cradix_counts=NULL; cradix_counts_alloc=0; free(cradix_xtmp); cradix_xtmp=NULL; cradix_xtmp_alloc=0; // TO DO: use xtmp already got UNPROTECT(1); return ans; }