/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * Copyright (C) 1997--2017 The R Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ #ifdef HAVE_CONFIG_H #include #endif #define R_USE_SIGNALS 1 #include #include #include #include /* for isspace */ #include /* for DBL_MAX */ #undef COMPILING_R #define R_imax2(x, y) ((x < y) ? y : x) #include #ifdef HAVE_UNISTD_H #include #endif #ifdef Win32 void R_UTF8fixslash(char *s); static void R_wfixslash(wchar_t *s); #endif #ifdef __cplusplus #include "Clinkage.h" extern "C" { #endif void F77_SYMBOL(rwarnc)(char *msg, int *nchar); void NORET F77_SYMBOL(rexitc)(char *msg, int *nchar); #ifdef __cplusplus } #endif #include /* Many small functions are included from ../include/Rinlinedfuns.h */ attribute_hidden Rboolean tsConform(SEXP x, SEXP y) { if ((x = getAttrib(x, R_TspSymbol)) != R_NilValue && (y = getAttrib(y, R_TspSymbol)) != R_NilValue) { /* tspgets should enforce this, but prior to 2.4.0 had INTEGER() here */ if(TYPEOF(x) == REALSXP && TYPEOF(y) == REALSXP) return REAL(x)[0] == REAL(x)[0] && REAL(x)[1] == REAL(x)[1] && REAL(x)[2] == REAL(x)[2]; /* else fall through */ } return FALSE; } int nrows(SEXP s) { SEXP t; if (isVector(s) || isList(s)) { t = getAttrib(s, R_DimSymbol); if (t == R_NilValue) return LENGTH(s); return INTEGER(t)[0]; } else if (isFrame(s)) { return nrows(CAR(s)); } else error(_("object is not a matrix")); return -1; } int ncols(SEXP s) { SEXP t; if (isVector(s) || isList(s)) { t = getAttrib(s, R_DimSymbol); if (t == R_NilValue) return 1; if (LENGTH(t) >= 2) return INTEGER(t)[1]; /* This is a 1D (or possibly 0D array) */ return 1; } else if (isFrame(s)) { return length(s); } else error(_("object is not a matrix")); return -1;/*NOTREACHED*/ } #ifdef UNUSED const static char type_msg[] = "invalid type passed to internal function\n"; void internalTypeCheck(SEXP call, SEXP s, SEXPTYPE type) { if (TYPEOF(s) != type) { if (call) errorcall(call, type_msg); else error(type_msg); } } #endif const static char * const truenames[] = { "T", "True", "TRUE", "true", (char *) NULL, }; const static char * const falsenames[] = { "F", "False", "FALSE", "false", (char *) NULL, }; SEXP asChar(SEXP x) { if (isVectorAtomic(x) && XLENGTH(x) >= 1) { int w, d, e, wi, di, ei; char buf[MAXELTSIZE]; /* Probably 100 would suffice */ switch (TYPEOF(x)) { case LGLSXP: if (LOGICAL(x)[0] == NA_LOGICAL) return NA_STRING; if (LOGICAL(x)[0]) sprintf(buf, "TRUE"); else sprintf(buf, "FALSE"); return mkChar(buf); case INTSXP: if (INTEGER(x)[0] == NA_INTEGER) return NA_STRING; snprintf(buf, MAXELTSIZE, "%d", INTEGER(x)[0]); return mkChar(buf); case REALSXP: PrintDefaults(); formatReal(REAL(x), 1, &w, &d, &e, 0); return mkChar(EncodeReal0(REAL(x)[0], w, d, e, OutDec)); case CPLXSXP: PrintDefaults(); formatComplex(COMPLEX(x), 1, &w, &d, &e, &wi, &di, &ei, 0); return mkChar(EncodeComplex(COMPLEX(x)[0], w, d, e, wi, di, ei, OutDec)); case STRSXP: return STRING_ELT(x, 0); default: return NA_STRING; } } else if(TYPEOF(x) == CHARSXP) { return x; } else if(TYPEOF(x) == SYMSXP) return PRINTNAME(x); return NA_STRING; } Rboolean isUnordered(SEXP s) { return (TYPEOF(s) == INTSXP && inherits(s, "factor") && !inherits(s, "ordered")); } Rboolean isOrdered(SEXP s) { return (TYPEOF(s) == INTSXP && inherits(s, "factor") && inherits(s, "ordered")); } const static struct { const char * const str; const int type; } TypeTable[] = { { "NULL", NILSXP }, /* real types */ { "symbol", SYMSXP }, { "pairlist", LISTSXP }, { "closure", CLOSXP }, { "environment", ENVSXP }, { "promise", PROMSXP }, { "language", LANGSXP }, { "special", SPECIALSXP }, { "builtin", BUILTINSXP }, { "char", CHARSXP }, { "logical", LGLSXP }, { "integer", INTSXP }, { "double", REALSXP }, /*- "real", for R <= 0.61.x */ { "complex", CPLXSXP }, { "character", STRSXP }, { "...", DOTSXP }, { "any", ANYSXP }, { "expression", EXPRSXP }, { "list", VECSXP }, { "externalptr", EXTPTRSXP }, { "bytecode", BCODESXP }, { "weakref", WEAKREFSXP }, { "raw", RAWSXP }, { "S4", S4SXP }, /* aliases : */ { "numeric", REALSXP }, { "name", SYMSXP }, { (char *)NULL, -1 } }; SEXPTYPE str2type(const char *s) { int i; for (i = 0; TypeTable[i].str; i++) { if (!strcmp(s, TypeTable[i].str)) return (SEXPTYPE) TypeTable[i].type; } /* SEXPTYPE is an unsigned int, so the compiler warns us w/o the cast. */ return (SEXPTYPE) -1; } static struct { const char *cstrName; SEXP rcharName; SEXP rstrName; SEXP rsymName; } Type2Table[MAX_NUM_SEXPTYPE]; static int findTypeInTypeTable(SEXPTYPE t) { for (int i = 0; TypeTable[i].str; i++) if (TypeTable[i].type == t) return i; return -1; } // called from main.c attribute_hidden void InitTypeTables(void) { /* Type2Table */ for (int type = 0; type < MAX_NUM_SEXPTYPE; type++) { int j = findTypeInTypeTable(type); if (j != -1) { const char *cstr = TypeTable[j].str; SEXP rchar = PROTECT(mkChar(cstr)); SEXP rstr = ScalarString(rchar); MARK_NOT_MUTABLE(rstr); R_PreserveObject(rstr); UNPROTECT(1); /* rchar */ SEXP rsym = install(cstr); Type2Table[type].cstrName = cstr; Type2Table[type].rcharName = rchar; Type2Table[type].rstrName = rstr; Type2Table[type].rsymName = rsym; } else { Type2Table[type].cstrName = NULL; Type2Table[type].rcharName = NULL; Type2Table[type].rstrName = NULL; Type2Table[type].rsymName = NULL; } } } SEXP type2str_nowarn(SEXPTYPE t) /* returns a CHARSXP */ { if (t < MAX_NUM_SEXPTYPE) { /* FIXME: branch not really needed */ SEXP res = Type2Table[t].rcharName; if (res != NULL) return res; } return R_NilValue; } SEXP type2str(SEXPTYPE t) /* returns a CHARSXP */ { SEXP s = type2str_nowarn(t); if (s != R_NilValue) { return s; } warning(_("type %d is unimplemented in '%s'"), t, "type2str"); char buf[50]; snprintf(buf, 50, "unknown type #%d", t); return mkChar(buf); } SEXP type2rstr(SEXPTYPE t) /* returns a STRSXP */ { if (t < MAX_NUM_SEXPTYPE) { /* FIXME: branch not really needed */ SEXP res = Type2Table[t].rstrName; if (res != NULL) return res; } error(_("type %d is unimplemented in '%s'"), t, "type2ImmutableScalarString"); return R_NilValue; /* for -Wall */ } const char *type2char(SEXPTYPE t) /* returns a char* */ { if (t < MAX_NUM_SEXPTYPE) { /* FIXME: branch not really needed */ const char * res = Type2Table[t].cstrName; if (res != NULL) return res; } warning(_("type %d is unimplemented in '%s'"), t, "type2char"); static char buf[50]; snprintf(buf, 50, "unknown type #%d", t); return buf; } #ifdef UNUSED SEXP NORET type2symbol(SEXPTYPE t) { if (t >= 0 && t < MAX_NUM_SEXPTYPE) { /* FIXME: branch not really needed */ SEXP res = Type2Table[t].rsymName; if (res != NULL) { return res; } } error(_("type %d is unimplemented in '%s'"), t, "type2symbol"); } #endif attribute_hidden void NORET UNIMPLEMENTED_TYPEt(const char *s, SEXPTYPE t) { int i; for (i = 0; TypeTable[i].str; i++) { if (TypeTable[i].type == t) error(_("unimplemented type '%s' in '%s'\n"), TypeTable[i].str, s); } error(_("unimplemented type (%d) in '%s'\n"), t, s); } void NORET UNIMPLEMENTED_TYPE(const char *s, SEXP x) { UNIMPLEMENTED_TYPEt(s, TYPEOF(x)); } # include # include # include /* Previous versions of R (< 2.3.0) assumed wchar_t was in Unicode (and it commonly is). These functions do not. */ # ifdef WORDS_BIGENDIAN static const char UCS2ENC[] = "UCS-2BE"; # else static const char UCS2ENC[] = "UCS-2LE"; # endif /* * out=NULL returns the number of the MBCS chars */ /* Note: this does not terminate out, as all current uses are to look * at 'out' a wchar at a time, and sometimes just one char. */ size_t mbcsToUcs2(const char *in, ucs2_t *out, int nout, int enc) { void *cd = NULL ; const char *i_buf; char *o_buf; size_t i_len, o_len, status, wc_len; /* out length */ wc_len = (enc == CE_UTF8)? utf8towcs(NULL, in, 0) : mbstowcs(NULL, in, 0); if (out == NULL || (int)wc_len < 0) return wc_len; if ((void*)-1 == (cd = Riconv_open(UCS2ENC, (enc == CE_UTF8) ? "UTF-8": ""))) return (size_t) -1; i_buf = (char *)in; i_len = strlen(in); /* not including terminator */ o_buf = (char *)out; o_len = ((size_t) nout) * sizeof(ucs2_t); status = Riconv(cd, &i_buf, (size_t *)&i_len, &o_buf, (size_t *)&o_len); int serrno = errno; Riconv_close(cd); if (status == (size_t)-1) { switch(serrno){ case EINVAL: return (size_t) -2; case EILSEQ: return (size_t) -1; case E2BIG: break; default: errno = EILSEQ; return (size_t) -1; } } return wc_len; /* status would be better? */ } #include /* This one is not in Rinternals.h, but is used in internet module */ Rboolean isBlankString(const char *s) { if(mbcslocale) { wchar_t wc; size_t used; mbstate_t mb_st; mbs_init(&mb_st); while( (used = Mbrtowc(&wc, s, MB_CUR_MAX, &mb_st)) ) { if(!iswspace((wint_t) wc)) return FALSE; s += used; } } else while (*s) if (!isspace((int)*s++)) return FALSE; return TRUE; } Rboolean StringBlank(SEXP x) { if (x == R_NilValue) return TRUE; else return CHAR(x)[0] == '\0'; } /* Function to test whether a string is a true value */ Rboolean StringTrue(const char *name) { int i; for (i = 0; truenames[i]; i++) if (!strcmp(name, truenames[i])) return TRUE; return FALSE; } Rboolean StringFalse(const char *name) { int i; for (i = 0; falsenames[i]; i++) if (!strcmp(name, falsenames[i])) return TRUE; return FALSE; } /* used in bind.c and options.c */ SEXP attribute_hidden EnsureString(SEXP s) { switch(TYPEOF(s)) { case SYMSXP: s = PRINTNAME(s); break; case STRSXP: s = STRING_ELT(s, 0); break; case CHARSXP: break; case NILSXP: s = R_BlankString; break; default: error(_("invalid tag in name extraction")); } return s; } /* FIXME: ngettext reguires unsigned long, but %u would seem appropriate */ void Rf_checkArityCall(SEXP op, SEXP args, SEXP call) { if (PRIMARITY(op) >= 0 && PRIMARITY(op) != length(args)) { if (PRIMINTERNAL(op)) error(ngettext("%d argument passed to .Internal(%s) which requires %d", "%d arguments passed to .Internal(%s) which requires %d", (unsigned long) length(args)), length(args), PRIMNAME(op), PRIMARITY(op)); else errorcall(call, ngettext("%d argument passed to '%s' which requires %d", "%d arguments passed to '%s' which requires %d", (unsigned long) length(args)), length(args), PRIMNAME(op), PRIMARITY(op)); } } void attribute_hidden Rf_check1arg(SEXP arg, SEXP call, const char *formal) { SEXP tag = TAG(arg); const char *supplied; size_t ns; if (tag == R_NilValue) return; supplied = CHAR(PRINTNAME(tag)); ns = strlen(supplied); if (ns > strlen(formal) || strncmp(supplied, formal, ns)) errorcall(call, _("supplied argument name '%s' does not match '%s'"), supplied, formal); } SEXP nthcdr(SEXP s, int n) { if (isList(s) || isLanguage(s) || isFrame(s) || TYPEOF(s) == DOTSXP ) { while( n-- > 0 ) { if (s == R_NilValue) error(_("'nthcdr' list shorter than %d"), n); s = CDR(s); } return s; } else error(_("'nthcdr' needs a list to CDR down")); return R_NilValue;/* for -Wall */ } /* This is a primitive (with no arguments) */ SEXP attribute_hidden do_nargs(SEXP call, SEXP op, SEXP args, SEXP rho) { RCNTXT *cptr; int nargs = NA_INTEGER; checkArity(op, args); for (cptr = R_GlobalContext; cptr != NULL; cptr = cptr->nextcontext) { if ((cptr->callflag & CTXT_FUNCTION) && cptr->cloenv == rho) { nargs = length(cptr->promargs); break; } } return ScalarInteger(nargs); } /* formerly used in subscript.c, in Utils.h */ void attribute_hidden setIVector(int * vec, int len, int val) { for (int i = 0; i < len; i++) vec[i] = val; } /* unused in R, in Utils.h, apparently used in Rcpp */ void attribute_hidden setRVector(double * vec, int len, double val) { for (int i = 0; i < len; i++) vec[i] = val; } /* unused in R, in Rinternals.h */ void setSVector(SEXP * vec, int len, SEXP val) { for (int i = 0; i < len; i++) vec[i] = val; } Rboolean isFree(SEXP val) { SEXP t; for (t = R_FreeSEXP; t != R_NilValue; t = CAR(t)) if (val == t) return TRUE; return FALSE; } /* Debugging functions (hence the d-prefix). */ /* These are intended to be called interactively from */ /* a debugger such as gdb, so you don't have to remember */ /* the names of the data structure components. */ int dtype(SEXP q) { return((int)TYPEOF(q)); } SEXP dcar(SEXP l) { return(CAR(l)); } SEXP dcdr(SEXP l) { return(CDR(l)); } static void isort_with_index(int *x, int *indx, int n) { int i, j, h, iv, v; for (h = 1; h <= n / 9; h = 3 * h + 1); for (; h > 0; h /= 3) for (i = h; i < n; i++) { v = x[i]; iv = indx[i]; j = i; while (j >= h && x[j - h] > v) { x[j] = x[j - h]; indx[j] = indx[j-h]; j -= h; } x[j] = v; indx[j] = iv; } } // body(x) without attributes "srcref", "srcfile", "wholeSrcref" : // NOTE: Callers typically need PROTECT(R_body_no_src(.)) SEXP R_body_no_src(SEXP x) { SEXP b = PROTECT(duplicate(BODY_EXPR(x))); /* R's removeSource() works *recursively* on the body() in ../library/utils/R/sourceutils.R but that seems unneeded (?) */ setAttrib(b, R_SrcrefSymbol, R_NilValue); setAttrib(b, R_SrcfileSymbol, R_NilValue); setAttrib(b, R_WholeSrcrefSymbol, R_NilValue); UNPROTECT(1); return b; } /* merge(xinds, yinds, all.x, all.y) */ /* xinds, yinds are along x and y rows matching into the (numeric) common indices, with 0 for non-matches. all.x and all.y are boolean. The return value is a list with 4 elements (xi, yi, x.alone, y.alone), which are index vectors for rows of x or y. */ SEXP attribute_hidden do_merge(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP xi, yi, ansx, ansy, ans; int nx = 0, ny = 0, i, j, k, nx_lone = 0, ny_lone = 0; int all_x = 0, all_y = 0, ll = 0/* "= 0" : for -Wall */; int nnx, nny; checkArity(op, args); xi = CAR(args); // NB: long vectors are not supported for input if ( !isInteger(xi) || !(nx = LENGTH(xi)) ) error(_("invalid '%s' argument"), "xinds"); yi = CADR(args); if ( !isInteger(yi) || !(ny = LENGTH(yi)) ) error(_("invalid '%s' argument"), "yinds"); if(!LENGTH(ans = CADDR(args)) || NA_LOGICAL == (all_x = asLogical(ans))) error(_("'all.x' must be TRUE or FALSE")); if(!LENGTH(ans = CADDDR(args))|| NA_LOGICAL == (all_y = asLogical(ans))) error(_("'all.y' must be TRUE or FALSE")); /* 0. sort the indices */ int *ix = (int *) R_alloc((size_t) nx, sizeof(int)); int *iy = (int *) R_alloc((size_t) ny, sizeof(int)); for(i = 0; i < nx; i++) ix[i] = i+1; for(i = 0; i < ny; i++) iy[i] = i+1; isort_with_index(INTEGER(xi), ix, nx); isort_with_index(INTEGER(yi), iy, ny); /* 1. determine result sizes */ for (i = 0; i < nx; i++) if (INTEGER(xi)[i] > 0) break; nx_lone = i; for (i = 0; i < ny; i++) if (INTEGER(yi)[i] > 0) break; ny_lone = i; double dnans = 0; for (i = nx_lone, j = ny_lone; i < nx; i = nnx, j = nny) { int tmp = INTEGER(xi)[i]; for(nnx = i; nnx < nx; nnx++) if(INTEGER(xi)[nnx] != tmp) break; /* the next is not in theory necessary, since we have the common values only */ for(; j < ny; j++) if(INTEGER(yi)[j] >= tmp) break; for(nny = j; nny < ny; nny++) if(INTEGER(yi)[nny] != tmp) break; /* printf("i %d nnx %d j %d nny %d\n", i, nnx, j, nny); */ dnans += ((double)(nnx-i))*(nny-j); } if (dnans > R_XLEN_T_MAX) error(_("number of rows in the result exceeds maximum vector length")); R_xlen_t nans = (int) dnans; /* 2. allocate and store result components */ const char *nms[] = {"xi", "yi", "x.alone", "y.alone", ""}; ans = PROTECT(mkNamed(VECSXP, nms)); ansx = allocVector(INTSXP, nans); SET_VECTOR_ELT(ans, 0, ansx); ansy = allocVector(INTSXP, nans); SET_VECTOR_ELT(ans, 1, ansy); if(all_x) { SEXP x_lone = allocVector(INTSXP, nx_lone); SET_VECTOR_ELT(ans, 2, x_lone); for (i = 0, ll = 0; i < nx_lone; i++) INTEGER(x_lone)[ll++] = ix[i]; } if(all_y) { SEXP y_lone = allocVector(INTSXP, ny_lone); SET_VECTOR_ELT(ans, 3, y_lone); for (i = 0, ll = 0; i < ny_lone; i++) INTEGER(y_lone)[ll++] = iy[i]; } for (i = nx_lone, j = ny_lone, k = 0; i < nx; i = nnx, j = nny) { int tmp = INTEGER(xi)[i]; for(nnx = i; nnx < nx; nnx++) if(INTEGER(xi)[nnx] != tmp) break; for(; j < ny; j++) if(INTEGER(yi)[j] >= tmp) break; for(nny = j; nny < ny; nny++) if(INTEGER(yi)[nny] != tmp) break; for(int i0 = i; i0 < nnx; i0++) for(int j0 = j; j0 < nny; j0++) { INTEGER(ansx)[k] = ix[i0]; INTEGER(ansy)[k++] = iy[j0]; } } UNPROTECT(1); return ans; } /* Functions for getting and setting the working directory. */ #ifdef Win32 # define WIN32_LEAN_AND_MEAN 1 # include #endif SEXP static intern_getwd(void) { SEXP rval = R_NilValue; char buf[4*PATH_MAX+1]; #ifdef Win32 { wchar_t wbuf[PATH_MAX+1]; int res = GetCurrentDirectoryW(PATH_MAX, wbuf); if(res > 0) { wcstoutf8(buf, wbuf, sizeof(buf)); R_UTF8fixslash(buf); PROTECT(rval = allocVector(STRSXP, 1)); SET_STRING_ELT(rval, 0, mkCharCE(buf, CE_UTF8)); UNPROTECT(1); } } #else char *res = getcwd(buf, PATH_MAX); /* can return NULL */ if(res) rval = mkString(buf); #endif return(rval); } SEXP attribute_hidden do_getwd(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); return(intern_getwd()); } #if defined(Win32) && defined(_MSC_VER) # include /* for chdir, via io.h */ #endif SEXP attribute_hidden do_setwd(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP s = R_NilValue, wd = R_NilValue; /* -Wall */ checkArity(op, args); if (!isPairList(args) || !isValidString(s = CAR(args))) error(_("character argument expected")); if (STRING_ELT(s, 0) == NA_STRING) error(_("missing value is invalid")); /* get current directory to return */ PROTECT(wd = intern_getwd()); #ifdef Win32 { const wchar_t *path = filenameToWchar(STRING_ELT(s, 0), TRUE); if(_wchdir(path) < 0) error(_("cannot change working directory")); } #else { const char *path = R_ExpandFileName(translateChar(STRING_ELT(s, 0))); if(chdir(path) < 0) error(_("cannot change working directory")); } #endif UNPROTECT(1); /* wd */ return(wd); } /* remove portion of path before file separator if one exists */ #ifdef Win32 SEXP attribute_hidden do_basename(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, s = R_NilValue; /* -Wall */ char sp[4*PATH_MAX+1]; wchar_t buf[PATH_MAX], *p; const wchar_t *pp; int i, n; checkArity(op, args); if (TYPEOF(s = CAR(args)) != STRSXP) error(_("a character vector argument expected")); PROTECT(ans = allocVector(STRSXP, n = LENGTH(s))); for(i = 0; i < n; i++) { if (STRING_ELT(s, i) == NA_STRING) SET_STRING_ELT(ans, i, NA_STRING); else { pp = filenameToWchar(STRING_ELT(s, i), TRUE); if (wcslen(pp) > PATH_MAX - 1) error(_("path too long")); wcscpy(buf, pp); R_wfixslash(buf); /* remove trailing file separator(s) */ if (*buf) { p = buf + wcslen(buf) - 1; while (p >= buf && *p == L'/') *(p--) = L'\0'; } if ((p = wcsrchr(buf, L'/'))) p++; else p = buf; wcstoutf8(sp, p, sizeof(sp)); SET_STRING_ELT(ans, i, mkCharCE(sp, CE_UTF8)); } } UNPROTECT(1); return(ans); } #else SEXP attribute_hidden do_basename(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, s = R_NilValue; /* -Wall */ char buf[PATH_MAX], *p, fsp = FILESEP[0]; const char *pp; int i, n; checkArity(op, args); if (TYPEOF(s = CAR(args)) != STRSXP) error(_("a character vector argument expected")); PROTECT(ans = allocVector(STRSXP, n = LENGTH(s))); for(i = 0; i < n; i++) { if (STRING_ELT(s, i) == NA_STRING) SET_STRING_ELT(ans, i, NA_STRING); else { pp = R_ExpandFileName(translateChar(STRING_ELT(s, i))); if (strlen(pp) > PATH_MAX - 1) error(_("path too long")); strcpy (buf, pp); if (*buf) { p = buf + strlen(buf) - 1; while (p >= buf && *p == fsp) *(p--) = '\0'; } if ((p = Rf_strrchr(buf, fsp))) p++; else p = buf; SET_STRING_ELT(ans, i, mkChar(p)); } } UNPROTECT(1); return(ans); } #endif /* remove portion of path after last file separator if one exists, else return "." */ #ifdef Win32 SEXP attribute_hidden do_dirname(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, s = R_NilValue; /* -Wall */ wchar_t buf[PATH_MAX], *p; const wchar_t *pp; char sp[4*PATH_MAX+1]; int i, n; checkArity(op, args); if (TYPEOF(s = CAR(args)) != STRSXP) error(_("a character vector argument expected")); PROTECT(ans = allocVector(STRSXP, n = LENGTH(s))); for(i = 0; i < n; i++) { if (STRING_ELT(s, i) == NA_STRING) SET_STRING_ELT(ans, i, NA_STRING); else { memset(sp, 0, 4*PATH_MAX); pp = filenameToWchar(STRING_ELT(s, i), TRUE); if (wcslen(pp) > PATH_MAX - 1) error(_("path too long")); if (wcslen(pp)) { wcscpy (buf, pp); R_wfixslash(buf); /* remove trailing file separator(s) */ while ( *(p = buf + wcslen(buf) - 1) == L'/' && p > buf && (p > buf+2 || *(p-1) != L':')) *p = L'\0'; p = wcsrchr(buf, L'/'); if(p == NULL) wcscpy(buf, L"."); else { while(p > buf && *p == L'/' /* this covers both drives and network shares */ && (p > buf+2 || *(p-1) != L':')) --p; p[1] = L'\0'; } wcstoutf8(sp, buf, sizeof(sp)); } SET_STRING_ELT(ans, i, mkCharCE(sp, CE_UTF8)); } } UNPROTECT(1); return(ans); } #else SEXP attribute_hidden do_dirname(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, s = R_NilValue; /* -Wall */ char buf[PATH_MAX], *p, fsp = FILESEP[0]; const char *pp; int i, n; checkArity(op, args); if (TYPEOF(s = CAR(args)) != STRSXP) error(_("a character vector argument expected")); PROTECT(ans = allocVector(STRSXP, n = LENGTH(s))); for(i = 0; i < n; i++) { if (STRING_ELT(s, i) == NA_STRING) SET_STRING_ELT(ans, i, NA_STRING); else { pp = R_ExpandFileName(translateChar(STRING_ELT(s, i))); if (strlen(pp) > PATH_MAX - 1) error(_("path too long")); size_t ll = strlen(pp); if (ll) { // svMisc calls this with "" strcpy (buf, pp); /* remove trailing file separator(s) */ while ( *(p = buf + ll - 1) == fsp && p > buf) *p = '\0'; p = Rf_strrchr(buf, fsp); if(p == NULL) strcpy(buf, "."); else { while(p > buf && *p == fsp) --p; p[1] = '\0'; } } else buf[0] = '\0'; SET_STRING_ELT(ans, i, mkChar(buf)); } } UNPROTECT(1); return(ans); } #endif #ifndef Win32 /* Windows version is in src/gnuwin32/extra.c */ #ifndef HAVE_DECL_REALPATH extern char *realpath(const char *path, char *resolved_path); #endif SEXP attribute_hidden do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, paths = CAR(args); int i, n = LENGTH(paths); const char *path; char abspath[PATH_MAX+1]; checkArity(op, args); if (!isString(paths)) error(_("'path' must be a character vector")); int mustWork = asLogical(CADDR(args)); /* 1, NA_LOGICAL or 0 */ /* Does any platform not have this? */ #ifdef HAVE_REALPATH PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { path = translateChar(STRING_ELT(paths, i)); char *res = realpath(path, abspath); if (res) SET_STRING_ELT(ans, i, mkChar(abspath)); else { SET_STRING_ELT(ans, i, STRING_ELT(paths, i)); /* and report the problem */ if (mustWork == 1) error("path[%d]=\"%s\": %s", i+1, path, strerror(errno)); else if (mustWork == NA_LOGICAL) warning("path[%d]=\"%s\": %s", i+1, path, strerror(errno)); } } #else Rboolean OK; warning("this platform does not have realpath so the results may not be canonical"); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { path = translateChar(STRING_ELT(paths, i)); OK = strlen(path) <= PATH_MAX; if (OK) { if (path[0] == '/') strncpy(abspath, path, PATH_MAX); else { OK = getcwd(abspath, PATH_MAX) != NULL; OK = OK && (strlen(path) + strlen(abspath) + 1 <= PATH_MAX); if (OK) {strcat(abspath, "/"); strcat(abspath, path);} } } /* we need to check that this exists */ if (OK) OK = (access(abspath, 0 /* F_OK */) == 0); if (OK) SET_STRING_ELT(ans, i, mkChar(abspath)); else { SET_STRING_ELT(ans, i, STRING_ELT(paths, i)); /* and report the problem */ if (mustWork == 1) error("path[%d]=\"%s\": %s", i+1, path, strerror(errno)); else if (mustWork == NA_LOGICAL) warning("path[%d]=\"%s\": %s", i+1, path, strerror(errno)); } } #endif UNPROTECT(1); return ans; } #ifdef USE_INTERNAL_MKTIME const char *getTZinfo(void) { static char def_tz[PATH_MAX+1] = ""; if (def_tz[0]) return def_tz; // call Sys.timezone() SEXP expr = PROTECT(install("Sys.timezone")); SEXP call = PROTECT(lang1(expr)); SEXP ans = PROTECT(eval(call, R_GlobalEnv)); if(TYPEOF(ans) == STRSXP && LENGTH(ans) == 1) { SEXP el = STRING_ELT(ans, 0); if (el != NA_STRING) { strcpy(def_tz, CHAR(el)); // printf("tz is %s\n", CHAR(el)); UNPROTECT(3); return def_tz; } } UNPROTECT(3); warning("system timezone name is unknown: set environment variable TZ"); strcpy(def_tz, "unknown"); // code will then use TZDEFAULT, which is "UTC" return def_tz; } #endif #endif // not Win32 /* encodeString(x, w, quote, justify) */ SEXP attribute_hidden do_encodeString(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, x, s; R_xlen_t i, len; int w, quote = 0, justify, na; const char *cs; Rboolean findWidth; checkArity(op, args); if (TYPEOF(x = CAR(args)) != STRSXP) error(_("a character vector argument expected")); if(isNull(CADR(args))) w = NA_INTEGER; else { w = asInteger(CADR(args)); if(w != NA_INTEGER && w < 0) error(_("invalid '%s' value"), "width"); } findWidth = (w == NA_INTEGER); s = CADDR(args); if(LENGTH(s) != 1 || TYPEOF(s) != STRSXP) error(_("invalid '%s' value"), "quote"); cs = translateChar(STRING_ELT(s, 0)); if(strlen(cs) > 0) quote = cs[0]; if(strlen(cs) > 1) warning(_("only the first character of 'quote' will be used")); justify = asInteger(CADDDR(args)); if(justify == NA_INTEGER || justify < 0 || justify > 3) error(_("invalid '%s' value"), "justify"); if(justify == 3) w = 0; na = asLogical(CAD4R(args)); if(na == NA_LOGICAL) error(_("invalid '%s' value"), "na.encode"); len = XLENGTH(x); if(findWidth && justify < 3) { w = 0; for(i = 0; i < len; i++) { s = STRING_ELT(x, i); if(na || s != NA_STRING) w = R_imax2(w, Rstrlen(s, quote)); } if(quote) w +=2; /* for surrounding quotes */ } PROTECT(ans = duplicate(x)); for(i = 0; i < len; i++) { s = STRING_ELT(x, i); if(na || s != NA_STRING) { cetype_t ienc = getCharCE(s); if(ienc == CE_UTF8) { const char *ss = EncodeString(s, w-1000000, quote, (Rprt_adj) justify); SET_STRING_ELT(ans, i, mkCharCE(ss, ienc)); } else { const char *ss = EncodeString(s, w, quote, (Rprt_adj) justify); SET_STRING_ELT(ans, i, mkChar(ss)); } } } UNPROTECT(1); return ans; } SEXP attribute_hidden do_encoding(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, x; R_xlen_t i, n; char *tmp; checkArity(op, args); if (TYPEOF(x = CAR(args)) != STRSXP) error(_("a character vector argument expected")); n = XLENGTH(x); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { if(IS_BYTES(STRING_ELT(x, i))) tmp = "bytes"; else if(IS_LATIN1(STRING_ELT(x, i))) tmp = "latin1"; else if(IS_UTF8(STRING_ELT(x, i))) tmp = "UTF-8"; else tmp = "unknown"; SET_STRING_ELT(ans, i, mkChar(tmp)); } UNPROTECT(1); return ans; } SEXP attribute_hidden do_setencoding(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP x, enc, tmp; int m; R_xlen_t i, n; const char *this; checkArity(op, args); if (TYPEOF(x = CAR(args)) != STRSXP) error(_("a character vector argument expected")); if (TYPEOF(enc = CADR(args)) != STRSXP) error(_("a character vector 'value' expected")); m = LENGTH(enc); if(m == 0) error(_("'value' must be of positive length")); if(MAYBE_REFERENCED(x)) x = duplicate(x); PROTECT(x); n = XLENGTH(x); for(i = 0; i < n; i++) { cetype_t ienc = CE_NATIVE; this = CHAR(STRING_ELT(enc, i % m)); /* ASCII */ if(streql(this, "latin1")) ienc = CE_LATIN1; else if(streql(this, "UTF-8")) ienc = CE_UTF8; else if(streql(this, "bytes")) ienc = CE_BYTES; tmp = STRING_ELT(x, i); if(tmp == NA_STRING) continue; if (! ((ienc == CE_LATIN1 && IS_LATIN1(tmp)) || (ienc == CE_UTF8 && IS_UTF8(tmp)) || (ienc == CE_BYTES && IS_BYTES(tmp)) || (ienc == CE_NATIVE && ! IS_LATIN1(tmp) && ! IS_UTF8(tmp)))) SET_STRING_ELT(x, i, mkCharLenCE(CHAR(tmp), LENGTH(tmp), ienc)); } UNPROTECT(1); return x; } SEXP attribute_hidden markKnown(const char *s, SEXP ref) { int ienc = 0; if(ENC_KNOWN(ref)) { if(known_to_be_latin1) ienc = CE_LATIN1; if(known_to_be_utf8) ienc = CE_UTF8; } return mkCharCE(s, ienc); } Rboolean strIsASCII(const char *str) { const char *p; for(p = str; *p; p++) if((unsigned int)*p > 0x7F) return FALSE; return TRUE; } /* Number of additional bytes */ static const unsigned char utf8_table4[] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; int attribute_hidden utf8clen(char c) { /* This allows through 8-bit chars 10xxxxxx, which are invalid */ if ((c & 0xc0) != 0xc0) return 1; return 1 + utf8_table4[c & 0x3f]; } static Rwchar_t utf16toucs(wchar_t high, wchar_t low) { return 0x10000 + ((int) (high & 0x3FF) << 10 ) + (int) (low & 0x3FF); } /* Return the low UTF-16 surrogate from a UTF-8 string; assumes all testing has been done. */ static wchar_t utf8toutf16low(const char *s) { return (unsigned int) LOW_SURROGATE_START | ((s[2] & 0x0F) << 6) | (s[3] & 0x3F); } Rwchar_t attribute_hidden utf8toucs32(wchar_t high, const char *s) { return utf16toucs(high, utf8toutf16low(s)); } /* These return the result in wchar_t. If wchar_t is 16 bit (e.g. UTF-16LE on Windows only the high surrogate is returned; call utf8toutf16low next. */ size_t attribute_hidden utf8toucs(wchar_t *wc, const char *s) { unsigned int byte; wchar_t local, *w; byte = *((unsigned char *)s); w = wc ? wc: &local; if (byte == 0) { *w = (wchar_t) 0; return 0; } else if (byte < 0xC0) { *w = (wchar_t) byte; return 1; } else if (byte < 0xE0) { if(strlen(s) < 2) return (size_t)-2; if ((s[1] & 0xC0) == 0x80) { *w = (wchar_t) (((byte & 0x1F) << 6) | (s[1] & 0x3F)); return 2; } else return (size_t)-1; } else if (byte < 0xF0) { if(strlen(s) < 3) return (size_t)-2; if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) { *w = (wchar_t) (((byte & 0x0F) << 12) | (unsigned int) ((s[1] & 0x3F) << 6) | (s[2] & 0x3F)); byte = (unsigned int) *w; /* Surrogates range */ if(byte >= 0xD800 && byte <= 0xDFFF) return (size_t)-1; if(byte == 0xFFFE || byte == 0xFFFF) return (size_t)-1; return 3; } else return (size_t)-1; } else if (byte < 0xf8) { if(strlen(s) < 4) return (size_t)-2; if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80) && ((s[3] & 0xC0) == 0x80)) { unsigned int cvalue = (((byte & 0x0F) << 18) | (unsigned int) ((s[1] & 0x3F) << 12) | (unsigned int) ((s[2] & 0x3F) << 6) | (s[3] & 0x3F)); if(sizeof(wchar_t) < 4) /* Assume UTF-16 and return high surrogate. Users need to call utf8toutf16low next. */ *w = (wchar_t) ((cvalue - 0x10000) >> 10) | 0xD800; else *w = (wchar_t) cvalue; return 4; } else return (size_t)-1; } if(sizeof(wchar_t) < 4) return (size_t)-2; /* So now handle 5.6 byte sequences with no testing */ if (byte < 0xFC) { if(strlen(s) < 5) return (size_t)-2; *w = (wchar_t) (((byte & 0x0F) << 24) | (unsigned int) ((s[1] & 0x3F) << 12) | (unsigned int) ((s[2] & 0x3F) << 12) | (unsigned int) ((s[3] & 0x3F) << 6) | (s[4] & 0x3F)); return 5; } else { if(strlen(s) < 6) return (size_t)-2; *w = (wchar_t) (((byte & 0x0F) << 30) | (unsigned int) ((s[1] & 0x3F) << 24) | (unsigned int) ((s[2] & 0x3F) << 18) | (unsigned int) ((s[3] & 0x3F) << 12) | (unsigned int) ((s[4] & 0x3F) << 6) | (s[5] & 0x3F)); return 6; } } size_t utf8towcs(wchar_t *wc, const char *s, size_t n) { ssize_t m, res = 0; const char *t; wchar_t *p; wchar_t local; if(wc) for(p = wc, t = s; ; p++, t += m) { m = (ssize_t) utf8toucs(p, t); if (m < 0) error(_("invalid input '%s' in 'utf8towcs'"), s); if (m == 0) break; res ++; if (res >= n) break; if (IS_HIGH_SURROGATE(*p)) { *(++p) = utf8toutf16low(t); res ++; if (res >= n) break; } } else for(t = s; ; res++, t += m) { m = (ssize_t) utf8toucs(&local, t); if (m < 0) error(_("invalid input '%s' in 'utf8towcs'"), s); if (m == 0) break; } return (size_t) res; } /* based on pcre.c */ static const unsigned int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; static const unsigned int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; /* s is NULL, or it contains at least n bytes. Just write a a terminator if it's not big enough. */ static size_t Rwcrtomb32(char *s, Rwchar_t cvalue, size_t n) { register size_t i, j; if (!n) return 0; if (s) *s = 0; /* Simplifies exit later */ if(cvalue == 0) return 0; for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) if (cvalue <= utf8_table1[i]) break; if (i >= n - 1) return 0; /* need space for terminal null */ if (s) { s += i; for (j = i; j > 0; j--) { *s-- = (char) (0x80 | (cvalue & 0x3f)); cvalue >>= 6; } *s = (char) (utf8_table2[i] | cvalue); } return i + 1; } /* on input, wc is a string encoded in UTF-16 or UCS-2 or UCS-4. s can be a buffer of size n>=0 chars, or NULL. If n=0 or s=NULL, nothing is written. The return value is the number of chars including the terminating null. If the buffer is not big enough, the result is truncated but still null-terminated */ attribute_hidden // but used in windlgs size_t wcstoutf8(char *s, const wchar_t *wc, size_t n) { size_t m, res=0; char *t; const wchar_t *p; if (!n) return 0; for(p = wc, t = s; ; p++) { if (IS_SURROGATE_PAIR(*p, *(p+1))) { Rwchar_t cvalue = ((*p & 0x3FF) << 10) + (*(p+1) & 0x3FF) + 0x010000; m = Rwcrtomb32(t, cvalue, n - res); p++; } else m = Rwcrtomb32(t, (Rwchar_t)(*p), n - res); if (!m) break; res += m; if (t) t += m; } return res + 1; } /* A version that reports failure as an error */ size_t Mbrtowc(wchar_t *wc, const char *s, size_t n, mbstate_t *ps) { size_t used; if(n <= 0 || !*s) return (size_t)0; used = mbrtowc(wc, s, n, ps); if((int) used < 0) { /* This gets called from the menu setup in RGui */ if (!R_Is_Running) return (size_t)-1; /* let's try to print out a readable version */ R_CheckStack2(4*strlen(s) + 10); char err[4*strlen(s) + 1], *q; const char *p; for(p = s, q = err; *p; ) { /* don't do the first to keep ps state straight */ if(p > s) used = mbrtowc(NULL, p, n, ps); if(used == 0) break; else if((int) used > 0) { memcpy(q, p, used); p += used; q += used; n -= used; } else { sprintf(q, "<%02x>", (unsigned char) *p++); q += 4; n--; } } *q = '\0'; error(_("invalid multibyte string at '%s'"), err); } return used; } attribute_hidden Rboolean mbcsValid(const char *str) { return ((int)mbstowcs(NULL, str, 0) >= 0); } /* used in src/library/grDevices/src/cairo/cairoFns.c */ #include "valid_utf8.h" Rboolean utf8Valid(const char *str) { return valid_utf8(str, strlen(str)) == 0; } SEXP attribute_hidden do_validUTF8(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP x = CAR(args); if (!isString(x)) error(_("invalid '%s' argument"), "x"); R_xlen_t n = XLENGTH(x); SEXP ans = allocVector(LGLSXP, n); // no allocation below int *lans = LOGICAL(ans); for (R_xlen_t i = 0; i < n; i++) lans[i] = utf8Valid(CHAR(STRING_ELT(x, i))); return ans; } SEXP attribute_hidden do_validEnc(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP x = CAR(args); if (!isString(x)) error(_("invalid '%s' argument"), "x"); R_xlen_t n = XLENGTH(x); SEXP ans = allocVector(LGLSXP, n); // no allocation below int *lans = LOGICAL(ans); for (R_xlen_t i = 0; i < n; i++) { SEXP p = STRING_ELT(x, i); if (IS_BYTES(p) || IS_LATIN1(p)) lans[i] = 1; else if (IS_UTF8(p) || utf8locale) lans[i] = utf8Valid(CHAR(p)); else if(mbcslocale) lans[i] = mbcsValid(CHAR(p)); else lans[i] = 1; } return ans; } /* MBCS-aware versions of common comparisons. Only used for ASCII c */ char *Rf_strchr(const char *s, int c) { char *p = (char *)s; mbstate_t mb_st; size_t used; if(!mbcslocale || utf8locale) return strchr(s, c); mbs_init(&mb_st); while( (used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st)) ) { if(*p == c) return p; p += used; } return (char *)NULL; } char *Rf_strrchr(const char *s, int c) { char *p = (char *)s, *plast = NULL; mbstate_t mb_st; size_t used; if(!mbcslocale || utf8locale) return strrchr(s, c); mbs_init(&mb_st); while( (used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st)) ) { if(*p == c) plast = p; p += used; } return plast; } #ifdef Win32 void R_fixslash(char *s) { char *p = s; if(mbcslocale) { mbstate_t mb_st; int used; mbs_init(&mb_st); while((used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st))) { if(*p == '\\') *p = '/'; p += used; } } else for (; *p; p++) if (*p == '\\') *p = '/'; /* preserve network shares */ if(s[0] == '/' && s[1] == '/') s[0] = s[1] = '\\'; } void R_UTF8fixslash(char *s) { char *p = s; for (; *p; p++) if (*p == '\\') *p = '/'; /* preserve network shares */ if(s[0] == '/' && s[1] == '/') s[0] = s[1] = '\\'; } static void R_wfixslash(wchar_t *s) { wchar_t *p = s; for (; *p; p++) if (*p == L'\\') *p = L'/'; /* preserve network shares */ if(s[0] == L'/' && s[1] == L'/') s[0] = s[1] = L'\\'; } void R_fixbackslash(char *s) { char *p = s; if(mbcslocale) { mbstate_t mb_st; int used; mbs_init(&mb_st); while((used = Mbrtowc(NULL, p, MB_CUR_MAX, &mb_st))) { if(*p == '/') *p = '\\'; p += used; } } else for (; *p; p++) if (*p == '/') *p = '\\'; } #endif void NORET F77_SYMBOL(rexitc)(char *msg, int *nchar) { int nc = *nchar; char buf[256]; if(nc > 255) { warning(_("error message truncated to 255 chars")); nc = 255; } strncpy(buf, msg, (size_t) nc); buf[nc] = '\0'; error("%s", buf); } void F77_SYMBOL(rwarnc)(char *msg, int *nchar) { int nc = *nchar; char buf[256]; if(nc > 255) { warning(_("warning message truncated to 255 chars")); nc = 255; } strncpy(buf, msg, (size_t) nc); buf[nc] = '\0'; warning("%s", buf); } void F77_SYMBOL(rchkusr)(void) { R_CheckUserInterrupt(); } /* Return a copy of a string using memory from R_alloc. NB: caller has to manage R_alloc stack. Used in platform.c */ char *acopy_string(const char *in) { char *out; size_t len = strlen(in); if (len > 0) { out = (char *) R_alloc(1 + len, sizeof(char)); strcpy(out, in); } else out = ""; return out; } /* Table from http://unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt */ static int s2u[224] = { 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, 0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393, 0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F, 0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9, 0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F, 0xF8E5, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3, 0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF, 0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9, 0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x20AC, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, 0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x2022, 0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0xF8E6, 0xF8E7, 0x21B5, 0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, 0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, 0x2220, 0x2207, 0xF6DA, 0xF6D9, 0xF6DB, 0x220F, 0x221A, 0x22C5, 0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x25CA, 0x2329, 0xF8E8, 0xF8E9, 0xF8EA, 0x2211, 0xF8EB, 0xF8EC, 0xF8ED, 0xF8EE, 0xF8EF, 0xF8F0, 0xF8F1, 0xF8F2, 0xF8F3, 0xF8F4, 0x0020, 0x232A, 0x222B, 0x2320, 0xF8F5, 0x2321, 0xF8F6, 0xF8F7, 0xF8F8, 0xF8F9, 0xF8FA, 0xF8FB, 0xF8FC, 0xF8FD, 0xF8FE, 0x0020 }; void *Rf_AdobeSymbol2utf8(char *work, const char *c0, size_t nwork) { const unsigned char *c = (unsigned char *) c0; unsigned char *t = (unsigned char *) work; while (*c) { if (*c < 32) *t++ = ' '; else { unsigned int u = (unsigned int) s2u[*c - 32]; if (u < 128) *t++ = (unsigned char) u; else if (u < 0x800) { *t++ = (unsigned char) (0xc0 | (u >> 6)); *t++ = (unsigned char) (0x80 | (u & 0x3f)); } else { *t++ = (unsigned char) (0xe0 | (u >> 12)); *t++ = (unsigned char) (0x80 | ((u >> 6) & 0x3f)); *t++ = (unsigned char) (0x80 | (u & 0x3f)); } } if (t+6 > (unsigned char *)(work + nwork)) break; c++; } *t = '\0'; return (char*) work; } int attribute_hidden Rf_AdobeSymbol2ucs2(int n) { if(n >= 32 && n < 256) return s2u[n-32]; else return 0; } double R_strtod5(const char *str, char **endptr, char dec, Rboolean NA, int exact) { LDOUBLE ans = 0.0, p10 = 10.0, fac = 1.0; int n, expn = 0, sign = 1, ndigits = 0, exph = -1; const char *p = str; /* optional whitespace */ while (isspace(*p)) p++; if (NA && strncmp(p, "NA", 2) == 0) { ans = NA_REAL; p += 2; goto done; } /* optional sign */ switch (*p) { case '-': sign = -1; case '+': p++; default: ; } if (strncasecmp(p, "NaN", 3) == 0) { ans = R_NaN; p += 3; goto done; /* C99 specifies this: must come first to avoid 'inf' match */ } else if (strncasecmp(p, "infinity", 8) == 0) { ans = R_PosInf; p += 8; goto done; } else if (strncasecmp(p, "Inf", 3) == 0) { ans = R_PosInf; p += 3; goto done; } if(strlen(p) > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { /* This will overflow to Inf if appropriate */ for(p += 2; p; p++) { if('0' <= *p && *p <= '9') ans = 16*ans + (*p -'0'); else if('a' <= *p && *p <= 'f') ans = 16*ans + (*p -'a' + 10); else if('A' <= *p && *p <= 'F') ans = 16*ans + (*p -'A' + 10); else if(*p == dec) {exph = 0; continue;} else break; if (exph >= 0) exph += 4; } #define strtod_EXACT_CLAUSE \ if(exact && ans > 0x1.fffffffffffffp52) { \ if(exact == NA_LOGICAL) \ warning(_( \ "accuracy loss in conversion from \"%s\" to numeric"), \ str); \ else { \ ans = NA_REAL; \ p = str; /* back out */ \ goto done; \ } \ } strtod_EXACT_CLAUSE; if (*p == 'p' || *p == 'P') { int expsign = 1; double p2 = 2.0; switch(*++p) { case '-': expsign = -1; case '+': p++; default: ; } /* The test for n is in response to PR#16358; it's not right if the exponent is very large, but the overflow or underflow below will handle it. */ #define MAX_EXPONENT_PREFIX 9999 for (n = 0; *p >= '0' && *p <= '9'; p++) n = (n < MAX_EXPONENT_PREFIX) ? n * 10 + (*p - '0') : n; if (ans != 0.0) { /* PR#15976: allow big exponents on 0 */ expn += expsign * n; if(exph > 0) { if (expn - exph < -122) { /* PR#17199: fac may overflow below if expn - exph is too small. 2^-122 is a bit bigger than 1E-37, so should be fine on all systems */ for (n = exph, fac = 1.0; n; n >>= 1, p2 *= p2) if (n & 1) fac *= p2; ans /= fac; p2 = 2.0; } else expn -= exph; } if (expn < 0) { for (n = -expn, fac = 1.0; n; n >>= 1, p2 *= p2) if (n & 1) fac *= p2; ans /= fac; } else { for (n = expn, fac = 1.0; n; n >>= 1, p2 *= p2) if (n & 1) fac *= p2; ans *= fac; } } } goto done; } for ( ; *p >= '0' && *p <= '9'; p++, ndigits++) ans = 10*ans + (*p - '0'); if (*p == dec) for (p++; *p >= '0' && *p <= '9'; p++, ndigits++, expn--) ans = 10*ans + (*p - '0'); if (ndigits == 0) { ans = NA_REAL; p = str; /* back out */ goto done; } strtod_EXACT_CLAUSE; if (*p == 'e' || *p == 'E') { int expsign = 1; switch(*++p) { case '-': expsign = -1; case '+': p++; default: ; } for (n = 0; *p >= '0' && *p <= '9'; p++) n = (n < MAX_EXPONENT_PREFIX) ? n * 10 + (*p - '0') : n; expn += expsign * n; } /* avoid unnecessary underflow for large negative exponents */ if (expn + ndigits < -300) { for (n = 0; n < ndigits; n++) ans /= 10.0; expn += ndigits; } if (expn < -307) { /* use underflow, not overflow */ for (n = -expn, fac = 1.0; n; n >>= 1, p10 *= p10) if (n & 1) fac /= p10; ans *= fac; } else if (expn < 0) { /* positive powers are exact */ for (n = -expn, fac = 1.0; n; n >>= 1, p10 *= p10) if (n & 1) fac *= p10; ans /= fac; } else if (ans != 0.0) { /* PR#15976: allow big exponents on 0, e.g. 0E4933 */ for (n = expn, fac = 1.0; n; n >>= 1, p10 *= p10) if (n & 1) fac *= p10; ans *= fac; } /* explicit overflow to infinity */ if (ans > DBL_MAX) { if (endptr) *endptr = (char *) p; return (sign > 0) ? R_PosInf : R_NegInf; } done: if (endptr) *endptr = (char *) p; return sign * (double) ans; } double R_strtod4(const char *str, char **endptr, char dec, Rboolean NA) { return R_strtod5(str, endptr, dec, NA, FALSE); } double R_strtod(const char *str, char **endptr) { return R_strtod5(str, endptr, '.', FALSE, FALSE); } double R_atof(const char *str) { return R_strtod5(str, NULL, '.', FALSE, FALSE); } /* enc2native and enc2utf8, but they are the same in a UTF-8 locale */ /* primitive */ SEXP attribute_hidden do_enc2(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP ans, el; R_xlen_t i; Rboolean duped = FALSE; checkArity(op, args); check1arg(args, call, "x"); if (!isString(CAR(args))) errorcall(call, "argument is not a character vector"); ans = CAR(args); for (i = 0; i < XLENGTH(ans); i++) { el = STRING_ELT(ans, i); if (el == NA_STRING) continue; if (PRIMVAL(op) || known_to_be_utf8) { /* enc2utf8 */ if (IS_UTF8(el) || IS_ASCII(el) || IS_BYTES(el)) continue; if (!duped) { ans = PROTECT(duplicate(ans)); duped = TRUE; } SET_STRING_ELT(ans, i, mkCharCE(translateCharUTF8(el), CE_UTF8)); } else if (ENC_KNOWN(el)) { /* enc2native */ if (IS_ASCII(el) || IS_BYTES(el)) continue; if (known_to_be_latin1 && IS_LATIN1(el)) continue; if (!duped) { PROTECT(ans = duplicate(ans)); duped = TRUE; } if (known_to_be_latin1) SET_STRING_ELT(ans, i, mkCharCE(translateChar(el), CE_LATIN1)); else SET_STRING_ELT(ans, i, mkChar(translateChar(el))); } } if(duped) UNPROTECT(1); return ans; } #ifdef USE_ICU # include #ifdef USE_ICU_APPLE /* macOS is missing the headers */ typedef int UErrorCode; /* really an enum these days */ struct UCollator; typedef struct UCollator UCollator; typedef enum { UCOL_EQUAL = 0, UCOL_GREATER = 1, UCOL_LESS = -1 } UCollationResult ; typedef enum { UCOL_DEFAULT = -1, UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2, UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, UCOL_CE_STRENGTH_LIMIT, UCOL_QUATERNARY=3, UCOL_IDENTICAL=15, UCOL_STRENGTH_LIMIT, UCOL_OFF = 16, UCOL_ON = 17, UCOL_SHIFTED = 20, UCOL_NON_IGNORABLE = 21, UCOL_LOWER_FIRST = 24, UCOL_UPPER_FIRST = 25, UCOL_ATTRIBUTE_VALUE_COUNT } UColAttributeValue; typedef UColAttributeValue UCollationStrength; typedef enum { UCOL_FRENCH_COLLATION, UCOL_ALTERNATE_HANDLING, UCOL_CASE_FIRST, UCOL_CASE_LEVEL, UCOL_NORMALIZATION_MODE, UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, UCOL_STRENGTH, UCOL_HIRAGANA_QUATERNARY_MODE, UCOL_NUMERIC_COLLATION, UCOL_ATTRIBUTE_COUNT } UColAttribute; /* UCharIterator struct has to be defined since we use its instances as local variables, but we don't actually use any of its members. */ typedef struct UCharIterator { const void *context; int32_t length, start, index, limit, reservedField; void *fns[16]; /* we overshoot here (there is just 10 fns in ICU 3.6), but we have to make sure that enough stack space is allocated when used as a local var in future versions */ } UCharIterator; UCollator* ucol_open(const char *loc, UErrorCode *status); void ucol_close(UCollator *coll); void ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); void ucol_setStrength(UCollator *coll, UCollationStrength strength); UCollationResult ucol_strcollIter(const UCollator *coll, UCharIterator *sIter, UCharIterator *tIter, UErrorCode *status); void uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); void uloc_setDefault(const char* localeID, UErrorCode* status); typedef enum { ULOC_ACTUAL_LOCALE = 0, ULOC_VALID_LOCALE = 1, ULOC_DATA_LOCALE_TYPE_LIMIT = 3 } ULocDataLocaleType ; const char* ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); #define U_ZERO_ERROR 0 #define U_FAILURE(x) ((x)>U_ZERO_ERROR) #define ULOC_ACTUAL_LOCALE 0 #else #include #include #include #include #endif static UCollator *collator = NULL; static int collationLocaleSet = 0; /* called from platform.c */ void attribute_hidden resetICUcollator(void) { if (collator) ucol_close(collator); collator = NULL; collationLocaleSet = 0; } static const struct { const char * const str; int val; } ATtable[] = { { "case_first", UCOL_CASE_FIRST }, { "upper", UCOL_UPPER_FIRST }, { "lower", UCOL_LOWER_FIRST }, { "default ", UCOL_DEFAULT }, { "strength", 999 }, { "primary ", UCOL_PRIMARY }, { "secondary ", UCOL_SECONDARY }, { "teritary ", UCOL_TERTIARY }, { "guaternary ", UCOL_QUATERNARY }, { "identical ", UCOL_IDENTICAL }, { "french_collation", UCOL_FRENCH_COLLATION }, { "on", UCOL_ON }, { "off", UCOL_OFF }, { "normalization", UCOL_NORMALIZATION_MODE }, { "alternate_handling", UCOL_ALTERNATE_HANDLING }, { "non_ignorable", UCOL_NON_IGNORABLE }, { "shifted", UCOL_SHIFTED }, { "case_level", UCOL_CASE_LEVEL }, { "hiragana_quaternary", UCOL_HIRAGANA_QUATERNARY_MODE }, { NULL, 0 } }; #ifdef Win32 #define BUFFER_SIZE 512 typedef int (WINAPI *PGSDLN)(LPWSTR, int); static const char *getLocale(void) { const char *p = getenv("R_ICU_LOCALE"); if (p && p[0]) return p; // This call is >= Vista/Server 2008 // ICU should accept almost all of these, e.g. en-US and uz-Latn-UZ PGSDLN pGSDLN = (PGSDLN) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetSystemDefaultLocaleName"); if(pGSDLN) { WCHAR wcBuffer[BUFFER_SIZE]; pGSDLN(wcBuffer, BUFFER_SIZE); static char locale[BUFFER_SIZE]; WideCharToMultiByte(CP_ACP, 0, wcBuffer, -1, locale, BUFFER_SIZE, NULL, NULL); return locale; } else return "root"; } #else static const char *getLocale(void) { const char *p = getenv("R_ICU_LOCALE"); return (p && p[0]) ? p : setlocale(LC_COLLATE, NULL); } #endif SEXP attribute_hidden do_ICUset(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP x; UErrorCode status = U_ZERO_ERROR; for (; args != R_NilValue; args = CDR(args)) { if (isNull(TAG(args))) error(_("all arguments must be named")); const char *this = CHAR(PRINTNAME(TAG(args))); const char *s; x = CAR(args); if (!isString(x) || LENGTH(x) != 1) error(_("invalid '%s' argument"), this); s = CHAR(STRING_ELT(x, 0)); if (streql(this, "locale")) { if (collator) { ucol_close(collator); collator = NULL; } if(streql(s, "ASCII")) { collationLocaleSet = 2; } else { if(strcmp(s, "none")) { if(streql(s, "default")) uloc_setDefault(getLocale(), &status); else uloc_setDefault(s, &status); if(U_FAILURE(status)) error("failed to set ICU locale %s (%d)", s, status); collator = ucol_open(NULL, &status); if (U_FAILURE(status)) { collator = NULL; error("failed to open ICU collator (%d)", status); } } collationLocaleSet = 1; } } else { int i, at = -1, val = -1; for (i = 0; ATtable[i].str; i++) if (streql(this, ATtable[i].str)) { at = ATtable[i].val; break; } for (i = 0; ATtable[i].str; i++) if (streql(s, ATtable[i].str)) { val = ATtable[i].val; break; } if (collator && at == 999 && val >= 0) { ucol_setStrength(collator, val); } else if (collator && at >= 0 && val >= 0) { ucol_setAttribute(collator, at, val, &status); if (U_FAILURE(status)) error("failed to set ICU collator attribute"); } } } return R_NilValue; } SEXP attribute_hidden do_ICUget(SEXP call, SEXP op, SEXP args, SEXP rho) { const char *ans = "unknown", *res; checkArity(op, args); if (collationLocaleSet == 2) { ans = "ASCII"; } else if(collator) { UErrorCode status = U_ZERO_ERROR; int type = asInteger(CAR(args)); if (type < 1 || type > 2) error(_("invalid '%s' value"), "type"); res = ucol_getLocaleByType(collator, type == 1 ? ULOC_ACTUAL_LOCALE : ULOC_VALID_LOCALE, &status); if(!U_FAILURE(status) && res) ans = res; } else ans = "ICU not in use"; return mkString(ans); } /* Caller has to manage the R_alloc stack */ /* NB: strings can have equal collation weight without being identical */ attribute_hidden int Scollate(SEXP a, SEXP b) { if (!collationLocaleSet) { int errsv = errno; /* OSX may set errno in the operations below. */ collationLocaleSet = 1; #ifndef Win32 if (strcmp("C", getLocale()) ) { #else const char *p = getenv("R_ICU_LOCALE"); if(p && p[0]) { #endif UErrorCode status = U_ZERO_ERROR; uloc_setDefault(getLocale(), &status); if(U_FAILURE(status)) error("failed to set ICU locale (%d)", status); collator = ucol_open(NULL, &status); if (U_FAILURE(status)) { collator = NULL; error("failed to open ICU collator (%d)", status); } } errno = errsv; } if (collator == NULL) return collationLocaleSet == 2 ? strcmp(translateChar(a), translateChar(b)) : strcoll(translateChar(a), translateChar(b)); UCharIterator aIter, bIter; const char *as = translateCharUTF8(a), *bs = translateCharUTF8(b); int len1 = (int) strlen(as), len2 = (int) strlen(bs); uiter_setUTF8(&aIter, as, len1); uiter_setUTF8(&bIter, bs, len2); UErrorCode status = U_ZERO_ERROR; int result = ucol_strcollIter(collator, &aIter, &bIter, &status); if (U_FAILURE(status)) error("could not collate using ICU"); return result; } #else /* not USE_ICU */ SEXP attribute_hidden do_ICUset(SEXP call, SEXP op, SEXP args, SEXP rho) { warning(_("ICU is not supported on this build")); return R_NilValue; } SEXP attribute_hidden do_ICUget(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); return mkString("ICU not in use"); } void attribute_hidden resetICUcollator(void) {} # ifdef Win32 static int Rstrcoll(const char *s1, const char *s2) { R_CheckStack2(sizeof(wchar_t) * (2 + strlen(s1) + strlen(s2))); wchar_t w1[strlen(s1)+1], w2[strlen(s2)+1]; utf8towcs(w1, s1, strlen(s1)); utf8towcs(w2, s2, strlen(s2)); return wcscoll(w1, w2); } int Scollate(SEXP a, SEXP b) { if(getCharCE(a) == CE_UTF8 || getCharCE(b) == CE_UTF8) return Rstrcoll(translateCharUTF8(a), translateCharUTF8(b)); else return strcoll(translateChar(a), translateChar(b)); } # else attribute_hidden int Scollate(SEXP a, SEXP b) { return strcoll(translateChar(a), translateChar(b)); } # endif #endif #include SEXP attribute_hidden do_crc64(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP in = CAR(args); uint64_t crc = 0; char ans[17]; if (!isString(in)) error("input must be a character string"); const char *str = CHAR(STRING_ELT(in, 0)); /* Seems this is really 64-bit only on 64-bit platforms */ crc = lzma_crc64((uint8_t *)str, strlen(str), crc); snprintf(ans, 17, "%lx", (long unsigned int) crc); return mkString(ans); } static void bincode(double *x, R_xlen_t n, double *breaks, int nb, int *code, int right, int include_border) { int lo, hi, nb1 = nb - 1, new; int lft = !right; /* This relies on breaks being sorted, so wise to check that */ for(int i = 1; i < nb; i++) if(breaks[i-1] > breaks[i]) error(_("'breaks' is not sorted")); for(R_xlen_t i = 0; i < n; i++) { code[i] = NA_INTEGER; if(!ISNAN(x[i])) { lo = 0; hi = nb1; if(x[i] < breaks[lo] || breaks[hi] < x[i] || (x[i] == breaks[lft ? hi : lo] && ! include_border)) ; else { while(hi - lo >= 2) { new = (hi + lo)/2; if(x[i] > breaks[new] || (lft && x[i] == breaks[new])) lo = new; else hi = new; } code[i] = lo + 1; } } } } /* 'breaks' cannot be a long vector as the return codes are integer. */ SEXP attribute_hidden do_bincode(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP x, breaks, right, lowest; x = CAR(args); args = CDR(args); breaks = CAR(args); args = CDR(args); right = CAR(args); args = CDR(args); lowest = CAR(args); #ifdef LONG_VECTOR_SUPPORT if (IS_LONG_VEC(breaks)) error(_("long vector '%s' is not supported"), "breaks"); #endif PROTECT(x = coerceVector(x, REALSXP)); PROTECT(breaks = coerceVector(breaks, REALSXP)); R_xlen_t n = XLENGTH(x); int nB = LENGTH(breaks), sr = asLogical(right), sl = asLogical(lowest); if (nB == NA_INTEGER) error(_("invalid '%s' argument"), "breaks"); if (sr == NA_INTEGER) error(_("invalid '%s' argument"), "right"); if (sl == NA_INTEGER) error(_("invalid '%s' argument"), "include.lowest"); SEXP codes; PROTECT(codes = allocVector(INTSXP, n)); bincode(REAL(x), n, REAL(breaks), nB, INTEGER(codes), sr, sl); UNPROTECT(3); return codes; } SEXP attribute_hidden do_tabulate(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP in = CAR(args), nbin = CADR(args); if (TYPEOF(in) != INTSXP) error("invalid input"); R_xlen_t n = XLENGTH(in); int nb = asInteger(nbin); if (nb == NA_INTEGER || nb < 0) error(_("invalid '%s' argument"), "nbin"); int *x = INTEGER(in); SEXP ans; #ifdef LONG_VECTOR_SUPPORT if (n > INT_MAX) { ans = allocVector(REALSXP, nb); double *y = REAL(ans); if (nb) memset(y, 0, nb * sizeof(double)); for(R_xlen_t i = 0 ; i < n ; i++) if (x[i] != NA_INTEGER && x[i] > 0 && x[i] <= nb) y[x[i] - 1]++; } else #endif { ans = allocVector(INTSXP, nb); int *y = INTEGER(ans); if (nb) memset(y, 0, nb * sizeof(int)); for(R_xlen_t i = 0 ; i < n ; i++) if (x[i] != NA_INTEGER && x[i] > 0 && x[i] <= nb) y[x[i] - 1]++; } return ans; } /* Note: R's findInterval( x , vec, ...) has first two arguments swapped ! * .Internal(findInterval(vec, x, rightmost.closed, all.inside, left.open)) * xt x right inside leftOp * x can be a long vector but xt cannot since the result is integer */ SEXP attribute_hidden do_findinterval(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP xt, x, right, inside, leftOp; xt = CAR(args); args = CDR(args); x = CAR(args); args = CDR(args); right = CAR(args); args = CDR(args); inside = CAR(args);args = CDR(args); leftOp = CAR(args); if(TYPEOF(xt) != REALSXP || TYPEOF(x) != REALSXP) error("invalid input"); #ifdef LONG_VECTOR_SUPPORT if (IS_LONG_VEC(xt)) error(_("long vector '%s' is not supported"), "vec"); #endif int n = LENGTH(xt); if (n == NA_INTEGER) error(_("invalid '%s' argument"), "vec"); R_xlen_t nx = XLENGTH(x); int sr = asLogical(right), si = asLogical(inside), lO = asLogical(leftOp); if (sr == NA_INTEGER) error(_("invalid '%s' argument"), "rightmost.closed"); if (si == NA_INTEGER) error(_("invalid '%s' argument"), "all.inside"); SEXP ans = allocVector(INTSXP, nx); double *rxt = REAL(xt), *rx = REAL(x); int ii = 1; for(int i = 0; i < nx; i++) { if (ISNAN(rx[i])) ii = NA_INTEGER; else { int mfl; ii = findInterval2(rxt, n, rx[i], sr, si, lO, ii, &mfl); // -> ../appl/interv.c } INTEGER(ans)[i] = ii; } return ans; } #ifdef Win32 // this includes RS.h # undef ERROR #endif #include SEXP attribute_hidden do_pretty(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP ans, nm, hi; double l = asReal(CAR(args)); args = CDR(args); if (!R_FINITE(l)) error(_("invalid '%s' argument"), "l"); double u = asReal(CAR(args)); args = CDR(args); if (!R_FINITE(u)) error(_("invalid '%s' argument"), "u"); int n = asInteger(CAR(args)); args = CDR(args); if (n == NA_INTEGER || n < 0) error(_("invalid '%s' argument"), "n"); int min_n = asInteger(CAR(args)); args = CDR(args); if (min_n == NA_INTEGER || min_n < 0 || min_n > n) error(_("invalid '%s' argument"), "min.n"); double shrink = asReal(CAR(args)); args = CDR(args); if (!R_FINITE(shrink) || shrink <= 0.) error(_("invalid '%s' argument"), "shrink.sml"); PROTECT(hi = coerceVector(CAR(args), REALSXP)); args = CDR(args); double z; if (!R_FINITE(z = REAL(hi)[0]) || z < 0.) error(_("invalid '%s' argument"), "high.u.bias"); if (!R_FINITE(z = REAL(hi)[1]) || z < 0.) error(_("invalid '%s' argument"), "u5.bias"); int eps = asInteger(CAR(args)); /* eps.correct */ if (eps == NA_INTEGER || eps < 0 || eps > 2) error(_("'eps.correct' must be 0, 1, or 2")); R_pretty(&l, &u, &n, min_n, shrink, REAL(hi), eps, 1); PROTECT(ans = allocVector(VECSXP, 3)); SET_VECTOR_ELT(ans, 0, ScalarReal(l)); SET_VECTOR_ELT(ans, 1, ScalarReal(u)); SET_VECTOR_ELT(ans, 2, ScalarInteger(n)); nm = allocVector(STRSXP, 3); setAttrib(ans, R_NamesSymbol, nm); SET_STRING_ELT(nm, 0, mkChar("l")); SET_STRING_ELT(nm, 1, mkChar("u")); SET_STRING_ELT(nm, 2, mkChar("n")); UNPROTECT(2); return ans; } /* r <- .Internal(formatC(x, as.character(mode), width, digits, as.character(format), as.character(flag), i.strlen)) */ static void str_signif(void *x, R_xlen_t n, const char *type, int width, int digits, const char *format, const char *flag, char **result); SEXP attribute_hidden do_formatC(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP x = CAR(args); args = CDR(args); if (!isVector(x)) error(_("'x' must be a vector")); R_xlen_t n = XLENGTH(x); const char *type = CHAR(STRING_ELT(CAR(args), 0)); args = CDR(args); int width = asInteger(CAR(args)); args = CDR(args); int digits = asInteger(CAR(args)); args = CDR(args); const char *fmt = CHAR(STRING_ELT(CAR(args), 0)); args = CDR(args); const char *flag = CHAR(STRING_ELT(CAR(args), 0)); args = CDR(args); SEXP i_strlen = PROTECT(coerceVector(CAR(args), INTSXP)); char **cptr = (char **) R_alloc(n, sizeof(char*)); for (R_xlen_t i = 0; i < n; i++) { int ix = INTEGER(i_strlen)[i] + 2; cptr[i] = (char *) R_alloc(ix + 1, sizeof(char)); memset(cptr[i], ' ', ix); cptr[i][ix] = 0; } void *px = NULL /* -Wall */; switch(TYPEOF(x)) { case INTSXP: px = INTEGER(x); break; case REALSXP: px = REAL(x); break; default: error("unsupported type "); } str_signif(px, n, type, width, digits, fmt, flag, cptr); SEXP ans = PROTECT(allocVector(STRSXP, n)); for (R_xlen_t i = 0; i < n; i++) SET_STRING_ELT(ans, i, mkChar(cptr[i])); UNPROTECT(2); return ans; } /* Former src/appl/strsignif.c * * Copyright (C) Martin Maechler, 1994, 1998 * Copyright (C) 2001-2013 the R Core Team * * I want you to preserve the copyright of the original author(s), * and encourage you to send me any improvements by e-mail. (MM). * * Originally from Bill Dunlap * bill@stat.washington.edu * Wed Feb 21, 1990 * * Much improved by Martin Maechler, including the "fg" format. * * Patched by Friedrich.Leisch@ci.tuwien.ac.at * Fri Nov 22, 1996 * * Some fixes by Ross Ihaka * ihaka@stat.auckland.ac.nz * Sat Dec 21, 1996 * Integer arguments changed from "long" to "int" * Bus error due to non-writable strings fixed * * BDR 2001-10-30 use R_alloc not Calloc as memory was not * reclaimed on error (and there are many error exits). * * type "double" or "integer" (R - numeric 'mode'). * * width The total field width; width < 0 means to left justify * the number in this field (equivalent to flag = "-"). * It is possible that the result will be longer than this, * but that should only happen in reasonable cases. * * digits The desired number of digits after the decimal point. * digits < 0 uses the default for C, namely 6 digits. * * format "d" (for integers) or "f", "e","E", "g", "G" (for 'real') * "f" gives numbers in the usual "xxx.xxx" format; * "e" and "E" give n.ddde or n.dddE (scientific format); * "g" and "G" puts them into scientific format if it saves * space to do so. * NEW: "fg" gives numbers in "xxx.xxx" format as "f", * ~~ however, digits are *significant* digits and, * if digits > 0, no trailing zeros are produced, as in "g". * * flag Format modifier as in K&R "C", 2nd ed., p.243; * e.g., "0" pads leading zeros; "-" does left adjustment * the other possible flags are "+", " ", and "#". * New (Feb.98): if flag has more than one character, all are passed.. */ /* char here is either ASCII or handled as a whole */ #ifdef Win32 /* avoid latest MinGW's redefinition in stdio.h */ #include #endif #include /* fround */ static void str_signif(void *x, R_xlen_t n, const char *type, int width, int digits, const char *format, const char *flag, char **result) { int dig = abs(digits); Rboolean rm_trailing_0 = digits >= 0; Rboolean do_fg = !strcmp("fg", format); /* TRUE iff format == "fg" */ double xx; int iex; size_t j, len_flag = strlen(flag); const void *vmax = vmaxget(); char *f0 = R_alloc((size_t) do_fg ? 1+1+len_flag+3 : 1, sizeof(char)); char *form = R_alloc((size_t) 1+1+len_flag+3 + strlen(format), sizeof(char)); if (width == 0) error("width cannot be zero"); if (strcmp("d", format) == 0) { if (len_flag == 0) strcpy(form, "%*d"); else { strcpy(form, "%"); strcat(form, flag); strcat(form, "*d"); } if (strcmp("integer", type) == 0) for (R_xlen_t i = 0; i < n; i++) snprintf(result[i], strlen(result[i]) + 1, form, width, ((int *)x)[i]); else error("'type' must be \"integer\" for \"d\"-format"); } else { /* --- floating point --- */ if (len_flag == 0) strcpy(form, "%*.*"); else { strcpy(form, "%"); strcat(form, flag); strcat(form, "*.*"); } if(do_fg) { strcpy(f0, "%"); strcat(f0, flag); strcat(f0, ".*f"); strcat(form, "g"); } else strcat(form, format); #ifdef DEBUG fprintf(stderr, "strsignif.c: form='%s', width=%d, dig=%d\n", form, width, dig); if(do_fg) fprintf(stderr, "\t\"fg\": f0='%s'.", f0); #endif if (strcmp("double", type) == 0) { if(do_fg) /* do smart "f" : */ for (R_xlen_t i = 0; i < n; i++) { xx = ((double *)x)[i]; if(xx == 0.) strcpy(result[i], "0"); else { /* This was iex= (int)floor(log10(fabs(xx))) That's wrong, as xx might get rounded up, and we do need some fuzz or 99.5 is correct. */ double xxx = fabs(xx), X; iex = (int)floor(log10(xxx) + 1e-12); X = fround(xxx/Rexp10((double)iex) + 1e-12, (double)(dig-1)); if(iex > 0 && X >= 10) { xx = X * Rexp10((double)iex); iex++; } if(iex == -4 && fabs(xx)< 1e-4) {/* VERY rare case */ iex = -5; } if(iex < -4) { /* "g" would result in 'e-' representation:*/ snprintf(result[i], strlen(result[i]) + 1, f0, dig-1 + -iex, xx); #ifdef DEBUG fprintf(stderr, " x[%d]=%g, iex=%d\n", i, xx, iex); fprintf(stderr, "\tres. = '%s'; ", result[i]); #endif /* Remove trailing "0"s __ IFF flag has no '#': */ if(rm_trailing_0) { j = strlen(result[i])-1; #ifdef DEBUG int jL = j; #endif while(result[i][j] == '0') j--; result[i][j+1] = '\0'; #ifdef DEBUG fprintf(stderr, "\t>>> jL=%d, j=%d; new res= '%s'\n", jL, j, result[i]); #endif } } else { /* iex >= -4: NOT "e-" */ /* if iex >= dig, would have "e+" representation */ #ifdef DEBUG fprintf(stderr, "\t iex >= -4; using %d for 'dig'\n", (iex >= dig) ? (iex+1) : dig); #endif snprintf(result[i], strlen(result[i]) + 1, form, width, (iex >= dig) ? (iex+1) : dig, xx); } } /* xx != 0 */ } /* if(do_fg) for(i..) */ else for (R_xlen_t i = 0; i < n; i++) snprintf(result[i], strlen(result[i]) + 1, form, width, dig, ((double *)x)[i]); } else error("'type' must be \"real\" for this format"); } vmaxset(vmax); }