/* * R : A Computer Language for Statistical Data Analysis * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * Copyright (C) 1998-2023 The R Core Team. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, a copy is available at * https://www.R-project.org/Licenses/ */ /* byte-level access needed checks. OK in UTF-8 provided quotes, comment, sep and dec chars are ASCII. Also OK in DBCS. We use only ' ', tab, CR, LF as space chars. There is also the possibility of other digits (which we should probably continue to ignore). */ #ifdef HAVE_CONFIG_H #include #endif #define R_USE_SIGNALS 1 #include #include #include /* for DBL_DIG */ #include #include #include #include #include /* for btowc */ #ifdef Win32 #include /* for %lld */ #endif /* The size of vector initially allocated by scan */ #define SCAN_BLOCKSIZE 1000 /* The size of the console buffer */ /* NB: in Windows this also needs to be set in gnuwin32/getline/getline.c */ #define CONSOLE_PROMPT_SIZE 256 #define NO_COMCHAR 100000 /* won't occur even in Unicode */ /* The number of distinct strings to track */ #define MAX_STRINGS 10000 static unsigned char ConsoleBuf[CONSOLE_BUFFER_SIZE+1], *ConsoleBufp; static int ConsoleBufCnt; static char ConsolePrompt[CONSOLE_PROMPT_SIZE]; typedef struct { SEXP NAstrings; int quiet; int sepchar; /* = 0 */ /* This gets compared to ints */ char decchar; /* = '.' */ /* This only gets compared to chars */ char *quoteset; /* = NULL */ int comchar; /* = NO_COMCHAR */ int ttyflag; /* = 0 */ Rconnection con; /* = NULL */ Rboolean wasopen; /* = FALSE */ Rboolean escapes; /* = FALSE */ int save; /* = 0; */ Rboolean isLatin1; /* = FALSE */ Rboolean isUTF8; /* = FALSE */ Rboolean atStart; Rboolean embedWarn; Rboolean skipNul; char convbuf[100]; } LocalData; static SEXP insertString(char *str, LocalData *l) { cetype_t enc = CE_NATIVE; if (l->con->UTF8out || l->isUTF8) enc = CE_UTF8; else if (l->isLatin1) enc = CE_LATIN1; return mkCharCE(str, enc); } static R_INLINE Rboolean Rspace(unsigned int c) { if (c == ' ' || c == '\t' || c == '\n' || c == '\r') return TRUE; #ifdef Win32 /* 0xa0 is NBSP in all 8-bit Windows locales */ if(!mbcslocale && c == 0xa0) return TRUE; #else /* 0xa0 is NBSP in Latin-1 */ if(known_to_be_latin1 && c == 0xa0) return TRUE; #endif return FALSE; } /* used by readline() and menu() */ static int ConsoleGetchar(void) { if (--ConsoleBufCnt < 0) { ConsoleBuf[CONSOLE_BUFFER_SIZE] = '\0'; if (R_ReadConsole(ConsolePrompt, ConsoleBuf, CONSOLE_BUFFER_SIZE, 0) == 0) { R_ClearerrConsole(); return R_EOF; } ConsoleBufp = ConsoleBuf; ConsoleBufCnt = (int) strlen((char *)ConsoleBuf); ConsoleBufCnt--; } /* at this point we need to use unsigned char or similar */ return (int) *ConsoleBufp++; } /* used by scan() */ static int ConsoleGetcharWithPushBack(Rconnection con) { char *curLine; int c; if(con->nPushBack > 0) { curLine = con->PushBack[con->nPushBack-1]; c = curLine[con->posPushBack++]; if(con->posPushBack >= strlen(curLine)) { /* last character on a line, so pop the line */ free(curLine); con->nPushBack--; con->posPushBack = 0; if(con->nPushBack == 0) free(con->PushBack); } return c; } else return ConsoleGetchar(); } /* Like strtol, but for ints not longs and returns NA_INTEGER on overflow */ static int Strtoi(const char *nptr, int base) { long res; char *endp; errno = 0; res = strtol(nptr, &endp, base); if (*endp != '\0') res = NA_INTEGER; /* next can happen on a 64-bit platform */ if (res > INT_MAX || res < INT_MIN) res = NA_INTEGER; if (errno == ERANGE) res = NA_INTEGER; return (int) res; } static double Strtod (const char *nptr, char **endptr, Rboolean NA, LocalData *d) { return R_strtod4(nptr, endptr, d->decchar, NA); } static Rcomplex strtoc(const char *nptr, char **endptr, Rboolean NA, LocalData *d) { Rcomplex z; double x, y; char *s, *endp; x = Strtod(nptr, &endp, NA, d); if (isBlankString(endp)) { z.r = x; z.i = 0; } else if (*endp == 'i') { z.r = 0; z.i = x; endp++; } else { s = endp; y = Strtod(s, &endp, NA, d); if (*endp == 'i') { z.r = x; z.i = y; endp++; } else { z.r = 0; z.i = 0; endp = (char *) nptr; /* -Wall */ } } *endptr = endp; return z; } static Rbyte strtoraw (const char *nptr, char **endptr) { const char *p = nptr; int i, val = 0; /* should have whitespace plus exactly 2 hex digits */ while(Rspace(*p)) p++; for(i = 1; i <= 2; i++, p++) { val *= 16; if(*p >= '0' && *p <= '9') val += *p - '0'; else if (*p >= 'A' && *p <= 'F') val += *p - 'A' + 10; else if (*p >= 'a' && *p <= 'f') val += *p - 'a' + 10; else {val = 0; break;} } *endptr = (char *) p; return (Rbyte) val; } static R_INLINE int scanchar_raw(LocalData *d) { int c = (d->ttyflag) ? ConsoleGetcharWithPushBack(d->con) : Rconn_fgetc(d->con); if(c == 0) { if(d->skipNul) { do { c = (d->ttyflag) ? ConsoleGetcharWithPushBack(d->con) : Rconn_fgetc(d->con); } while(c == 0); } else d->embedWarn = TRUE; } return c; } static R_INLINE void unscanchar(int c, LocalData *d) { d->save = c; } /* For second bytes in a DBCS: should not be called when a char is saved, but be cautious */ static R_INLINE int scanchar2(LocalData *d) { int next; if (d->save) { next = d->save; d->save = 0; } else next = scanchar_raw(d); return next; } static int scanchar(Rboolean inQuote, LocalData *d) { int next; if (d->save) { next = d->save; d->save = 0; } else next = scanchar_raw(d); if(next == d->comchar && !inQuote) { do next = scanchar_raw(d); while (next != '\n' && next != R_EOF); } if(next == '\\' && d->escapes) { next = scanchar_raw(d); if ('0' <= next && next <= '8') { int octal = next - '0'; if ('0' <= (next = scanchar_raw(d)) && next <= '8') { octal = 8 * octal + next - '0'; if ('0' <= (next = scanchar_raw(d)) && next <= '8') { octal = 8 * octal + next - '0'; } else unscanchar(next, d); } else unscanchar(next, d); next = octal; } else switch(next) { case 'a': next = '\a'; break; case 'b': next = '\b'; break; case 'f': next = '\f'; break; case 'n': next = '\n'; break; case 'r': next = '\r'; break; case 't': next = '\t'; break; case 'v': next = '\v'; break; case 'x': { int val = 0; int i, ext; for(i = 0; i < 2; i++) { next = scanchar_raw(d); if(next >= '0' && next <= '9') ext = next - '0'; else if (next >= 'A' && next <= 'F') ext = next - 'A' + 10; else if (next >= 'a' && next <= 'f') ext = next - 'a' + 10; else {unscanchar(next, d); break;} val = 16*val + ext; } next = val; } break; default: /* Any other char and even EOF escapes to itself, but we need to preserve \" etc inside quotes. */ if(inQuote && strchr(d->quoteset, next)) { unscanchar(next, d); next = '\\'; } break; } } return next; } /* utility to close connections after interrupts */ static void scan_cleanup(void *data) { LocalData *ld = data; if(ld->con && !ld->ttyflag && !ld->wasopen) { ld->con->close(ld->con); ld->con = NULL; } if(ld->quoteset && ld->quoteset[0]) { free(ld->quoteset); ld->quoteset = NULL; } } #include "RBufferUtils.h" /*XX Can we pass this routine an R_StringBuffer? appears so. But do we have to worry about continuation lines and whatever is currently in the buffer before we call this? In other words, what if this appends to the existing content. Appears it writes in directly at position 0. */ static char * fillBuffer(SEXPTYPE type, int strip, int *bch, LocalData *d, R_StringBuffer *buffer) { /* The basic reader function, called from scanVector() and scanFrame(). Reads into _buffer_ which later will be read out by extractItem(). bch is used to distinguish \r, \n and EOF from more input available. */ char *bufp; int c, quote, filled, nbuf = MAXELTSIZE, m, mm = 0; Rboolean dbcslocale = (R_MB_CUR_MAX == 2) && !d->isUTF8 && !d->isLatin1; m = 0; filled = 1; if (d->sepchar == 0) { /* skip all space or tabs: only look at lead bytes here */ strip = 0; /* documented to be ignored in this case */ while ((c = scanchar(FALSE, d)) == ' ' || c == '\t') ; if (c == '\n' || c == '\r' || c == R_EOF) { filled = c; goto donefill; } if ((type == STRSXP || type == NILSXP) && strchr(d->quoteset, c)) { quote = c; while ((c = scanchar(TRUE, d)) != R_EOF && c != quote) { if (m >= nbuf - 3) { nbuf *= 2; R_AllocStringBuffer(nbuf, buffer); } if (c == '\\') { /* If this is an embedded quote, unquote it, but otherwise keep backslashes */ c = scanchar(TRUE, d); if (c == R_EOF) break; if(c != quote) buffer->data[m++] = '\\'; } buffer->data[m++] = (char) c; if(dbcslocale && btowc(c) == WEOF) buffer->data[m++] = (char) scanchar2(d); } if (c == R_EOF) warning(_("EOF within quoted string")); c = scanchar(FALSE, d); mm = m; } else { /* not a quoted char string */ do { if (m >= nbuf - 3) { nbuf *= 2; R_AllocStringBuffer(nbuf, buffer); } buffer->data[m++] = (char) c; if(dbcslocale && btowc(c) == WEOF) buffer->data[m++] = (char) scanchar2(d); c = scanchar(FALSE, d); } while (!Rspace(c) && c != R_EOF); } /* skip all space or tabs: only look at lead bytes here */ while (c == ' ' || c == '\t') c = scanchar(FALSE, d); if (c == '\n' || c == '\r' || c == R_EOF) filled = c; else unscanchar(c, d); } else { /* have separator */ while ((c = scanchar(FALSE, d)) != d->sepchar && c != '\n' && c != '\r' && c != R_EOF) { /* eat white space */ if (type != STRSXP) while (c == ' ' || c == '\t') if ((c = scanchar(FALSE, d)) == d->sepchar || c == '\n' || c == '\r' || c == R_EOF) { filled = c; goto donefill; } /* CSV style quoted string handling */ if ((type == STRSXP || type == NILSXP) && c != 0 && strchr(d->quoteset, c)) { quote = c; inquote: while ((c = scanchar(TRUE, d)) != R_EOF && c != quote) { if (m >= nbuf - 3) { nbuf *= 2; R_AllocStringBuffer(nbuf, buffer); } buffer->data[m++] = (char) c; if(dbcslocale && btowc(c) == WEOF) buffer->data[m++] = (char) scanchar2(d); } if (c == R_EOF) warning(_("EOF within quoted string")); c = scanchar(TRUE, d); /* only peek at lead byte unless ASCII */ if (c == quote) { if (m >= nbuf - 3) { nbuf *= 2; R_AllocStringBuffer(nbuf, buffer); } buffer->data[m++] = (char) quote; goto inquote; /* FIXME: Ick! Clean up logic */ } mm = m; if (c == d->sepchar || c == '\n' || c == '\r' || c == R_EOF){ filled = c; goto donefill; } else { unscanchar(c, d); continue; } } /* end of CSV-style quote handling */ if (!strip || m > 0 || !Rspace(c)) { /* only lead byte */ if (m >= nbuf - 3) { nbuf *= 2; R_AllocStringBuffer(nbuf, buffer); } buffer->data[m++] = (char) c; if(dbcslocale && btowc(c) == WEOF) buffer->data[m++] = (char) scanchar2(d); } } filled = c; /* last lead byte in a DBCS */ } donefill: /* strip trailing white space, if desired and if item is non-null */ bufp = &buffer->data[m]; if (strip && m > mm) { do {c = (int)*--bufp;} while(m-- > mm && Rspace(c)); bufp++; } *bufp = '\0'; /* Remove UTF-8 BOM */ if(d->atStart && utf8locale && !strncmp(buffer->data, "\xef\xbb\xbf", 3)) memmove(buffer->data, buffer->data+3, strlen(buffer->data) + 1); d->atStart = FALSE; *bch = filled; return buffer->data; } /* If mode = 0 use for numeric fields where "" is NA If mode = 1 use for character fields where "" is verbatim unless na.strings includes "" */ static R_INLINE int isNAstring(const char *buf, int mode, LocalData *d) { int i; if(!mode && strlen(buf) == 0) return 1; for (i = 0; i < length(d->NAstrings); i++) if (!strcmp(CHAR(STRING_ELT(d->NAstrings, i)), buf)) return 1; return 0; } NORET static R_INLINE void expected(char *what, char *got, LocalData *d) { int c; if (d->ttyflag) { /* This is safe in a MBCS */ while ((c = scanchar(FALSE, d)) != R_EOF && c != '\n') ; } error(_("scan() expected '%s', got '%s'"), what, got); } static void extractItem(char *buffer, SEXP ans, R_xlen_t i, LocalData *d) { char *endp; switch(TYPEOF(ans)) { case NILSXP: break; case LGLSXP: if (isNAstring(buffer, 0, d)) LOGICAL(ans)[i] = NA_INTEGER; else { int tr = StringTrue(buffer), fa = StringFalse(buffer); if(tr || fa) LOGICAL(ans)[i] = tr; else expected("a logical", buffer, d); } break; case INTSXP: if (isNAstring(buffer, 0, d)) INTEGER(ans)[i] = NA_INTEGER; else { INTEGER(ans)[i] = Strtoi(buffer, 10); if (INTEGER(ans)[i] == NA_INTEGER) expected("an integer", buffer, d); } break; case REALSXP: if (isNAstring(buffer, 0, d)) REAL(ans)[i] = NA_REAL; else { REAL(ans)[i] = Strtod(buffer, &endp, TRUE, d); if (!isBlankString(endp)) expected("a real", buffer, d); } break; case CPLXSXP: if (isNAstring(buffer, 0, d)) COMPLEX(ans)[i].r = COMPLEX(ans)[i].i = NA_REAL; else { COMPLEX(ans)[i] = strtoc(buffer, &endp, TRUE, d); if (!isBlankString(endp)) expected("a complex", buffer, d); } break; case STRSXP: if (isNAstring(buffer, 1, d)) SET_STRING_ELT(ans, i, NA_STRING); else SET_STRING_ELT(ans, i, insertString(buffer, d)); break; case RAWSXP: if (isNAstring(buffer, 0, d)) RAW(ans)[i] = 0; else { RAW(ans)[i] = strtoraw(buffer, &endp); if (!isBlankString(endp)) expected("a raw", buffer, d); } break; default: UNIMPLEMENTED_TYPE("extractItem", ans); } } static SEXP scanVector(SEXPTYPE type, R_xlen_t maxitems, R_xlen_t maxlines, int flush, SEXP stripwhite, int blskip, LocalData *d) { SEXP ans, bns; int c, strip, bch, ic; R_xlen_t i, blocksize, linesread, n, nprev; char *buffer; R_StringBuffer strBuf = {NULL, 0, MAXELTSIZE}; if (maxitems > 0) blocksize = maxitems; else blocksize = SCAN_BLOCKSIZE; R_AllocStringBuffer(0, &strBuf); PROTECT(ans = allocVector(type, blocksize)); nprev = 0; n = 0; linesread = 0; bch = 1; if (d->ttyflag) snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, "1: "); strip = asLogical(stripwhite); ic = 9999; for (;;) { if(!ic) { R_CheckUserInterrupt(); ic = 9999; } if (bch == R_EOF) { if (d->ttyflag) R_ClearerrConsole(); break; } else if (bch == '\n') { linesread++; if (linesread == maxlines) break; if (d->ttyflag) snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, "%lld: ", (long long) (n + 1)); nprev = n; } if (n == blocksize) { /* enlarge the vector*/ bns = ans; if(blocksize > R_XLEN_T_MAX/2) error(_("too many items")); blocksize = 2 * blocksize; ans = allocVector(type, blocksize); UNPROTECT(1); PROTECT(ans); copyVector(ans, bns); } buffer = fillBuffer(type, strip, &bch, d, &strBuf); if (nprev == n && strlen(buffer)==0 && ((blskip && bch =='\n') || bch == R_EOF)) { if (d->ttyflag || bch == R_EOF) break; } else { extractItem(buffer, ans, n, d); ic--; if (++n == maxitems) { if (d->ttyflag && bch != '\n') { /* MBCS-safe */ while ((c = scanchar(FALSE, d)) != '\n') ; } break; } } if (flush && (bch != '\n') && (bch != R_EOF)) { /* MBCS-safe */ while ((c = scanchar(FALSE, d)) != '\n' && (c != R_EOF)); bch = c; } } if (!d->quiet) REprintf("Read %lld item%s\n", (long long) n, (n == 1) ? "" : "s"); if (d->ttyflag) ConsolePrompt[0] = '\0'; if (n == 0) { UNPROTECT(1); R_FreeStringBuffer(&strBuf); return allocVector(type,0); } if (n == maxitems) { UNPROTECT(1); R_FreeStringBuffer(&strBuf); return ans; } bns = allocVector(type, n); switch (type) { case LGLSXP: case INTSXP: for (i = 0; i < n; i++) INTEGER(bns)[i] = INTEGER(ans)[i]; break; case REALSXP: for (i = 0; i < n; i++) REAL(bns)[i] = REAL(ans)[i]; break; case CPLXSXP: for (i = 0; i < n; i++) COMPLEX(bns)[i] = COMPLEX(ans)[i]; break; case STRSXP: for (i = 0; i < n; i++) SET_STRING_ELT(bns, i, STRING_ELT(ans, i)); break; case RAWSXP: for (i = 0; i < n; i++) RAW(bns)[i] = RAW(ans)[i]; break; default: UNIMPLEMENTED_TYPEt("scanVector", type); } UNPROTECT(1); R_FreeStringBuffer(&strBuf); return bns; } static SEXP scanFrame(SEXP what, R_xlen_t maxitems, R_xlen_t maxlines, int flush, int fill, SEXP stripwhite, int blskip, int multiline, LocalData *d) { SEXP ans, new, old, w; char *buffer = NULL; int c, strip, bch, ic; R_xlen_t blksize, i, ii, j, n, nc, linesread, colsread; R_xlen_t badline; R_StringBuffer buf = {NULL, 0, MAXELTSIZE}; nc = xlength(what); if (!nc) { error(_("empty 'what' specified")); } if (maxitems > 0) blksize = maxitems; else if (maxlines > 0) blksize = maxlines; else blksize = SCAN_BLOCKSIZE; R_AllocStringBuffer(0, &buf); PROTECT(ans = allocVector(VECSXP, nc)); for (i = 0; i < nc; i++) { w = VECTOR_ELT(what, i); if (!isNull(w)) { if (!isVector(w)) { error(_("invalid '%s' argument"), "what"); } SET_VECTOR_ELT(ans, i, allocVector(TYPEOF(w), blksize)); } } setAttrib(ans, R_NamesSymbol, getAttrib(what, R_NamesSymbol)); n = 0; linesread = 0; colsread = 0; ii = 0; badline = 0; bch = 1; c = 0; /* -Wall */ if (d->ttyflag) snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, "1: "); // we checked its type in do_scan int *lstrip = LOGICAL(stripwhite); Rboolean vec_strip = (xlength(stripwhite) == xlength(what)); strip = lstrip[0]; ic = 999; for (;;) { if(!ic) { R_CheckUserInterrupt(); ic = 999; } if (bch == R_EOF) { if (d->ttyflag) R_ClearerrConsole(); goto done; } else if (bch == '\n') { ic--; linesread++; if (colsread != 0) { if (fill) { buffer[0] = '\0'; for (ii = colsread; ii < nc; ii++) { extractItem(buffer, VECTOR_ELT(ans, ii), n, d); } n++; ii = 0; colsread = 0; } else if (!badline && !multiline) badline = linesread; if(badline && !multiline) error(_("line %lld did not have %lld elements"), (long long) badline, (long long) nc); } if (maxitems > 0 && n >= maxitems) goto done; if (maxlines > 0 && linesread == maxlines) goto done; if (d->ttyflag) snprintf(ConsolePrompt, CONSOLE_PROMPT_SIZE, "%lld: ", (long long) (n + 1)); } if (n == blksize && colsread == 0) { if(blksize > R_XLEN_T_MAX/2) error(_("too many items")); blksize = 2 * blksize; for (i = 0; i < nc; i++) { old = VECTOR_ELT(ans, i); if(!isNull(old)) { new = allocVector(TYPEOF(old), blksize); copyVector(new, old); SET_VECTOR_ELT(ans, i, new); } } } if (vec_strip) strip = lstrip[colsread]; buffer = fillBuffer(TYPEOF(VECTOR_ELT(ans, ii)), strip, &bch, d, &buf); if (colsread == 0 && strlen(buffer) == 0 && ((blskip && bch =='\n') || bch == R_EOF)) { if (d->ttyflag || bch == R_EOF) break; } else { extractItem(buffer, VECTOR_ELT(ans, ii), n, d); ii++; colsread++; /* increment n and reset i after filling a row */ if (colsread == nc) { n++; ii = 0; colsread = 0; if (flush && (bch != '\n') && (bch != R_EOF)) { /* MBCS-safe */ while ((c = scanchar(FALSE, d)) != '\n' && c != R_EOF); bch = c; } } } } done: if (colsread != 0) { if (!fill) warning(_("number of items read is not a multiple of the number of columns")); buffer[0] = '\0'; /* this is an NA */ for (ii = colsread; ii < nc; ii++) { extractItem(buffer, VECTOR_ELT(ans, ii), n, d); } n++; } if (!d->quiet) REprintf("Read %lld record%s\n", (long long) n, (n == 1) ? "" : "s"); if (d->ttyflag) ConsolePrompt[0] = '\0'; for (i = 0; i < nc; i++) { old = VECTOR_ELT(ans, i); new = allocVector(TYPEOF(old), n); switch (TYPEOF(old)) { case LGLSXP: case INTSXP: for (j = 0; j < n; j++) INTEGER(new)[j] = INTEGER(old)[j]; break; case REALSXP: for (j = 0; j < n; j++) REAL(new)[j] = REAL(old)[j]; break; case CPLXSXP: for (j = 0; j < n; j++) COMPLEX(new)[j] = COMPLEX(old)[j]; break; case STRSXP: for (j = 0; j < n; j++) SET_STRING_ELT(new, j, STRING_ELT(old, j)); break; case RAWSXP: for (j = 0; j < n; j++) RAW(new)[j] = RAW(old)[j]; break; case NILSXP: break; default: UNIMPLEMENTED_TYPE("scanFrame", old); } SET_VECTOR_ELT(ans, i, new); } UNPROTECT(1); R_FreeStringBuffer(&buf); return ans; } attribute_hidden SEXP do_scan(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, file, sep, what, stripwhite, dec, quotes, comstr; int c, flush, fill, blskip, multiline, escapes, skipNul; R_xlen_t nmax, nlines, nskip; const char *p, *encoding; RCNTXT cntxt; LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, FALSE, 0, FALSE, FALSE, FALSE, FALSE, FALSE, {FALSE}}; data.NAstrings = R_NilValue; checkArity(op, args); file = CAR(args); args = CDR(args); what = CAR(args); args = CDR(args); nmax = asXLength(CAR(args)); args = CDR(args); sep = CAR(args); args = CDR(args); dec = CAR(args); args = CDR(args); quotes = CAR(args); args = CDR(args); nskip = asXLength(CAR(args)); args = CDR(args); nlines = asXLength(CAR(args)); args = CDR(args); data.NAstrings = CAR(args); args = CDR(args); flush = asLogical(CAR(args)); args = CDR(args); fill = asLogical(CAR(args)); args = CDR(args); stripwhite = CAR(args); args = CDR(args); data.quiet = asLogical(CAR(args)); args = CDR(args); blskip = asLogical(CAR(args)); args = CDR(args); multiline = asLogical(CAR(args)); args = CDR(args); comstr = CAR(args); args = CDR(args); escapes = asLogical(CAR(args));args = CDR(args); if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1) error(_("invalid '%s' argument"), "encoding"); encoding = CHAR(STRING_ELT(CAR(args), 0)); args = CDR(args); /* ASCII */ if(streql(encoding, "latin1")) data.isLatin1 = TRUE; if(streql(encoding, "UTF-8")) data.isUTF8 = TRUE; skipNul = asLogical(CAR(args)); if (data.quiet == NA_LOGICAL) data.quiet = 0; if (blskip == NA_LOGICAL) blskip = 1; if (multiline == NA_LOGICAL) multiline = 1; if (nskip < 0) nskip = 0; if (nlines < 0) nlines = 0; if (nmax < 0) nmax = 0; if (TYPEOF(stripwhite) != LGLSXP) error(_("invalid '%s' argument"), "strip.white"); if (xlength(stripwhite) != 1 && xlength(stripwhite) != xlength(what)) error(_("invalid 'strip.white' length")); if (TYPEOF(data.NAstrings) != STRSXP) error(_("invalid '%s' argument"), "na.strings"); if (TYPEOF(comstr) != STRSXP || length(comstr) != 1) error(_("invalid '%s' argument"), "comment.char"); if (isString(sep) || isNull(sep)) { if (length(sep) == 0) data.sepchar = 0; else { const char *sc = translateChar(STRING_ELT(sep, 0)); if(strlen(sc) > 1) error(_("invalid 'sep' value: must be one byte")); data.sepchar = (unsigned char) sc[0]; } /* gets compared to chars: bug prior to 1.7.0 */ } else error(_("invalid '%s' argument"), "sep"); if (isString(dec) || isNull(dec)) { if (length(dec) == 0) data.decchar = '.'; else { const char *dc = translateChar(STRING_ELT(dec, 0)); if(strlen(dc) != 1) error(_("invalid decimal separator: must be one byte")); data.decchar = dc[0]; } } else error(_("invalid decimal separator")); /* set up a context which will close the connection if there is an error or user interrupt */ begincontext(&cntxt, CTXT_CCODE, R_GlobalContext->call, R_BaseEnv, R_BaseEnv, R_NilValue, R_NilValue); cntxt.cend = &scan_cleanup; cntxt.cenddata = &data; if (isString(quotes)) { const char *sc = translateChar(STRING_ELT(quotes, 0)); if (strlen(sc)) data.quoteset = Rstrdup(sc); else data.quoteset = ""; } else if (isNull(quotes)) data.quoteset = ""; else error(_("invalid quote symbol set")); p = translateChar(STRING_ELT(comstr, 0)); data.comchar = NO_COMCHAR; /* here for -Wall */ if (strlen(p) > 1) error(_("invalid '%s' argument"), "comment.char"); else if (strlen(p) == 1) data.comchar = (unsigned char)*p; if(escapes == NA_LOGICAL) error(_("invalid '%s' argument"), "allowEscapes"); data.escapes = escapes != 0; if(skipNul == NA_LOGICAL) error(_("invalid '%s' argument"), "skipNul"); data.skipNul = skipNul != 0; int ii = asInteger(file); data.con = getConnection(ii); if(ii == 0) { data.atStart = FALSE; data.ttyflag = 1; } else { data.atStart = (nskip == 0); data.ttyflag = 0; data.wasopen = data.con->isopen; if(!data.wasopen) { data.con->UTF8out = TRUE; /* a request */ strcpy(data.con->mode, "r"); if(!data.con->open(data.con)) error(_("cannot open the connection")); if(!data.con->canread) { data.con->close(data.con); error(_("cannot read from this connection")); } } else { if(!data.con->canread) error(_("cannot read from this connection")); } for(R_xlen_t i = 0, j = 10000; i < nskip; i++) { /* MBCS-safe */ for(;;) { c = scanchar(FALSE, &data); if (!j--) { R_CheckUserInterrupt(); j = 10000; } if (c == '\n' || c == R_EOF) break; } } } ans = R_NilValue; /* -Wall */ data.save = 0; switch (TYPEOF(what)) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: ans = scanVector(TYPEOF(what), nmax, nlines, flush, stripwhite, blskip, &data); break; case VECSXP: ans = scanFrame(what, nmax, nlines, flush, fill, stripwhite, blskip, multiline, &data); break; default: error(_("invalid '%s' argument"), "what"); } PROTECT(ans); endcontext(&cntxt); /* we might have a character that was unscanchar-ed. So pushback if possible */ if (data.save && !data.ttyflag && data.wasopen) { char line[2] = " "; line[0] = (char) data.save; con_pushback(data.con, FALSE, line); } if (!data.ttyflag && !data.wasopen) data.con->close(data.con); if (data.quoteset[0]) free(data.quoteset); if (!skipNul && data.embedWarn) warning(_("embedded nul(s) found in input")); UNPROTECT(1); /* ans */ return ans; } attribute_hidden SEXP do_readln(SEXP call, SEXP op, SEXP args, SEXP rho) { int c; char buffer[MAXELTSIZE], *bufp = buffer; SEXP ans, prompt; checkArity(op,args); prompt = CAR(args); if (prompt == R_NilValue) { ConsolePrompt[0] = '\0'; /* precaution */ PROTECT(prompt); } else { PROTECT(prompt = coerceVector(prompt, STRSXP)); if(length(prompt) > 0) { strncpy(ConsolePrompt, translateChar(STRING_ELT(prompt, 0)), CONSOLE_PROMPT_SIZE - 1); ConsolePrompt[CONSOLE_PROMPT_SIZE - 1] = '\0'; } } if(R_Interactive) { /* skip space or tab */ while ((c = ConsoleGetchar()) == ' ' || c == '\t') ; if (c != '\n' && c != R_EOF) { *bufp++ = (char) c; while ((c = ConsoleGetchar())!= '\n' && c != R_EOF) { if (bufp >= &buffer[MAXELTSIZE - 2]) continue; *bufp++ = (char) c; } } /* now strip white space off the end as well */ while (--bufp >= buffer && (*bufp == ' ' || *bufp == '\t')) ; *++bufp = '\0'; ConsolePrompt[0] = '\0'; ans = mkString(buffer); } else { /* simulate CR as response */ Rprintf("%s\n", ConsolePrompt); ans = mkString(""); } UNPROTECT(1); return ans; }