On Wed, Dec 16, 2009 at 10:13 PM, Peng Yu <pengyu...@gmail.com> wrote: > > Currently, I load the RData file then ls() and str(). But loading the file > takes too long if the file is big. Most of the time, I only interested what > the variables are in the the file and the attributes of the variables (like > if it is a data.frame, matrix, what are the colnames/rownames, etc.) > > I'm wondering if there is any facility in R to help me avoid loading the > whole file.
I thought this was interesting as well, so i did a bit of searching through the R-help list archives and found this answer by Simon Urbanek: https://stat.ethz.ch/pipermail/r-devel/2007-August/046724.html The link to a c-routine that does what you want still works, but for future reference I'm pasting the code below. Regards, Gustaf ---------------------------- /* rdcopy v0.1-0 - extract objects or display contents of RData RDX2 files * * Copyright (C) 2007 Simon Urbanek * based in part on src/main/serialize.c and src/main/saveload.c from R: * Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka * Copyright (C) 1997--2007 Robert Gentleman, Ross Ihaka and the * R Development Core Team * License: GPL v2 * * Although R includes are needed to compile this (for constants), * libR does NOT have to be linked. */ #include <stdio.h> #include <rpc/types.h> #include <rpc/xdr.h> #include <R.h> #include <Rinternals.h> #ifndef _ #define _(X) X #endif #undef error void error(char *fmt, ...) { va_list(ap); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); exit(1); } /* .RData: byte 0..4 XDR2. - file magic ("XDR2\n"=XDR ver2) byte 5..6 X. - format ("A\n"=ASCII, "B\n"=binary, "X\n"=XDR) byte 7... RXDR2 stream. Note: RXDR2 format in NOT a valid XDR format! Strings and raw bytes are not padded and thus cannot be read using XDR alone. */ /* we need to override this so that we don't have to really use libR */ SEXP R_NilValue = 0; /* those are directly from serialize.c */ #define REFSXP 255 #define NILVALUE_SXP 254 #define GLOBALENV_SXP 253 #define UNBOUNDVALUE_SXP 252 #define MISSINGARG_SXP 251 #define BASENAMESPACE_SXP 250 #define NAMESPACESXP 249 #define PACKAGESXP 248 #define PERSISTSXP 247 #define CLASSREFSXP 246 #define GENERICREFSXP 245 #define BCREPDEF 244 #define BCREPREF 243 #define EMPTYENV_SXP 242 #define BASEENV_SXP 241 /* map type to a name */ static const char *nameSEXP(int type) { switch (type) { case REFSXP: return "REF"; case NILVALUE_SXP: return "NULL"; case GLOBALENV_SXP: return ".GlobalEnv"; case UNBOUNDVALUE_SXP: return "<unbound>"; case MISSINGARG_SXP: return "<missing>"; case BASENAMESPACE_SXP: return "<<base>>"; case NAMESPACESXP: return "NAMESPACE"; case PACKAGESXP: return "PACKAGE"; case PERSISTSXP: return "PERSIST"; case CLASSREFSXP: return "CLASSREF"; case GENERICREFSXP: return "GENERICREF"; case BCREPDEF: return "BC-REP-DEF"; case BCREPREF: return "BC-REP-REF"; case EMPTYENV_SXP: return "<empty-env>"; case BASEENV_SXP: return "<base-env>"; case NILSXP: return "NIL"; case SYMSXP: return "SYM"; case LISTSXP: return "LIST"; case CLOSXP: return "CLO"; case ENVSXP: return "ENV"; case PROMSXP: return "PROM"; case LANGSXP: return "LANG"; case SPECIALSXP: return "SPECIAL"; case BUILTINSXP: return "BUILTIN"; case CHARSXP: return "CHAR"; case LGLSXP: return "LGL"; case INTSXP: return "INT"; case REALSXP: return "REAL"; case CPLXSXP: return "CPLX"; case STRSXP: return "STR"; case DOTSXP: return "..."; case ANYSXP: return "ANY"; case VECSXP: return "VEC"; case EXPRSXP: return "EXPR"; case BCODESXP: return "BCODE"; case EXTPTRSXP: return "EXTPTR"; case WEAKREFSXP: return "WEAKREF"; case RAWSXP: return "RAW"; case S4SXP: return "S4"; } return "?"; } /* again from serialize.c */ #define IS_OBJECT_BIT_MASK (1 << 8) #define HAS_ATTR_BIT_MASK (1 << 9) #define HAS_TAG_BIT_MASK (1 << 10) #define ENCODE_LEVELS(v) (v << 12) #define DECODE_LEVELS(v) (v >> 12) #define DECODE_TYPE(v) (v & 255) /* this structure is passed acros all functions. it encapsulates both the reading an book-keeping */ typedef struct { XDR xdrs; char *buf; long bs; FILE *f; int lev; char *flag; int refs; long *ref; /* reference offsets */ int maxrefs; /* length of the refes vector */ int verb; int mode; int flags; long target; FILE *copyf; } SaveLoadData; #define M_Read 0 #define M_NonRefCopy 1 #define M_Copy 2 #define M_NonRefSelect 3 #define F_NOREF 1 /* the following is partially based on src/main/saveload.c from R */ static void XdrInInit(FILE *fp, SaveLoadData *d, long sbsize) { xdrstdio_create(&d->xdrs, fp, XDR_DECODE); d->buf = (char*) malloc(sbsize); if (!(d->buf)) error(_("cannot allocate memory for a string buffer")); d->bs = sbsize; d->f = fp; d->lev = 0; d->flag = 0; d->flags = 0; d->refs = 0; d->maxrefs = 2048; d->ref = (long*) malloc(sizeof(long)*d->maxrefs); d->copyf = 0; d->mode = M_Read; } static void XdrInTerm(SaveLoadData *d) { xdr_destroy(&d->xdrs); free(d->buf); if (d->f) fclose(d->f); if (d->copyf) fclose(d->copyf); } static void XdrSkipBytes(SaveLoadData *d, int n) { while (n > d->bs) { XdrSkipBytes(d, d->bs); n-=d->bs; } fread(d->buf, 1, n, d->f); if (d->copyf && (d->mode == M_NonRefCopy || d->mode == M_Copy)) fwrite(d->buf, 1, n, d->copyf); /* fseek(d->f, n, SEEK_CUR); */ } static int XdrInInteger(SaveLoadData *d) { int i=0; if (!xdr_int(&d->xdrs, &i)) { xdr_destroy(&d->xdrs); error(_("a I read error occurred")); } if (d->copyf && (d->mode == M_NonRefCopy || d->mode == M_Copy)) { unsigned int y = (unsigned int) i; unsigned char ib[4]; ib[0]=y>>24; ib[1]=(y>>16)&255; ib[2]=(y>>8)&255; ib[3]=y&255; fwrite(ib, 1, 4, d->copyf); } return i; } static double XdrInReal(SaveLoadData *d) { double x; if (!xdr_double(&d->xdrs, &x)) { xdr_destroy(&d->xdrs); error(_("a R read error occurred")); } if (d->copyf && (d->mode == M_NonRefCopy || d->mode == M_Copy)) { unsigned long long y = *((unsigned long long*) &x); unsigned char ib[8]; ib[0]=y>>56; ib[1]=(y>>48)&255; ib[2]=(y>>40)&255; ib[3]=(y>>32)&255; ib[4]=(y>>24)&255; ib[5]=(y>>16)&255; ib[6]=(y>>8)&255; ib[7]=y&255; fwrite(ib, 1, 8, d->copyf); } return x; } static Rcomplex XdrInComplex(SaveLoadData *d) { Rcomplex x; if (!xdr_double(&d->xdrs, &(x.r)) || !xdr_double(&d->xdrs, &(x.i))) { xdr_destroy(&d->xdrs); error(_("a CR read error occurred")); } if (d->copyf && (d->mode == M_NonRefCopy || d->mode == M_Copy)) { unsigned long long y = *((unsigned long long*) &x.r); unsigned long long v = *((unsigned long long*) &x.i); unsigned char ib[16]; ib[0]=y>>56; ib[1]=(y>>48)&255; ib[2]=(y>>40)&255; ib[3]=(y>>32)&255; ib[4]=(y>>24)&255; ib[5]=(y>>16)&255; ib[6]=(y>>8)&255; ib[7]=y&255; ib[8]=v>>56; ib[9]=(v>>48)&255; ib[10]=(v>>40)&255; ib[11]=(v>>32)&255; ib[12]=(v>>24)&255; ib[13]=(v>>16)&255; ib[14]=(v>>8)&255; ib[15]=v&255; fwrite(ib, 1, 16, d->copyf); } return x; } static char *XdrInBytes(SaveLoadData *d, char *buf, unsigned int len) { if (!buf) { XdrSkipBytes(d, len); return d->buf; } fread(buf, 1, len, d->f); if (d->copyf && (d->mode == M_NonRefCopy || d->mode == M_Copy)) fwrite(buf, 1, len, d->copyf); return buf; } static char *XdrInString(SaveLoadData *d) { if (!xdr_string(&d->xdrs, &d->buf, d->bs)) { xdr_destroy(&d->xdrs); error(_("a S read error occurred")); } return d->buf; } /* back to serialize.c */ #define UNPACK_REF_INDEX(i) ((i) >> 8) static SEXP ReadItem(SaveLoadData *d); static SEXP InStringVec(SaveLoadData *d) { int per = XdrInInteger(d); int len = XdrInInteger(d); int i = 0; while (i < len) { ReadItem(d); i++; } return R_NilValue; } static void AddReadRef(SaveLoadData *d, long off) { if (d->flags & F_NOREF) return; d->ref[d->refs++] = off; if (d->verb) printf(" {ref=%d}", d->refs); if (d->refs>=d->maxrefs) { XdrInTerm(d); error(_("too many references in the data file")); } } #define InVec(fp, obj, accessor, infunc, length) \ { \ int cnt; \ for (cnt = 0; cnt < length; ++cnt) \ /*accessor(obj, cnt,*/ infunc(fp); \ } static SEXP ReadBCLang(SaveLoadData *d, int type) { switch (type) { case BCREPREF: XdrInInteger(d); return R_NilValue; case BCREPDEF: case LANGSXP: case LISTSXP: { int pos = -1; if (type == BCREPDEF) { pos = XdrInInteger(d); type = XdrInInteger(d); } /*TAG*/ ReadItem(d); /*CAR*/ ReadBCLang(d, XdrInInteger(d)); /*CDR*/ ReadBCLang(d, XdrInInteger(d)); return R_NilValue; } default: return ReadItem(d); } } static SEXP ReadBC(SaveLoadData *d) { ReadItem(d); /* code */ { /* consts */ int blen = XdrInInteger(d); int bc = 0; while (bc < blen) { int type = XdrInInteger(d); switch (type) { case BCODESXP: ReadBC(d); break; case LANGSXP: case LISTSXP: case BCREPDEF: case BCREPREF: ReadBCLang(d, type); break; default: ReadItem(d); } bc++; } } } static SEXP ReadItem_(SaveLoadData *d, long boe, int cut); static SEXP ReadItem(SaveLoadData *d) { long boe = ftell(d->f); if (d->mode==M_NonRefSelect && boe==d->target) { printf(" -> saving object at %ld\n", boe); d->mode=M_NonRefCopy; ReadItem_(d, boe, 1); d->mode=M_Read; } else ReadItem_(d, boe, 0); return 0; } static SEXP ReadItem_(SaveLoadData *d, long boe, int cut) { int flags = XdrInInteger(d); int type = DECODE_TYPE(flags); int lev = DECODE_LEVELS(flags); int hasattr = flags & HAS_ATTR_BIT_MASK ? 1 : 0; int hastag = flags & HAS_TAG_BIT_MASK ? 1 : 0; int isobj = flags & IS_OBJECT_BIT_MASK ? 1 : 0; SEXP s = R_NilValue; int len; int isroot = 0; char px[64], *cpx=px+d->lev; *cpx=0; while (--cpx>=px) *cpx=' '; if (!d->flag) d->flag=""; if (type!=CHARSXP && d->verb) printf("\...@%-7ld%s%s %s %08x [type=%d%s%s%s]", boe, px, d->flag, nameSEXP(type), flags, type, hasattr?",ATTR":"", hastag?",TAG":"", isobj?",OBJ":""); d->flag=""; switch(type) { case NILVALUE_SXP: return 0/* R_NilValue */; case EMPTYENV_SXP: return 0/*R_EmptyEnv*/; case BASEENV_SXP: return 0/*R_BaseEnv*/; case GLOBALENV_SXP: return 0/*R_GlobalEnv*/; case UNBOUNDVALUE_SXP: return 0/*R_UnboundValue*/; case MISSINGARG_SXP: return 0/*R_MissingArg*/; case BASENAMESPACE_SXP: return 0/*R_BaseNamespace*/; case REFSXP: { int refi = UNPACK_REF_INDEX(flags); if (!refi) refi = XdrInInteger(d); if (d->verb) printf("<REFSXP: %d>", refi); if (d->mode==M_NonRefCopy) { long cp = ftell(d->f); long cop = ftell(d->copyf); long back = -4; SaveLoadData e; e.verb=0; if (!UNPACK_REF_INDEX(flags)) back -= 4; if (refi<1 || refi>d->refs) { XdrInTerm(d); error(_("invalid reference %d"), refi); } if (fseek(d->f, d->ref[refi-1], SEEK_SET)) { XdrInTerm(d); error(_("unable to seek to reference %d"), refi); } if (fseek(d->copyf, back, SEEK_CUR)) { /* backup to overwise the reference */ XdrInTerm(d); error(_("unable to seek in the output stream")); } XdrInInit(d->f, &e, d->bs); e.flags=F_NOREF; e.copyf=d->copyf; e.mode=d->mode; ReadItem(&e); e.copyf=0; e.f=0; /* we need to delete those to Term doesn't close them */ XdrInTerm(&e); if (fseek(d->f, cp, SEEK_SET)) { XdrInTerm(d); error(_("unable to return to reference point")); } } return R_NilValue; } case PERSISTSXP: InStringVec(d); AddReadRef(d, boe); return s; case SYMSXP: d->lev++; ReadItem(d); /* print name */ AddReadRef(d, boe); d->lev--; return s; case PACKAGESXP: InStringVec(d); AddReadRef(d, boe); return s; case NAMESPACESXP: InStringVec(d); AddReadRef(d, boe); return s; case ENVSXP: { int locked = XdrInInteger(d); AddReadRef(d, boe); d->lev++; /*ENCLOS*/ ReadItem(d); /*FRAME*/ ReadItem(d); /*TAG*/ ReadItem(d); /*ATTR*/ ReadItem(d); /* We don't write out the object bit for environments, so reconstruct it here if needed. */ /* Convert a NULL enclosure to baseenv() if (ENCLOS(s) == R_NilValue) SET_ENCLOS(s, R_BaseEnv); */ d->lev--; return s; } case LISTSXP: if (d->lev==0) isroot=1; case LANGSXP: case CLOSXP: case PROMSXP: case DOTSXP: d->lev++; if (hasattr) { d->flag="ATT"; ReadItem(d); } if (hastag) { d->flag="TAG"; ReadItem(d); if (isroot) printf(d->verb?"\n%s\t%ld":"%s\t%ld\n", d->buf, boe); }; /*CAR*/ d->flag="CAR"; ReadItem(d); if (cut) { /* if this is the selected object, then we cannot proceed to CDR but close it instead */ unsigned char ib[4] = { 0, 0, 0, NILVALUE_SXP }; fwrite(ib, 1, 4, d->copyf); d->lev--; return 0; } /*CDR*/ d->flag="CDR"; if (isroot) d->lev=0; ReadItem(d); /* For reading closures and promises stored in earlier versions, convert NULL env to baseenv() if (type == CLOSXP && CLOENV(s) == R_NilValue) SET_CLOENV(s, R_BaseEnv); else if (type == PROMSXP && PRENV(s) == R_NilValue) SET_PRENV(s, R_BaseEnv); */ if (d->lev>0) d->lev--; isroot=0; return s; default: /* These break out of the switch to have their ATTR, LEVELS, and OBJECT fields filled in. Each leaves the newly allocated value PROTECTed */ switch (type) { case EXTPTRSXP: d->lev++; AddReadRef(d, boe); /*PtrProtected*/ ReadItem(d); /*PtrTag*/ ReadItem(d); d->lev--; break; case WEAKREFSXP: AddReadRef(d, boe); break; case SPECIALSXP: case BUILTINSXP: len = XdrInInteger(d); XdrInBytes(d, 0, len); break; case CHARSXP: len = XdrInInteger(d); if (len == -1) s = 0 /*NA_STRING*/; else { char *c = XdrInBytes(d, 0, len); c[len]=0; if (d->verb>1) printf(" '%s'", c); } break; case LGLSXP: len = XdrInInteger(d); InVec(d, s, SET_LOGICAL_ELT, XdrInInteger, len); break; case INTSXP: len = XdrInInteger(d); InVec(d, s, SET_INTEGER_ELT, XdrInInteger, len); break; case REALSXP: len = XdrInInteger(d); InVec(d, s, SET_REAL_ELT, XdrInReal, len); break; case CPLXSXP: len = XdrInInteger(d); InVec(d, s, SET_COMPLEX_ELT, XdrInComplex, len); break; case STRSXP: { int count = 0; len = XdrInInteger(d); d->lev++; for (; count < len; ++count) ReadItem(d); d->lev--; } break; case VECSXP: case EXPRSXP: { int count = 0; len = XdrInInteger(d); d->lev++; for (; count < len; ++count) ReadItem(d); d->lev--; } break; case BCODESXP: { int count = 0; len = XdrInInteger(d); while (count < len) { ReadBC(d); count++; } } break; case CLASSREFSXP: error(_("this version of R cannot read class references")); case GENERICREFSXP: error(_("this version of R cannot read generic function references")); case RAWSXP: len = XdrInInteger(d); XdrSkipBytes(d, len); break; case S4SXP: break; default: s = R_NilValue; /* keep compiler happy */ error(_("ReadItem: unknown type %i, perhaps written by later version of R"), type); } d->lev++; if (hasattr) ReadItem(d); d->lev--; return s; } } int main(int ac, char **av) { char sig[16]; int ver, wri, rel; FILE *f, *of = 0; SaveLoadData sal, *d = &sal; if (ac<2) { printf("\n Usage: rdcopy <source> [-v | <target> <offset>]\n\n Extracts an object from a RData file.\n Use rdlist to obtain all valid offsets for each object.\n\n"); return 1; } f = fopen(av[1], "rb"); sal.verb = 0; if (!f) error(_("unable to open file %s"), av[1]); if (fread(sig, 1, 7, f)!=7) { fclose(f); error(_("unable to read magic number")); } sig[7]=0; if (!strcmp(sig, "XDR2\nX\n")) { { char *c=sig; while(*c) { if (*c<' ') *c='.'; c++; } } printf("Format: '%s'\n", sig); fclose(f); error(_("XDR v2 is the only supported format")); } if (ac>2) { if (!strcmp(av[2],"-v")) { d->verb=2; } else { of = fopen(av[2], "wb"); if (!of) { fclose(f); error(_("unable to create %s"), av[2]); } fwrite(sig, 1, 7, of); } } XdrInInit(f, d, 64*1024); d->mode=of?M_NonRefCopy:M_Read; d->copyf=of; d->target=ac>3?atol(av[3]):0; ver=XdrInInteger(d); wri=XdrInInteger(d); rel=XdrInInteger(d); printf("Format version %x, R version = %d.%d.%d, release = %x\n", ver, wri>>16, (wri>>8)&255, wri&255, rel); if (ver != 2) { XdrInTerm(d); error(_("Sorry, this tool supported RXDR version 2 format only\n")); } if (of) d->mode=M_NonRefSelect; ReadItem(d); XdrInTerm(d); if (d->mode!=M_Read) printf("\nNo object selected. Please use above offsets to select an object.\n"); return 0; } ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.