/*
  gretlzip.c -- zipping and unzipping for gretl databases using zlib
  routines.

  Allin Cottrell (cottrell@wfu.edu) November, 2000 (revised, October 2002)

  Further revised February 2003 to allow for inclusion of a database
  codebook. Then again in January 2018 to allow for the codebook to be
  a PDF file.

  May 2023: streamline the code and make it a bit more robust.
*/

/* NOTE(review): the header name after each #include below is missing
   in this copy of the file (it looks as if every "<...>" span was lost
   when the text was extracted). Restore the eleven header names from
   the upstream source before attempting to build.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Size of each entry of outnames[] in ggz_extract(); main() also uses
   MAXLEN-1 as the strncat() limit when copying command-line arguments. */
#define MAXLEN 255
/* Size of the gzbuf transfer buffer in ggz_extract(). */
#define BUFSIZE 8192
/* Directory separator searched for by strip_path(). */
#define PATHSEP '/'
/* Number of bytes ggz_extract() reads from the start of the gzipped
   stream to obtain the textual header. */
#define INFOLEN 100

/* Append the ctime() rendering of *timep to the string already in
   @buf. The last character of the ctime() string (the trailing
   newline that ctime() produces) is overwritten with a space first.

   NOTE(review): the ctime() result is used without a NULL check, and
   strcat() does not bound the write: @buf must hold a NUL-terminated
   string and have enough spare room.
*/
static void print_time_long (char *buf, const time_t *timep)
{
    char *timebuf = ctime(timep);

    timebuf[strlen(timebuf)-1] = ' ';
    strcat(buf, timebuf);
}

/* Append the local date for *timep to the string already in @buf,
   formatted with "%4d%02d%02d" (year, month, day of month).

   NOTE(review): the localtime() result is dereferenced without a NULL
   check, and the strcat() into @buf is unbounded.
*/
static void print_time_short (char *buf, const time_t *timep)
{
    struct tm *ztime;
    char timebuf[32];

    ztime = localtime(timep);
    sprintf(timebuf, "%4d%02d%02d",
            ztime->tm_year + 1900,  /* tm_year counts from 1900 */
            ztime->tm_mon + 1,      /* tm_mon is zero-based */
            ztime->tm_mday);
    strcat(buf, timebuf);
}

/* Return a pointer into @fname just past its last PATHSEP. If there
   is no PATHSEP, or the last PATHSEP is the final character of the
   string, @fname itself is returned. No copy is made.
*/
static char *strip_path (char *fname)
{
    char *p = strrchr(fname, PATHSEP);

    if (p != NULL && *(p + 1)) {
        return p + 1;
    } else {
        return fname;
    }
}

/* Modify @fname in place using @ext and return @fname.

   NOTE(review): as written, both branches append @ext at the end of
   the string: strcat(p, ext) concatenates onto the string that starts
   at the last '.', and that string ends at the same terminator as
   @fname. So an existing extension is not replaced, and the calls in
   main() with ext == "" leave the name unchanged. The function name
   suggests that replacing the text from the last '.' onward (e.g.
   strcpy(p, ext)) was intended -- confirm against upstream. Note also
   that strrchr() scans the whole string, so a '.' in a directory
   component would be found if no later '.' exists.
*/
static char *switch_ext (char *fname, char *ext)
{
    char *p = strrchr(fname, '.');

    if (p != NULL) {
        strcat(p, ext);
    } else {
        strcat(fname, ext);
    }
    return fname;
}

/* Parse the textual header held in @buf, which is read as up to three
   newline-separated fields, each starting with an unsigned count:

     line 1: length of the index file  -> *idxlen  (required)
     line 2: length of the data file   -> *datalen (required)
     line 3: length of the codebook    -> *cblen   (optional)

   *cblen is set to 0 when the third line is absent or does not start
   with a number. When a third count is read and the text ".pdf"
   occurs anywhere from the start of the third line to the end of
   @buf, *pdfdoc is set to 1; *pdfdoc is never written otherwise, so
   the caller must initialize it.

   Returns 0 on success, 1 if either required count cannot be read.

   NOTE(review): "%lu" expects a pointer to unsigned long, but the
   arguments are size_t pointers; those types differ on some platforms
   (e.g. 64-bit Windows). "%zu" is the conversion that matches size_t.
*/
static int parse_db_header (const char *buf, size_t *idxlen,
                            size_t *datalen, size_t *cblen,
                            int *pdfdoc)
{
    const char *p;

    *cblen = 0;

    /* length of the index file (required) */
    if (sscanf(buf, "%lu", idxlen) != 1) {
        return 1;
    }

    /* length of the data (required): starts after the first newline */
    p = strchr(buf, '\n');
    if (p == NULL) {
        return 1;
    } else {
        p++;
        if (sscanf(p, "%lu", datalen) != 1) {
            return 1;
        }
    }

    /* codebook info (optional): starts after the second newline */
    p = strchr(p, '\n');
    if (p != NULL) {
        p++;
        if (sscanf(p, "%lu", cblen) != 1) {
            *cblen = 0;
        } else if (strstr(p, ".pdf")) {
            *pdfdoc = 1;
        }
    }

    return 0;
}

/* NOTE(review): this copy of the file is damaged from here on. The
   text between the "<" of the loop condition below and a later ">"
   is missing, which takes with it the rest of close_infiles() and
   the opening part of the function whose tail follows (presumably
   ggz_create(), which main() calls -- confirm against upstream).
   The surviving tokens are kept exactly as found.
*/
static void close_infiles (FILE *infiles[], int n)
{
    int i;

    for (i=0; i 0) {
            /* a short gzwrite() is reported on stderr only; it does
               not change the value returned below */
            chk = gzwrite(fgz, gzbuf, len);
            if (chk != len) {
                fprintf(stderr, "*** gzwrite: len = %d but chk = %d\n",
                        len, chk);
            }
        }
    }

    close_infiles(infiles, n_files);
    gzclose(fgz);

    return 0;
}

/* Unpack a gzipped database file into separate output files.

   @infobuf: receives a copy of the header text read from the start of
             the stream; on some failures it receives an error message
             instead.
   @fname:   name of the input file without ".gz"; the suffix is
             appended to the caller's buffer in place.
   @outname: stem for the output names: "<outname>.idx", "<outname>.bin"
             and, if the header gives a non-zero codebook length,
             "<outname>.pdf" or "<outname>.cb".

   Returns 1 if the input cannot be opened or its header cannot be
   parsed. The final return statement is not visible in this copy.

   NOTE(review): the strcat() into @fname and the sprintf() calls into
   outnames[] (MAXLEN bytes each) and @infobuf are not bounds-checked.
*/
static int ggz_extract (char *infobuf, char *fname, char *outname)
{
    int fids[3] = {-1, -1, -1};
    size_t sizes[3] = {0};  /* index, data, codebook byte counts */
    size_t u, umax, rem;
    int bgot, wrote, pdfdoc = 0;
    char outnames[3][MAXLEN] = {0};
    char gzbuf[BUFSIZE] = {0};
    gzFile fgz;
    int n_files = 2;  /* raised to 3 when a codebook is present */
    int i, err = 0;

    /* initial check on the gzipped input file */
    strcat(fname, ".gz");
    fgz = gzopen(fname, "rb");
    if (fgz == NULL) {
        sprintf(infobuf, "Couldn't gzopen %s for reading\n", fname);
        return 1;
    }

    /* Read the INFOLEN-byte header. gzbuf is zero-initialized and
       larger than INFOLEN, so the strcpy() source is NUL-terminated.
       NOTE(review): the gzread() return value is not checked. */
    gzread(fgz, gzbuf, INFOLEN);
    strcpy(infobuf, gzbuf);

    if (parse_db_header(infobuf, &sizes[0], &sizes[1], &sizes[2], &pdfdoc)) {
        fputs("Error reading info buffer: failed to get byte counts\n", stderr);
        gzclose(fgz);
        return 1;
    }

    /* set up the output filenames */
    sprintf(outnames[0], "%s.idx", outname);
    sprintf(outnames[1], "%s.bin", outname);
    if (sizes[2] > 0) {
        /* got a codebook buffer */
        if (pdfdoc) {
            fputs("Detected PDF codebook\n", stderr);
            sprintf(outnames[2], "%s.pdf", outname);
        } else {
            fputs("Detected plain text codebook\n", stderr);
            sprintf(outnames[2], "%s.cb", outname);
        }
        n_files = 3;
    }

    /* open output files for writing */
    /* NOTE(review): text is missing between the "<" of the loop
       condition below and the ">" of a later comparison. The code
       that opens the output files and the head of the read loop
       (apparently ending in a call that takes the smaller of rem and
       BUFSIZE as its size argument) is lost; the surviving tokens are
       kept exactly as found. */
    for (i=0; i BUFSIZE)? BUFSIZE : rem);
            /* write out exactly the number of bytes just obtained; a
               short write puts a message in infobuf and sets err */
            wrote = write(fids[i], gzbuf, bgot);
            if (wrote != bgot) {
                sprintf(infobuf, "%s: bytes written %d, should be %d\n",
                        outnames[i], wrote, bgot);
                err = 1;
            }
        }
    }

    gzclose(fgz);

    /* NOTE(review): text is missing again between the "<" below and a
       later ">". The end of ggz_extract(), the usage() function that
       main() calls, and the start of main() (its declarations and the
       initial assignment of callname) are lost. What follows is the
       surviving part of main(). */
    for (i=0; i 1) {
        callname += 1;
    } else {
        callname = argv[0];
    }

    /* when invoked under the name "gretlunzip", extraction is implied
       and the file argument sits one position earlier */
    if (!strcmp(callname, "gretlunzip")) {
        filearg--;
        unzip = 1;
    }

    /* wrong number of arguments for the selected mode
       (NOTE(review): usage() is not visible here; the code below
       presumably relies on it not returning -- confirm) */
    if ((unzip && argc != 2) || (!unzip && argc < 3)) {
        usage(argv[0]);
    }

    /* "-c" selects creation; unless running as gretlunzip, the only
       other accepted option is "-x" */
    if (!strcmp(argv[1], "-c")) {
        create = 1;
    } else if (!unzip && strcmp(argv[1], "-x")) {
        usage(argv[0]);
    }

    /* bounded copy of the input name: at most MAXLEN-1 characters */
    *fname = '\0';
    strncat(fname, argv[filearg], MAXLEN-1);

    /* the output name is taken from the following argument when argc
       is 4, otherwise it is the same as the input name */
    *outname = '\0';
    if (argc == 4) {
        strncat(outname, argv[filearg + 1], MAXLEN-1);
    } else {
        strcpy(outname, fname);
    }

    /* NOTE(review): as switch_ext() is written, these two calls do
       not alter the names -- see the note on that function */
    switch_ext(fname, "");
    switch_ext(outname, "");

    fprintf(stderr, "Taking input from %s%s\nWriting output to %s%s\n",
            fname, (create)?
            " (.idx, .bin)": ".gz",
            outname, (create)? ".gz" : " (.idx, .bin)");

    if (create) {
        err = ggz_create(infobuf, fname, outname);
    } else {
        err = ggz_extract(infobuf, fname, outname);
    }

    /* on error infobuf is printed to stderr; on success it is printed
       to stdout after a heading */
    if (err) {
        fprintf(stderr, "%s", infobuf);
    } else if (create) {
        printf("Found and compressed:\n%s", infobuf);
    } else {
        printf("Found and decompressed:\n%s", infobuf);
    }

    /* NOTE(review): the exit status is 0 even when err is non-zero */
    return 0;
}