/* * frb2db: program to parse an XML datafile from the Federal Reserve * Board and create a gretl database. * * Copyright (C) 2010 Allin Cottrell; written October 2010. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #include #include #include #include #include #include #include #define XUC const xmlChar * typedef struct frb_series_ frb_series; struct frb_series_ { char name[16]; char descrip[128]; int startyr; int startmon; int endyr; int endmon; int nobs; }; static void frb_series_init (frb_series *fs) { *fs->name = '\0'; *fs->descrip = '\0'; fs->startyr = 0; fs->startmon = 0; fs->endyr = 0; fs->endmon = 0; fs->nobs = 0; } static int series_is_monthly (const char *s) { return atoi(s) == 129; /* see frb_common.xsd */ } static int parse_frb_obs (xmlNodePtr node, FILE *fbin, frb_series *fs, int t) { xmlChar *status, *value, *period; int y, m, d; int err = 0; status = xmlGetProp(node, (XUC) "OBS_STATUS"); value = xmlGetProp(node, (XUC) "OBS_VALUE"); period = xmlGetProp(node, (XUC) "TIME_PERIOD"); if (status == NULL || value == NULL || period == NULL) { return 1; } if (sscanf((const char *) period, "%d-%d-%d", &y, &m, &d) != 3) { err = 1; } else if (t == 0) { fs->startyr = y; fs->startmon = m; } else { fs->endyr = y; fs->endmon = m; } if (!err) { float fx; if (!strcmp((const char *) status, "A")) { /* code for valid observation */ fx = (float) atof((const char *) value); } else { fx = -999.0; /* missing */ } fwrite(&fx, sizeof fx, 1, fbin); } free(status); free(value); free(period); return err; } struct namefix { const char *horrid; const char *ok; const char *desc; }; /* fixed strings for Fed's H15 (Interest Rate) dataset */ struct namefix H15_fixers[] = { { "RIFSPFF", "fedfund", "Federal Funds Rate (effective)" }, { "RIFSPBLP", "prime", "Bank Prime Loan Rate" }, { "RIFSPPNAAD30", "cp1m", "1-month Commercial Paper Rate - Nonfinancial" }, { "RIFSPPNAAD60", "cp2m", "2-month Commercial Paper Rate - Nonfinancial" }, { "RIFSPPNAAD90", "cp3m", "3-month Commercial Paper Rate - Nonfinancial" }, { "RIFSPPFAAD30", "fp1m", "1-month Commercial Paper Rate - Financial" }, { "RIFSPPFAAD60", "fp2m", "2-month Commercial Paper Rate - Financial" }, { "RIFSPPFAAD90", "fp3m", "3-month Commercial Paper Rate - Financial" }, { "RIFSPPCU", "cpffwout", "3-Mo Unsecured Paper to CPFF, w/o Unsecured Credit Surcharge" }, { "RIFSPPCUS", "cpffwith", "3-Mo Unsecured Paper to CPFF, with Unsecured Credit Surcharge" }, { "RIFSPDCNSM01", "cd1m", "1-month CDs, Secondary Market" }, { "RIFSPDCNSM03", "cd3m", "3-month CDs, Secondary Market" }, { "RIFSPDCNSM06", "cd6m", "6-month CDs, Secondary Market" }, { "RIBLGNG20", "slbond", "Bond Buyer GO 20-Year Bond Municipal Bond Index" }, { "RMMPCCFC", "cm", "30-year Fixed Rate Conventional Mortgages" }, { "RIFLDIY01", "swap1y", "1-year Interest Rate Swap" }, { "RIFLDIY02", "swap2y", "2-year Interest Rate Swap" }, { "RIFLDIY03", "swap3y", "3-year Interest Rate Swap" }, { "RIFLDIY04", "swap4y", "4-year Interest Rate Swap" }, { "RIFLDIY05", "swap5y", "5-year Interest Rate Swap" }, { "RIFLDIY07", "swap7y", "7-year Interest Rate Swap" }, { "RIFLDIY10", "swap10y", "10-year Interest Rate Swap" }, { "RIFLDIY30", "swap30y", "30-year Interest Rate Swap" }, { "RIFLGFCY01", "tcm1y", "1-year Treasury Constant Maturity" }, { "RIFLGFCY02", "tcm2y", "2-year Treasury Constant Maturity" }, { "RIFLGFCY03", "tcm3y", "3-year Treasury Constant Maturity" }, { "RIFLGFCY05", "tcm5y", "5-year Treasury Constant Maturity" }, { "RIFLGFCY07", "tcm7y", "7-year Treasury Constant Maturity" }, { "RIFLGFCY10", "tcm10y", "10-year Treasury Constant Maturity" }, { "RIFLGFCY20", "tcm20y", "20-year Treasury Constant Maturity" }, { "RIFLGFCY30", "tcm30y", "30-year Treasury Constant Maturity" }, { "RIFLGFCY05_XII", "tcm5yi", "5-year inflation indexed Treasury Constant Maturity" }, { "RIFLGFCY07_XII", "tcm7yi", "7-year inflation indexed Treasury Constant Maturity" }, { "RIFLGFCY10_XII", "tcm10yi", "10-year inflation indexed Treasury Constant Maturity" }, { "RIFLGFCY20_XII", "tcm20yi", "20-year inflation indexed Treasury Constant Maturity" }, { "RIFLGFCY30_XII", "tcm30yi", "30-year inflation indexed Treasury Constant Maturity" }, { "RIFLGFL_XII", "tltavg", "Inflation indexed Treasury long-term average (over 10 years)" }, { "RIFLGFCM01", "tbmy1m", "1-month Treasury Bills, Market Yield" }, { "RIFLGFCM03", "tbmy3m", "3-month Treasury Bills, Market Yield" }, { "RIFLGFCM06", "tbmy6m", "6-month Treasury Bills, Market Yield" }, { "RIFSGFSW04", "tbsm4w", "4-week Treasury bill, Secondary Market" }, { "RIFSGFSM03", "tbsm3m", "3-month Treasury bill, Secondary Market" }, { "RIFSGFSM06", "tbsm6m", "6-month Treasury bill, Secondary Market" }, { "RIFSGFSY01", "tbsm1y", "1-year Treasury bill, Secondary Market" }, { "RILSPDEPM01", "ed1m", "1-month Euro-Dollar Deposit Rate" }, { "RILSPDEPM03", "ed3m", "3-month Euro-Dollar Deposit Rate" }, { "RILSPDEPM06", "ed6m", "6-month Euro-Dollar Deposit Rate" }, { "RIMLPAAAR", "aaa", "Moody's Yield on Seasoned Corporate AAA" }, { "RIMLPBAAR", "baa", "Moody's Yield on Seasoned Corporate BAA" }, { "RIFSRP_F02", "rega", "Primary credit under FRB's amended Regulation A" }, { NULL, NULL, NULL } }; struct descfix { const char *targ; const char *repl; }; static void set_frb_series_descrip (frb_series *fs, const char *s) { struct descfix fixers[] = { { "quoted on an investment basis", "investment basis" }, { "quoted on investment basis", "investment basis" }, { "certificates of deposit", "CDs" }, { "neogtiable", "negotiable" }, { " Interest Rate", "" }, { "^", "" }, { NULL, NULL } }; char tmp[256] = {0}; int i, n; *tmp = '\0'; n = 0; while (*s && n < 255) { for (i=0; fixers[i].targ != NULL; i++) { if (!strncmp(s, fixers[i].targ, strlen(fixers[i].targ))) { strcat(tmp, fixers[i].repl); s += strlen(fixers[i].targ); n += strlen(fixers[i].repl); break; } } if (*s == '\n' || *s == ' ') { tmp[n++] = ' '; s++; while (*s == ' ') s++; continue; } else { tmp[n++] = *s; } s++; } n = strlen(tmp); if (tmp[n-1] == '.') { tmp[n-1] = '\0'; } strncat(fs->descrip, tmp, 127); } static int parse_frb_annotations (xmlDocPtr doc, xmlNodePtr node, frb_series *fs) { xmlNodePtr c2, c1 = node->xmlChildrenNode; int gotdesc = 0; int err = 0; while (c1 != NULL && !err && !gotdesc) { if (!xmlStrcmp(c1->name, (XUC) "Annotation")) { c2 = c1->xmlChildrenNode; while (c2 != NULL && !err && !gotdesc) { if (!xmlStrcmp(c2->name, (XUC) "AnnotationType")) { xmlChar *type = xmlNodeListGetString(doc, c2->xmlChildrenNode, 1); /* Note: one could use the "Short Description" field here instead */ if (type != NULL && !xmlStrcmp(type, (XUC) "Long Description")) { xmlChar *text = NULL; c2 = c2->next; if (c2 == NULL) { err = 1; } else if (xmlStrcmp(c2->name, (XUC) "AnnotationText")) { err = 1; } else { text = xmlNodeListGetString(doc, c2->xmlChildrenNode, 1); if (text == NULL) { err = 1; } else { set_frb_series_descrip(fs, (const char *) text); free(text); gotdesc = 1; } } } free(type); } c2 = c2->next; } } c1 = c1->next; } if (!err && !gotdesc) { err = 1; } return err; } static int set_frb_series_name (frb_series *fs, const char *s, const char *dataset) { char *p, tmp[64]; int err = 0; *tmp = '\0'; strncat(tmp, s, 63); p = strstr(tmp, "_N."); if (p != NULL) { *p = '\0'; } p = tmp; while (*p) { if (*p == '.') { *p = '_'; } p++; } if (!strcmp(dataset, "H15")) { int i; for (i=0; H15_fixers[i].horrid != NULL; i++) { if (strcmp(tmp, H15_fixers[i].horrid) == 0) { strcpy(fs->name, H15_fixers[i].ok); strcpy(fs->descrip, H15_fixers[i].desc); return 0; } } } if (strlen(tmp) > 15) { fprintf(stderr, "Series name too long: '%s'\n", tmp); err = 1; } else { strncat(fs->name, tmp, 15); } return err; } static int parse_frb_series (xmlDocPtr doc, xmlNodePtr node, FILE *fidx, FILE *fbin, const char *dset) { xmlChar *freq, *sername = NULL; xmlNodePtr cur; frb_series fs; int t, err = 0; freq = xmlGetProp(node, (XUC) "FREQ"); if (freq == NULL) { return 1; } if (!series_is_monthly((const char *) freq)) { free(freq); return 0; } frb_series_init(&fs); sername = xmlGetProp(node, (XUC) "SERIES_NAME"); if (sername == NULL) { free(freq); return 1; } set_frb_series_name(&fs, (const char *) sername, dset); cur = node->xmlChildrenNode; t = 0; while (cur != NULL && !err) { if (!xmlStrcmp(cur->name, (XUC) "Annotations")) { if (*fs.descrip == '\0') { err = parse_frb_annotations(doc, cur, &fs); } } else if (!xmlStrcmp(cur->name, (XUC) "Obs")) { err = parse_frb_obs(cur, fbin, &fs, t++); } cur = cur->next; } if (!err && fidx != NULL) { /* print the series index entry */ fprintf(fidx, "%s %s\n", fs.name, fs.descrip); fprintf(fidx, "M %d.%02d - %d.%02d n = %d\n", fs.startyr, fs.startmon, fs.endyr, fs.endmon, t); } free(freq); free(sername); return err; } static int skip_dataset (const char *id) { return strcmp(id, "discontinued") == 0; } static int parse_frb_dataset (xmlDocPtr doc, xmlNodePtr node, FILE *fidx, FILE *fbin) { xmlChar *id; xmlNodePtr cur; int err = 0; id = xmlGetProp(node, (XUC) "id"); if (id == NULL) { return 1; } if (skip_dataset((const char *) id)) { return 0; } printf("Dataset: %s\n", (const char *) id); cur = node->xmlChildrenNode; while (cur != NULL && !err) { if (!xmlStrcmp(cur->name, (XUC) "Series")) { err = parse_frb_series(doc, cur, fidx, fbin, (const char *) id); } cur = cur->next; } free(id); return err; } static int parse_frb_header (xmlNodePtr node) { xmlNodePtr cur = node->xmlChildrenNode; int err = 0; while (cur != NULL && !err) { printf("Header: got node %s\n", (const char *) cur->name); cur = cur->next; } return err; } /* Parse XML file obtained from FRB, using the libxml2 API */ static int parse_frb_xml (const char *fname, FILE *fidx, FILE *fbin) { xmlDocPtr doc; xmlNodePtr node, cur; int err = 0; doc = xmlReadFile(fname, NULL, XML_PARSE_NOBLANKS); if (doc == NULL) { fprintf(stderr, "parse_frb_xml: xmlReadFile failed\n"); err = 1; } if (!err) { node = xmlDocGetRootElement(doc); if (node == NULL) { fprintf(stderr, "parse_frb_xml: empty document\n"); err = 1; } } cur = node->xmlChildrenNode; while (cur != NULL && !err) { if (!xmlStrcmp(cur->name, (XUC) "Header")) { err = parse_frb_header(cur); } else if (!xmlStrcmp(cur->name, (XUC) "DataSet")) { err = parse_frb_dataset(doc, cur, fidx, fbin); } cur = cur->next; } if (doc != NULL) { xmlFreeDoc(doc); } return err; } int main (int argc, char **argv) { FILE *fidx = NULL, *fbin = NULL; const char *fname; if (argc < 2) { fprintf(stderr, "%s: give the nname of an FRB data file to parse\n", argv[0]); exit(EXIT_FAILURE); } fname = argv[1]; fidx = fopen("fedbog.idx", "w"); fbin = fopen("fedbog.bin", "wb"); if (fidx == NULL || fbin == NULL) { fprintf(stderr, "%s: couldn't open output files\n", argv[0]); exit(EXIT_FAILURE); } else { fputs("# Federal Reserve Board (interest rates)\n", fidx); } xmlInitParser(); parse_frb_xml(fname, fidx, fbin); fclose(fidx); fclose(fbin); xmlCleanupParser(); return 0; }