/*
 * iso2asc -- a simple, but powerful tool that allows you to convert
 * text files coded using the 8-bit character set ISO 8859-1 as readable
 * as possible to 7-bit ASCII files and a few other character sets.
 *
 * This C program should compile easily without modifications with
 * almost any ISO C compiler under almost any operating system.
 * The manual is printed when the program is started without command
 * line arguments.
 *
 * Author: Markus Kuhn, University of Erlangen, Germany
 *         1993-11-02
 *
 * Feel free to redistribute this software.
 */

#include <stdio.h>
#include <stdlib.h>

#define ISO_TABLES 7      /* number of conversion tables in iso2asc[] */
#define BUF_LENGTH 3000   /* size of the line buffer read by fgets() */

/* Input line buffer and output buffer; with the built-in tables one
   input character expands to at most 4 output characters. */
char inbuf[BUF_LENGTH];
char outbuf[4*BUF_LENGTH];

/* Print a short manual to stderr and abort (exit status 1). */
void usage(void)
{
  fprintf(stderr,"iso2asc V1.1 -- Markus Kuhn\n\n");
  /* synopsis: the table number is mandatory, code=string pairs optional */
  fprintf(stderr,"Usage: iso2asc <table> {<code>=<string>} ");
  fprintf(stderr,"[-s] [-l]\n\n");
  fprintf(stderr,"Reads a text file from standard input ");
  fprintf(stderr,"encoded with the 8-bit character set\nISO 8859-1. ");
  fprintf(stderr,"Standard output is the text file with non-ASCII ");
  fprintf(stderr,"characters replaced\n");
  fprintf(stderr,"by ASCII characters as readable as possible. All users ");
  fprintf(stderr,"can't be satisfied with\none single transcription ");
  fprintf(stderr,"table, so select one of the following %d tables:\n\n",
          ISO_TABLES);
  fprintf(stderr," 0 universal table for many languages (simply remove ");
  fprintf(stderr,"all accents)\n");
  fprintf(stderr," 1 monospaced version of table 0\n");
  fprintf(stderr," 2 table for Danish, Dutch, German, Norwegian and ");
  fprintf(stderr,"Swedish (a+\" -> ae, etc.)\n");
  fprintf(stderr," 3 table for Danish, Finnish, Norwegian and Swedish ");
  fprintf(stderr,"using\n the appropriate ISO 646 variant of ");
  fprintf(stderr,"ASCII (o+\" -> |, etc.)\n");
  fprintf(stderr," 4 table with RFC 1345 codes in brackets ");
  fprintf(stderr,"(e.g. e+^ -> [e>], etc.)\n");
  fprintf(stderr," 5 table for printers that allow overstriking ");
  fprintf(stderr,"with backspace\n");
  fprintf(stderr," 6 IBM PC character set (code page 437) output\n");
  fprintf(stderr,"\nModify any of the tables by adding command ");
  fprintf(stderr,"line arguments like e.g. '169=(C)',\nwhere 169 is the ");
  fprintf(stderr,"character number of the Latin 1 copyright sign and '(C)' ");
  fprintf(stderr,"is\nyour replacement. With 'SUB=_', you can ");
  fprintf(stderr,"specify that you prefer '_' instead of\n'?' if no ");
  fprintf(stderr,"reasonable replacement is possible. Option '-s' avoids ");
  fprintf(stderr,"spaces being\nremoved for column correction and ");
  fprintf(stderr,"-l prints the table to standard output.\n");
  exit(1);
}

/*
 * Conversion tables for displaying the G1 set (0xa0-0xff) of
 * ISO Latin 1 (ISO 8859-1) with 7-bit ASCII characters.
 *
 * Version 1.2 -- error corrections are welcome
 *
 * Table   Purpose
 *   0     universal table for many languages
 *   1     single-spacing universal table
 *   2     table for Danish, Dutch, German, Norwegian and Swedish
 *   3     table for Danish, Finnish, Norwegian and Swedish using
 *         the appropriate ISO 646 variant.
 *   4     table with RFC 1345 codes in brackets
 *   5     table for printers that allow overstriking with backspace
 *   6     table for IBM PC character set (code page 437)
 *
 * Markus Kuhn
 *
 * Entry [t][c - 0xa0] is the replacement string for Latin 1 character c
 * in table t.
 */

#define SUB NULL       /* used if no reasonable ASCII string is possible */
#define DEFAULT_SUB "?" /* emitted by the converters for unpatched SUB entries */

static char *iso2asc[ISO_TABLES][96] = {{
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","A","A","AE","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","O","x","O","U","U","U","U","Y","Th","ss",
  "a","a","a","a","a","a","ae","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","o",":","o","u","u","u","u","y","th","y"
},{
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","c","a","<","-","-","R","-",
  " ",SUB,"2","3","'","u","P",".",",","1","o",">",SUB,SUB,SUB,"?",
  "A","A","A","A","A","A","A","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","O","x","O","U","U","U","U","Y","T","s",
  "a","a","a","a","a","a","a","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","o",":","o","u","u","u","u","y","t","y"
},{
  " ","!","c",SUB,SUB,"Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","Ae","Aa","AE","C","E","E","E","E","I","I","I","I",
  "D","N","O","O","O","O","Oe","x","Oe","U","U","U","Ue","Y","Th","ss",
  "a","a","a","a","ae","aa","ae","c","e","e","e","e","i","i","i","i",
  "d","n","o","o","o","o","oe",":","oe","u","u","u","ue","y","th","ij"
},{
  " ","!","c",SUB,"$","Y","|",SUB,"\"","(c)","a","<<","-","-","(R)","-",
  " ","+/-","2","3","'","u","P",".",",","1","o",">>"," 1/4"," 1/2"," 3/4","?",
  "A","A","A","A","[","]","[","C","E","@","E","E","I","I","I","I",
  "D","N","O","O","O","O","\\","x","\\","U","U","U","^","Y","Th","ss",
  "a","a","a","a","{","}","{","c","e","`","e","e","i","i","i","i",
  "d","n","o","o","o","o","|",":","|","u","u","u","~","y","th","y"
},{
  "[NS]","[!I]","[Ct]","[Pd]","[Cu]","[Ye]","[BB]","[SE]",
  "[':]","[Co]","[-a]","[<<]","[NO]","[--]","[Rg]","['-]",
  "[DG]","[+-]","[2S]","[3S]","['']","[My]","[PI]","[.M]",
  "[',]","[1S]","[-o]","[>>]","[14]","[12]","[34]","[?I]",
  "[A!]","[A']","[A>]","[A?]","[A:]","[AA]","[AE]","[C,]",
  "[E!]","[E']","[E>]","[E:]","[I!]","[I']","[I>]","[I:]",
  "[D-]","[N?]","[O!]","[O']","[O>]","[O?]","[O:]","[*X]",
  "[O/]","[U!]","[U']","[U>]","[U:]","[Y']","[TH]","[ss]",
  "[a!]","[a']","[a>]","[a?]","[a:]","[aa]","[ae]","[c,]",
  "[e!]","[e']","[e>]","[e:]","[i!]","[i']","[i>]","[i:]",
  "[d-]","[n?]","[o!]","[o']","[o>]","[o?]","[o:]","[-:]",
  "[o/]","[u!]","[u']","[u>]","[u:]","[y']","[th]","[y:]"
},{
  " ","!","c\b|","L\b-","o\bX","Y\b=","|",SUB,
  "\"","(c)","a\b_","<<","-\b,","-","(R)","-",
  " ","+\b_","2","3","'","u","P",".",
  ",","1","o\b_",">>"," 1/4"," 1/2"," 3/4","?",
  "A\b`","A\b'","A\b^","A\b~","A\b\"","Aa","AE","C\b,",
  "E\b`","E\b'","E\b^","E\b\"","I\b`","I\b'","I\b^","I\b\"",
  "D\b-","N\b~","O\b`","O\b'","O\b^","O\b~","O\b\"","x",
  "O\b/","U\b`","U\b'","U\b^","U\b\"","Y\b'","Th","ss",
  "a\b`","a\b'","a\b^","a\b~","a\b\"","aa","ae","c\b,",
  "e\b`","e\b'","e\b^","e\b\"","i\b`","i\b'","i\b^","i\b\"",
  "d\b-","n\b~","o\b`","o\b'","o\b^","o\b~","o\b\"","-\b:",
  "o\b/","u\b`","u\b'","u\b^","u\b\"","y\b'","th","y\b\""
},{
  "\377","\255","\233","\234",SUB,"\235","|","\25",
  "\"","(c)","\246","\256","\252","-","(R)","-",
  "\370","\361","\375","3","'","\346","\24","\371",
  ",","1","\247","\257","\254","\253"," 3/4","\250",
  "A","A","A","A","\216","\217","\222","\200",
  "E","\220","E","E","I","I","I","I",
  "D","\245","O","O","O","O","\231","x",
  "\355","U","U","U","\232","Y","T","\341",
  "\205","\240","\203","a","\204","\206","\221","\207",
  "\212","\202","\210","\211","\215","\241","\214","\213",
  "d","\244","\225","\242","\223","o","\224","\366",
  "\355","\227","\243","\226","\201","y","t","\230"
}};

/* TABSTOP(x) is the column of the character after the TAB at column x.
   First column is 0, of course. */
#define TABSTOP(x) (((x) - ((x)&7)) + 8)

/*
 * Return the replacement string of table t for the Latin 1 character c
 * (c must be in 0xa0..0xff, t in 0..ISO_TABLES-1).  The table is indexed
 * with c - 0xa0 instead of biasing the row pointer by -0xa0, because a
 * pointer in front of the array is undefined behaviour.  Entries that
 * are still SUB (NULL) yield DEFAULT_SUB so that they are never
 * dereferenced.
 */
static const char *latin1_replacement(int t, unsigned char c)
{
  const char *s = iso2asc[t][c - 0xa0];

  return (s == SUB) ? DEFAULT_SUB : s;
}

/*
 * Transform an 8-bit ISO Latin 1 string iso into a 7-bit ASCII string asc
 * readable on old terminals using conversion table t.
 *
 * iso  NUL-terminated input string; characters <= 0x9f are copied as is
 * asc  output buffer, receives the NUL-terminated result
 * t    table number, 0 .. ISO_TABLES-1 (not checked here)
 *
 * Nothing is done if iso or asc is NULL.
 *
 * worst case with the built-in tables: strlen(asc) == 4*strlen(iso),
 * so asc must provide 4*strlen(iso)+1 bytes.  Longer replacement strings
 * stored in the table need correspondingly more room.
 */
void Latin1toASCII(const unsigned char *iso, unsigned char *asc, int t)
{
  const char *p;

  if (iso == NULL || asc == NULL)
    return;

  while (*iso) {
    if (*iso > 0x9f) {
      /* G1 character: emit its replacement string */
      p = latin1_replacement(t, *iso++);
      while (*p)
        *asc++ = (unsigned char) *p++;
    } else {
      *asc++ = *iso++;
    }
  }
  *asc = 0;
}

/*
 * Transform an 8-bit ISO Latin 1 string iso into a 7-bit ASCII string asc
 * readable on old terminals using conversion table t. Remove SPACE and
 * TAB characters where appropriate, in order to preserve the layout
 * of tables, etc. as much as possible.
 *
 * iso  NUL-terminated input string; characters <= 0x9f are copied as is
 * asc  output buffer, receives the NUL-terminated result
 * t    table number, 0 .. ISO_TABLES-1 (not checked here)
 *
 * Nothing is done if iso or asc is NULL.
 *
 * worst case with the built-in tables: strlen(asc) == 4*strlen(iso),
 * so asc must provide 4*strlen(iso)+1 bytes.  Longer replacement strings
 * stored in the table need correspondingly more room.
 */
void CorLatin1toASCII(const unsigned char *iso, unsigned char *asc, int t)
{
  const char *p;
  int first;   /* flag for first SPACE/TAB after other characters */
  int i, a;    /* column counters in iso and asc */

  if (iso == NULL || asc == NULL)
    return;

  first = 1;
  i = a = 0;
  while (*iso) {
    if (*iso > 0x9f) {
      /* G1 character: one input column, strlen(p) output columns */
      p = latin1_replacement(t, *iso++);
      i++;
      first = 1;
      while (*p) {
        *asc++ = (unsigned char) *p++;
        a++;
      }
    } else if (a > i && (*iso == ' ' || *iso == '\t')) {
      /* output is ahead of input: spaces or TABs should be removed */
      if (*iso == ' ') {
        /* only the first space after a letter must not be removed */
        if (first) {
          *asc++ = ' ';
          a++;
          first = 0;
        }
        i++;
      } else {
        /* here: *iso == '\t' */
        if (a >= TABSTOP(i)) {
          /* remove TAB or replace it with SPACE if necessary */
          if (first) {
            *asc++ = ' ';
            a++;
            first = 0;
          }
        } else {
          /* TAB will correct the column difference */
          *asc++ = '\t';     /* = *iso */
          a = TABSTOP(a);    /* = TABSTOP(i), because i < a < TABSTOP(i) */
        }
        i = TABSTOP(i);
      }
      iso++;
    } else {
      /* just copy the characters and advance the column counters */
      if (*iso == '\t') {
        a = i = TABSTOP(i);  /* = TABSTOP(a), because here a = i */
      } else if (*iso == '\b') {
        a--;
        i--;
      } else {
        a++;
        i++;
      }
      *asc++ = *iso++;
      first = 1;
    }
  }
  *asc = 0;
}
int main(argc, argv) int argc; char **argv; { int table; int corr = 1, list = 0; int i,j,code; if (argc < 2) usage(); table = argv[1][0] - '0'; if (table < 0 || table >= ISO_TABLES || argv[1][1] != '\0') usage(); for (i = 2; i < argc; i++) if (argv[i][0] == '-' || argv[i][0] == '/') switch(argv[i][1]) { case 's': case 'S': corr = 0; break; case 'l': case 'L': list = 1; break; default: usage(); } else { if ((argv[i][0] == 's' || argv[i][0] == 'S') && (argv[i][1] == 'u' || argv[i][1] == 'U') && (argv[i][2] == 'b' || argv[i][2] == 'B') && argv[i][3] == '=') for (j = 0x00; j < 0x60; j++) { if (iso2asc[table][j] == SUB) iso2asc[table][j] = argv[i] + 4; } else { if (sscanf(argv[i], "%i=%n", &code, &j) == EOF) usage(); if (code < 160 || code > 255) usage(); iso2asc[table][code - 160] = argv[i] + j; } } /* default SUB */ for (j = 0x00; j < 0x60; j++) if (iso2asc[table][j] == SUB) iso2asc[table][j] = "?"; if (list) { for (i = 0x00; i<0x60; i++) printf(((i & 15) == 15) ? "%4s\n" : "%4s ", iso2asc[table][i]); exit(0); } while (fgets(inbuf, BUF_LENGTH, stdin) != NULL) { if (corr) CorLatin1toASCII((unsigned char *) inbuf, (unsigned char *) outbuf, table); else Latin1toASCII((unsigned char *) inbuf, (unsigned char *) outbuf, table); if (fputs(outbuf, stdout) == EOF) perror("Error while writing output in iso2asc"); } if (ferror(stdin)) perror("Error while reading input in iso2asc"); return(0); }