Frames | No Frames |
1: /* PipelineFactory.java -- 2: Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: package gnu.xml.pipeline; 39: 40: import java.io.File; 41: import java.io.FileOutputStream; 42: import java.io.IOException; 43: import java.io.OutputStream; 44: import java.io.OutputStreamWriter; 45: import java.lang.reflect.Constructor; 46: import java.util.StringTokenizer; 47: 48: import org.xml.sax.*; 49: import org.xml.sax.ext.*; 50: 51: 52: /** 53: * This provides static factory methods for creating simple event pipelines. 54: * These pipelines are specified by strings, suitable for passing on 55: * command lines or embedding in element attributes. For example, one way 56: * to write a pipeline that restores namespace syntax, validates (stopping 57: * the pipeline on validity errors) and then writes valid data to standard 58: * output is this: <pre> 59: * nsfix | validate | write ( stdout )</pre> 60: * 61: * <p> In this syntax, the tokens are always separated by whitespace, and each 62: * stage of the pipeline may optionally have a parameter (which can be a 63: * pipeline) in parentheses. Interior stages are called filters, and the 64: * rightmost end of a pipeline is called a terminus. 65: * 66: * <p> Stages are usually implemented by a single class, which may not be 67: * able to act as both a filter and a terminus; but any terminus can be 68: * automatically turned into a filter, through use of a {@link TeeConsumer}. 69: * The stage identifiers are either class names, or are one of the following 70: * short identifiers built into this class. (Most of these identifiers are 71: * no more than aliases for classes.) The built-in identifiers include:</p> 72: 73: <table border="1" cellpadding="3" cellspacing="0"> 74: <tr bgcolor="#ccccff" class="TableHeadingColor"> 75: <th align="center" width="5%">Stage</th> 76: <th align="center" width="9%">Parameter</th> 77: <th align="center" width="1%">Terminus</th> 78: <th align="center">Description</th> 79: </tr> 80: 81: <tr valign="top" align="center"> 82: <td><a href="../dom/Consumer.html">dom</a></td> 83: <td><em>none</em></td> 84: <td> yes </td> 85: <td align="left"> Applications code can access a DOM Document built 86: from the input event stream. When used as a filter, this buffers 87: data up to an <em>endDocument</em> call, and then uses a DOM parser 88: to report everything that has been recorded (which can easily be 89: less than what was reported to it). </td> 90: </tr> 91: <tr valign="top" align="center"> 92: <td><a href="NSFilter.html">nsfix</a></td> 93: <td><em>none</em></td> 94: <td>no</td> 95: <td align="left">This stage ensures that the XML element and attribute 96: names in its output use namespace prefixes and declarations correctly. 97: That is, so that they match the "Namespace plus LocalName" naming data 98: with which each XML element and attribute is already associated. </td> 99: </tr> 100: <tr valign="top" align="center"> 101: <td><a href="EventFilter.html">null</a></td> 102: <td><em>none</em></td> 103: <td>yes</td> 104: <td align="left">This stage ignores all input event data.</td> 105: </tr> 106: <tr valign="top" align="center"> 107: <td><a href="CallFilter.html">server</a></td> 108: <td><em>required</em><br> server URL </td> 109: <td>no</td> 110: <td align="left">Sends its input as XML request to a remote server, 111: normally a web application server using the HTTP or HTTPS protocols. 112: The output of this stage is the parsed response from that server.</td> 113: </tr> 114: <tr valign="top" align="center"> 115: <td><a href="TeeConsumer.html">tee</a></td> 116: <td><em>required</em><br> first pipeline</td> 117: <td>no</td> 118: <td align="left">This sends its events down two paths; its parameter 119: is a pipeline descriptor for the first path, and the second path 120: is the output of this stage.</td> 121: </tr> 122: 123: <tr valign="top" align="center"> 124: <td><a href="ValidationConsumer.html">validate</a></td> 125: <td><em>none</em></td> 126: <td>yes</td> 127: <td align="left">This checks for validity errors, and reports them 128: through its error handler. The input must include declaration events 129: and some lexical events. </td> 130: </tr> 131: <tr valign="top" align="center"> 132: <td><a href="WellFormednessFilter.html">wf</a></td> 133: <td><em>none</em></td> 134: <td>yes</td> 135: <td align="left"> This class provides some basic "well formedness" 136: tests on the input event stream, and reports a fatal error if any 137: of them fail. One example: start/end calls for elements must match. 138: No SAX parser is permitted to produce malformed output, but other 139: components can easily do so.</td> 140: </tr> 141: <tr valign="top" align="center"> 142: <td>write</td> 143: <td><em>required</em><br> "stdout", "stderr", or filename</td> 144: <td>yes</td> 145: <td align="left"> Writes its input to the specified output, as pretty 146: printed XML text encoded using UTF-8. Input events must be well 147: formed and "namespace fixed", else the output won't be XML (or possibly 148: namespace) conformant. The symbolic names represent 149: <em>System.out</em> and <em>System.err</em> respectively; names must 150: correspond to files which don't yet exist.</td> 151: </tr> 152: <tr valign="top" align="center"> 153: <td>xhtml</td> 154: <td><em>required</em><br> "stdout", "stderr", or filename</td> 155: <td>yes</td> 156: <td align="left"> Like <em>write</em> (above), except that XHTML rules 157: are followed. The XHTML 1.0 Transitional document type is declared, 158: and only ASCII characters are written (for interoperability). Other 159: characters are written as entity or character references; the text is 160: pretty printed.</td> 161: </tr> 162: <tr valign="top" align="center"> 163: <td><a href="XIncludeFilter.html">xinclude</a></td> 164: <td><em>none</em></td> 165: <td>no</td> 166: <td align="left">This stage handles XInclude processing. 167: This is like entity inclusion, except that the included content 168: is declared in-line rather than in the DTD at the beginning of 169: a document. 170: </td> 171: </tr> 172: <tr valign="top" align="center"> 173: <td><a href="XsltFilter.html">xslt</a></td> 174: <td><em>required</em><br> XSLT stylesheet URI</td> 175: <td>no</td> 176: <td align="left">This stage handles XSLT transformation 177: according to a stylesheet. 178: The implementation of the transformation may not actually 179: stream data, although if such an XSLT engine is in use 180: then that can happen. 181: </td> 182: </tr> 183: 184: </table> 185: 186: * <p> Note that {@link EventFilter#bind} can automatically eliminate 187: * some filters by setting SAX2 parser features appropriately. This means 188: * that you can routinely put filters like "nsfix", "validate", or "wf" at the 189: * front of a pipeline (for components that need inputs conditioned to match 190: * that level of correctness), and know that it won't actually be used unless 191: * it's absolutely necessary. 192: * 193: * @author David Brownell 194: */ 195: public class PipelineFactory 196: { 197: /** 198: * Creates a simple pipeline according to the description string passed in. 199: */ 200: public static EventConsumer createPipeline (String description) 201: throws IOException 202: { 203: return createPipeline (description, null); 204: } 205: 206: /** 207: * Extends an existing pipeline by prepending the filter pipeline to the 208: * specified consumer. Some pipelines need more customization than can 209: * be done through this simplified syntax. When they are set up with 210: * direct API calls, use this method to merge more complex pipeline 211: * segments with easily configured ones. 212: */ 213: public static EventConsumer createPipeline ( 214: String description, 215: EventConsumer next 216: ) throws IOException 217: { 218: // tokens are (for now) what's separated by whitespace; 219: // very easy to parse, but IDs never have spaces. 220: 221: StringTokenizer tokenizer; 222: String tokens []; 223: 224: tokenizer = new StringTokenizer (description); 225: tokens = new String [tokenizer.countTokens ()]; 226: for (int i = 0; i < tokens.length; i++) 227: tokens [i] = tokenizer.nextToken (); 228: 229: PipelineFactory factory = new PipelineFactory (); 230: Pipeline pipeline = factory.parsePipeline (tokens, next); 231: 232: return pipeline.createPipeline (); 233: } 234: 235: 236: private PipelineFactory () { /* NYET */ } 237: 238: 239: /** 240: * Extends an existing pipeline by prepending a pre-tokenized filter 241: * pipeline to the specified consumer. Tokens are class names (or the 242: * predefined aliases) left and right parenthesis, and the vertical bar. 243: */ 244: public static EventConsumer createPipeline ( 245: String tokens [], 246: EventConsumer next 247: ) throws IOException 248: { 249: PipelineFactory factory = new PipelineFactory (); 250: Pipeline pipeline = factory.parsePipeline (tokens, next); 251: 252: return pipeline.createPipeline (); 253: } 254: 255: 256: private String tokens []; 257: private int index; 258: 259: private Pipeline parsePipeline (String toks [], EventConsumer next) 260: { 261: tokens = toks; 262: index = 0; 263: 264: Pipeline retval = parsePipeline (next); 265: 266: if (index != toks.length) 267: throw new ArrayIndexOutOfBoundsException ( 268: "extra token: " + tokens [index]); 269: return retval; 270: } 271: 272: // pipeline ::= stage | stage '|' pipeline 273: private Pipeline parsePipeline (EventConsumer next) 274: { 275: Pipeline retval = new Pipeline (parseStage ()); 276: 277: // minimal pipelines: "stage" and "... | id" 278: if (index > (tokens.length - 2) 279: || !"|".equals (tokens [index]) 280: ) { 281: retval.next = next; 282: return retval; 283: } 284: index++; 285: retval.rest = parsePipeline (next); 286: return retval; 287: } 288: 289: // stage ::= id | id '(' pipeline ')' 290: private Stage parseStage () 291: { 292: Stage retval = new Stage (tokens [index++]); 293: 294: // minimal stages: "id" and "id ( id )" 295: if (index > (tokens.length - 2) 296: || !"(".equals (tokens [index]) /*)*/ 297: ) 298: return retval; 299: 300: index++; 301: retval.param = parsePipeline (null); 302: if (index >= tokens.length) 303: throw new ArrayIndexOutOfBoundsException ( 304: "missing right paren"); 305: if (/*(*/ !")".equals (tokens [index++])) 306: throw new ArrayIndexOutOfBoundsException ( 307: "required right paren, not: " + tokens [index - 1]); 308: return retval; 309: } 310: 311: 312: // 313: // these classes obey the conventions for constructors, so they're 314: // only built in to this table of shortnames 315: // 316: // - filter (one or two types of arglist) 317: // * last constructor is 'next' element 318: // * optional (first) string parameter 319: // 320: // - terminus (one or types of arglist) 321: // * optional (only) string parameter 322: // 323: // terminus stages are transformed into filters if needed, by 324: // creating a "tee". filter stages aren't turned to terminus 325: // stages though; either eliminate such stages, or add some 326: // terminus explicitly. 327: // 328: private static final String builtinStages [][] = { 329: { "dom", "gnu.xml.dom.Consumer" }, 330: { "nsfix", "gnu.xml.pipeline.NSFilter" }, 331: { "null", "gnu.xml.pipeline.EventFilter" }, 332: { "server", "gnu.xml.pipeline.CallFilter" }, 333: { "tee", "gnu.xml.pipeline.TeeConsumer" }, 334: { "validate", "gnu.xml.pipeline.ValidationConsumer" }, 335: { "wf", "gnu.xml.pipeline.WellFormednessFilter" }, 336: { "xinclude", "gnu.xml.pipeline.XIncludeFilter" }, 337: { "xslt", "gnu.xml.pipeline.XsltFilter" }, 338: 339: // XXX want: option for validate, to preload external part of a DTD 340: 341: // xhtml, write ... nyet generic-ready 342: }; 343: 344: private static class Stage 345: { 346: String id; 347: Pipeline param; 348: 349: Stage (String name) 350: { id = name; } 351: 352: public String toString () 353: { 354: if (param == null) 355: return id; 356: return id + " ( " + param + " )"; 357: } 358: 359: private void fail (String message) 360: throws IOException 361: { 362: throw new IOException ("in '" + id 363: + "' stage of pipeline, " + message); 364: } 365: 366: EventConsumer createStage (EventConsumer next) 367: throws IOException 368: { 369: String name = id; 370: 371: // most builtins are just class aliases 372: for (int i = 0; i < builtinStages.length; i++) { 373: if (id.equals (builtinStages [i][0])) { 374: name = builtinStages [i][1]; 375: break; 376: } 377: } 378: 379: // Save output as XML or XHTML text 380: if ("write".equals (name) || "xhtml".equals (name)) { 381: String filename; 382: boolean isXhtml = "xhtml".equals (name); 383: OutputStream out = null; 384: TextConsumer consumer; 385: 386: if (param == null) 387: fail ("parameter is required"); 388: 389: filename = param.toString (); 390: if ("stdout".equals (filename)) 391: out = System.out; 392: else if ("stderr".equals (filename)) 393: out = System.err; 394: else { 395: File f = new File (filename); 396: 397: /* 398: if (!f.isAbsolute ()) 399: fail ("require absolute file paths"); 400: */ 401: if (f.exists ()) 402: fail ("file already exists: " + f.getName ()); 403: 404: // XXX this races against the existence test 405: out = new FileOutputStream (f); 406: } 407: 408: if (!isXhtml) 409: consumer = new TextConsumer (out); 410: else 411: consumer = new TextConsumer ( 412: new OutputStreamWriter (out, "8859_1"), 413: true); 414: 415: consumer.setPrettyPrinting (true); 416: if (next == null) 417: return consumer; 418: return new TeeConsumer (consumer, next); 419: 420: } else { 421: // 422: // Here go all the builtins that are just aliases for 423: // classes, and all stage IDs that started out as such 424: // class names. The following logic relies on several 425: // documented conventions for constructor invocation. 426: // 427: String msg = null; 428: 429: try { 430: Class klass = Class.forName (name); 431: Class argTypes [] = null; 432: Constructor constructor = null; 433: boolean filter = false; 434: Object params [] = null; 435: Object obj = null; 436: 437: // do we need a filter stage? 438: if (next != null) { 439: // "next" consumer is always passed, with 440: // or without the optional string param 441: if (param == null) { 442: argTypes = new Class [1]; 443: argTypes [0] = EventConsumer.class; 444: 445: params = new Object [1]; 446: params [0] = next; 447: 448: msg = "no-param filter"; 449: } else { 450: argTypes = new Class [2]; 451: argTypes [0] = String.class; 452: argTypes [1] = EventConsumer.class; 453: 454: params = new Object [2]; 455: params [0] = param.toString (); 456: params [1] = next; 457: 458: msg = "one-param filter"; 459: } 460: 461: 462: try { 463: constructor = klass.getConstructor (argTypes); 464: } catch (NoSuchMethodException e) { 465: // try creating a filter from a 466: // terminus and a tee 467: filter = true; 468: msg += " built from "; 469: } 470: } 471: 472: // build from a terminus stage, with or 473: // without the optional string param 474: if (constructor == null) { 475: String tmp; 476: 477: if (param == null) { 478: argTypes = new Class [0]; 479: params = new Object [0]; 480: 481: tmp = "no-param terminus"; 482: } else { 483: argTypes = new Class [1]; 484: argTypes [0] = String.class; 485: 486: params = new Object [1]; 487: params [0] = param.toString (); 488: 489: tmp = "one-param terminus"; 490: } 491: if (msg == null) 492: msg = tmp; 493: else 494: msg += tmp; 495: constructor = klass.getConstructor (argTypes); 496: // NOT creating terminus by dead-ending 497: // filters ... users should think about 498: // that one, something's likely wrong 499: } 500: 501: obj = constructor.newInstance (params); 502: 503: // return EventConsumers directly, perhaps after 504: // turning them into a filter 505: if (obj instanceof EventConsumer) { 506: if (filter) 507: return new TeeConsumer ((EventConsumer) obj, next); 508: return (EventConsumer) obj; 509: } 510: 511: // if it's not a handler, it's an error 512: // we can wrap handlers in a filter 513: EventFilter retval = new EventFilter (); 514: boolean updated = false; 515: 516: if (obj instanceof ContentHandler) { 517: retval.setContentHandler ((ContentHandler) obj); 518: updated = true; 519: } 520: if (obj instanceof DTDHandler) { 521: retval.setDTDHandler ((DTDHandler) obj); 522: updated = true; 523: } 524: if (obj instanceof LexicalHandler) { 525: retval.setProperty ( 526: EventFilter.PROPERTY_URI + "lexical-handler", 527: obj); 528: updated = true; 529: } 530: if (obj instanceof DeclHandler) { 531: retval.setProperty ( 532: EventFilter.PROPERTY_URI + "declaration-handler", 533: obj); 534: updated = true; 535: } 536: 537: if (!updated) 538: fail ("class is neither Consumer nor Handler"); 539: 540: if (filter) 541: return new TeeConsumer (retval, next); 542: return retval; 543: 544: } catch (IOException e) { 545: throw e; 546: 547: } catch (NoSuchMethodException e) { 548: fail (name + " constructor missing -- " + msg); 549: 550: } catch (ClassNotFoundException e) { 551: fail (name + " class not found"); 552: 553: } catch (Exception e) { 554: // e.printStackTrace (); 555: fail ("stage not available: " + e.getMessage ()); 556: } 557: } 558: // NOTREACHED 559: return null; 560: } 561: } 562: 563: private static class Pipeline 564: { 565: Stage stage; 566: 567: // rest may be null 568: Pipeline rest; 569: EventConsumer next; 570: 571: Pipeline (Stage s) 572: { stage = s; } 573: 574: public String toString () 575: { 576: if (rest == null && next == null) 577: return stage.toString (); 578: if (rest != null) 579: return stage + " | " + rest; 580: throw new IllegalArgumentException ("next"); 581: } 582: 583: EventConsumer createPipeline () 584: throws IOException 585: { 586: if (next == null) { 587: if (rest == null) 588: next = stage.createStage (null); 589: else 590: next = stage.createStage (rest.createPipeline ()); 591: } 592: return next; 593: } 594: } 595: 596: /* 597: public static void main (String argv []) 598: { 599: try { 600: // three basic terminus cases 601: createPipeline ("null"); 602: createPipeline ("validate"); 603: createPipeline ("write ( stdout )"); 604: 605: // four basic filters 606: createPipeline ("nsfix | write ( stderr )"); 607: createPipeline ("wf | null"); 608: createPipeline ("null | null"); 609: createPipeline ( 610: "call ( http://www.example.com/services/xml-1a ) | xhtml ( stdout )"); 611: 612: // tee junctions 613: createPipeline ("tee ( validate ) | write ( stdout )"); 614: createPipeline ("tee ( nsfix | write ( stdout ) ) | validate"); 615: 616: // longer pipeline 617: createPipeline ("nsfix | tee ( validate ) | write ( stdout )"); 618: createPipeline ( 619: "null | wf | nsfix | tee ( validate ) | write ( stdout )"); 620: 621: // try some parsing error cases 622: try { 623: createPipeline ("null ("); // extra token '(' 624: System.err.println ("** didn't report error"); 625: } catch (Exception e) { 626: System.err.println ("== err: " + e.getMessage ()); } 627: 628: try { 629: createPipeline ("nsfix |"); // extra token '|' 630: System.err.println ("** didn't report error"); 631: } catch (Exception e) { 632: System.err.println ("== err: " + e.getMessage ()); } 633: 634: try { 635: createPipeline ("xhtml ( foo"); // missing right paren 636: System.err.println ("** didn't report error"); 637: } catch (Exception e) { 638: System.err.println ("== err: " + e.getMessage ()); } 639: 640: try { 641: createPipeline ("xhtml ( foo bar"); // required right paren 642: System.err.println ("** didn't report error"); 643: } catch (Exception e) { 644: System.err.println ("== err: " + e.getMessage ()); } 645: 646: try { 647: createPipeline ("tee ( nsfix | validate");// missing right paren 648: System.err.println ("** didn't report error"); 649: } catch (Exception e) { 650: System.err.println ("== err: " + e.getMessage ()); } 651: 652: // try some construction error cases 653: 654: try { 655: createPipeline ("call"); // missing param 656: System.err.println ("** didn't report error"); 657: } catch (Exception e) { 658: System.err.println ("== err: " + e.getMessage ()); } 659: try { 660: createPipeline ("call ( foobar )"); // broken param 661: System.err.println ("** didn't report error"); 662: } catch (Exception e) { 663: System.err.println ("== err: " + e.getMessage ()); } 664: try { 665: createPipeline ("nsfix ( foobar )"); // illegal param 666: System.err.println ("** didn't report error"); 667: } catch (Exception e) { 668: System.err.println ("== err: " + e.getMessage ()); } 669: try { 670: createPipeline ("null ( foobar )"); // illegal param 671: System.err.println ("** didn't report error"); 672: } catch (Exception e) { 673: System.err.println ("== err: " + e.getMessage ()); } 674: try { 675: createPipeline ("wf ( foobar )"); // illegal param 676: System.err.println ("** didn't report error"); 677: } catch (Exception e) { 678: System.err.println ("== err: " + e.getMessage ()); } 679: try { 680: createPipeline ("xhtml ( foobar.html )"); 681: new File ("foobar.html").delete (); 682: // now supported 683: } catch (Exception e) { 684: System.err.println ("** err: " + e.getMessage ()); } 685: try { 686: createPipeline ("xhtml"); // missing param 687: System.err.println ("** didn't report error"); 688: } catch (Exception e) { 689: System.err.println ("== err: " + e.getMessage ()); } 690: try { 691: createPipeline ("write ( stdout ) | null"); // nonterminal 692: System.err.println ("** didn't report error"); 693: } catch (Exception e) { 694: System.err.println ("== err: " + e.getMessage ()); } 695: try { 696: createPipeline ("validate | null"); 697: // now supported 698: } catch (Exception e) { 699: System.err.println ("** err: " + e.getMessage ()); } 700: try { 701: createPipeline ("validate ( foo )"); // illegal param 702: System.err.println ("** didn't report error"); 703: } catch (Exception e) { 704: System.err.println ("== err: " + e.getMessage ()); } 705: try { 706: createPipeline ("tee"); // missing param 707: System.err.println ("** didn't report error"); 708: } catch (Exception e) { 709: System.err.println ("== err: " + e.getMessage ()); } 710: try { 711: // only builtins so far 712: createPipeline ("com.example.xml.FilterClass"); 713: System.err.println ("** didn't report error"); 714: } catch (Exception e) { 715: System.err.println ("== err: " + e.getMessage ()); } 716: 717: } catch (Exception e) { 718: e.printStackTrace (); 719: } 720: } 721: /**/ 722: 723: }