Source for gnu.xml.pipeline.WellFormednessFilter

   1: /* WellFormednessFilter.java --
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.pipeline;
  39: 
  40: import java.util.EmptyStackException;
  41: import java.util.Stack;
  42: 
  43: import org.xml.sax.Attributes;
  44: import org.xml.sax.ErrorHandler;
  45: import org.xml.sax.Locator;
  46: import org.xml.sax.SAXException;
  47: import org.xml.sax.SAXParseException;
  48: 
  49: /**
  50:  * This filter reports fatal exceptions in the case of event streams that
  51:  * are not well formed.  The rules currently tested include: <ul>
  52:  *
  53:  *      <li>setDocumentLocator ... may be called only before startDocument
  54:  *
  55:  *      <li>startDocument/endDocument ... must be paired, and all other
  56:  *      calls (except setDocumentLocator) must be nested within these.
  57:  *
  58:  *      <li>startElement/endElement ... must be correctly paired, and
  59:  *      may never appear within CDATA sections.
  60:  *
  61:  *      <li>comment ... can't contain "--"
  62:  *
  63:  *      <li>character data ... can't contain "]]&gt;"
  64:  *
  65:  *      <li>whitespace ... can't contain CR
  66:  *
  67:  *      <li>whitespace and character data must be within an element
  68:  *
  69:  *      <li>processing instruction ... can't contain "?&gt;" or CR
  70:  *
  71:  *      <li>startCDATA/endCDATA ... must be correctly paired.
  72:  *
  73:  *      </ul>
  74:  *
  75:  * <p> Other checks for event stream correctness may be provided in
  76:  * the future.  For example, insisting that
  77:  * entity boundaries nest correctly,
  78:  * namespace scopes nest correctly,
  79:  * namespace values never contain relative URIs,
  80:  * attributes don't have "&lt;" characters;
  81:  * and more.
  82:  *
  83:  * @author David Brownell
  84:  */
  85: public final class WellFormednessFilter extends EventFilter
  86: {
  87:     private boolean             startedDoc;
  88:     private Stack               elementStack = new Stack ();
  89:     private boolean             startedCDATA;
  90:     private String              dtdState = "before";
  91: 
  92: 
  93:     /**
  94:      * Swallows all events after performing well formedness checks.
  95:      */
  96:         // constructor used by PipelineFactory
  97:     public WellFormednessFilter ()
  98:         { this (null); }
  99: 
 100: 
 101:     /**
 102:      * Passes events through to the specified consumer, after first
 103:      * processing them.
 104:      */
 105:         // constructor used by PipelineFactory
 106:     public WellFormednessFilter (EventConsumer consumer)
 107:     {
 108:         super (consumer);
 109: 
 110:         setContentHandler (this);
 111:         setDTDHandler (this);
 112: 
 113:         try {
 114:             setProperty (LEXICAL_HANDLER, this);
 115:         } catch (SAXException e) { /* can't happen */ }
 116:     }
 117: 
 118:     /**
 119:      * Resets state as if any preceding event stream was well formed.
 120:      * Particularly useful if it ended through some sort of error,
 121:      * and the endDocument call wasn't made.
 122:      */
 123:     public void reset ()
 124:     {
 125:         startedDoc = false;
 126:         startedCDATA = false;
 127:         elementStack.removeAllElements ();
 128:     }
 129: 
 130: 
 131:     private SAXParseException getException (String message)
 132:     {
 133:         SAXParseException       e;
 134:         Locator                 locator = getDocumentLocator ();
 135: 
 136:         if (locator == null)
 137:             return new SAXParseException (message, null, null, -1, -1);
 138:         else
 139:             return new SAXParseException (message, locator);
 140:     }
 141: 
 142:     private void fatalError (String message)
 143:     throws SAXException
 144:     {
 145:         SAXParseException       e = getException (message);
 146:         ErrorHandler            handler = getErrorHandler ();
 147: 
 148:         if (handler != null)
 149:             handler.fatalError (e);
 150:         throw e;
 151:     }
 152: 
 153:     /**
 154:      * Throws an exception when called after startDocument.
 155:      *
 156:      * @param locator the locator, to be used in error reporting or relative
 157:      *  URI resolution.
 158:      *
 159:      * @exception IllegalStateException when called after the document
 160:      *  has already been started
 161:      */
 162:     public void setDocumentLocator (Locator locator)
 163:     {
 164:         if (startedDoc)
 165:             throw new IllegalStateException (
 166:                     "setDocumentLocator called after startDocument");
 167:         super.setDocumentLocator (locator);
 168:     }
 169: 
 170:     public void startDocument () throws SAXException
 171:     {
 172:         if (startedDoc)
 173:             fatalError ("startDocument called more than once");
 174:         startedDoc = true;
 175:         startedCDATA = false;
 176:         elementStack.removeAllElements ();
 177:         super.startDocument ();
 178:     }
 179: 
 180:     public void startElement (
 181:         String uri, String localName,
 182:         String qName, Attributes atts
 183:     ) throws SAXException
 184:     {
 185:         if (!startedDoc)
 186:             fatalError ("callback outside of document?");
 187:         if ("inside".equals (dtdState))
 188:             fatalError ("element inside DTD?");
 189:         else
 190:             dtdState = "after";
 191:         if (startedCDATA)
 192:             fatalError ("element inside CDATA section");
 193:         if (qName == null || "".equals (qName))
 194:             fatalError ("startElement name missing");
 195:         elementStack.push (qName);
 196:         super.startElement (uri, localName, qName, atts);
 197:     }
 198: 
 199:     public void endElement (String uri, String localName, String qName)
 200:     throws SAXException
 201:     {
 202:         if (!startedDoc)
 203:             fatalError ("callback outside of document?");
 204:         if (startedCDATA)
 205:             fatalError ("element inside CDATA section");
 206:         if (qName == null || "".equals (qName))
 207:             fatalError ("endElement name missing");
 208: 
 209:         try {
 210:             String      top = (String) elementStack.pop ();
 211: 
 212:             if (!qName.equals (top))
 213:                 fatalError ("<" + top + " ...>...</" + qName + ">");
 214:             // XXX could record/test namespace info
 215:         } catch (EmptyStackException e) {
 216:             fatalError ("endElement without startElement:  </" + qName + ">");
 217:         }
 218:         super.endElement (uri, localName, qName);
 219:     }
 220: 
 221:     public void endDocument () throws SAXException
 222:     {
 223:         if (!startedDoc)
 224:             fatalError ("callback outside of document?");
 225:         dtdState = "before";
 226:         startedDoc = false;
 227:         super.endDocument ();
 228:     }
 229: 
 230: 
 231:     public void startDTD (String root, String publicId, String systemId)
 232:     throws SAXException
 233:     {
 234:         if (!startedDoc)
 235:             fatalError ("callback outside of document?");
 236:     if ("before" != dtdState)
 237:             fatalError ("two DTDs?");
 238:         if (!elementStack.empty ())
 239:             fatalError ("DTD must precede root element");
 240:         dtdState = "inside";
 241:         super.startDTD (root, publicId, systemId);
 242:     }
 243: 
 244:     public void notationDecl (String name, String publicId, String systemId)
 245:     throws SAXException
 246:     {
 247: // FIXME: not all parsers will report startDTD() ...
 248: // we'd rather insist we're "inside".
 249:     if ("after" == dtdState)
 250:             fatalError ("not inside DTD");
 251:         super.notationDecl (name, publicId, systemId);
 252:     }
 253: 
 254:     public void unparsedEntityDecl (String name,
 255:         String publicId, String systemId, String notationName)
 256:     throws SAXException
 257:     {
 258: // FIXME: not all parsers will report startDTD() ...
 259: // we'd rather insist we're "inside".
 260:     if ("after" == dtdState)
 261:             fatalError ("not inside DTD");
 262:         super.unparsedEntityDecl (name, publicId, systemId, notationName);
 263:     }
 264: 
 265:     // FIXME:  add the four DeclHandler calls too
 266: 
 267:     public void endDTD ()
 268:     throws SAXException
 269:     {
 270:         if (!startedDoc)
 271:             fatalError ("callback outside of document?");
 272:         if ("inside" != dtdState)
 273:             fatalError ("DTD ends without start?");
 274:         dtdState = "after";
 275:         super.endDTD ();
 276:     }
 277: 
 278:     public void characters (char ch [], int start, int length)
 279:     throws SAXException
 280:     {
 281:         int here = start, end = start + length;
 282:         if (elementStack.empty ())
 283:             fatalError ("characters must be in an element");
 284:         while (here < end) {
 285:             if (ch [here++] != ']')
 286:                 continue;
 287:             if (here == end)    // potential problem ...
 288:                 continue;
 289:             if (ch [here++] != ']')
 290:                 continue;
 291:             if (here == end)    // potential problem ...
 292:                 continue;
 293:             if (ch [here++] == '>')
 294:                 fatalError ("character data can't contain \"]]>\"");
 295:         }
 296:         super.characters (ch, start, length);
 297:     }
 298: 
 299:     public void ignorableWhitespace (char ch [], int start, int length)
 300:     throws SAXException
 301:     {
 302:         int here = start, end = start + length;
 303:         if (elementStack.empty ())
 304:             fatalError ("characters must be in an element");
 305:         while (here < end) {
 306:             if (ch [here++] == '\r')
 307:                 fatalError ("whitespace can't contain CR");
 308:         }
 309:         super.ignorableWhitespace (ch, start, length);
 310:     }
 311: 
 312:     public void processingInstruction (String target, String data)
 313:     throws SAXException
 314:     {
 315:         if (data.indexOf ('\r') > 0)
 316:             fatalError ("PIs can't contain CR");
 317:         if (data.indexOf ("?>") > 0)
 318:             fatalError ("PIs can't contain \"?>\"");
 319:     }
 320: 
 321:     public void comment (char ch [], int start, int length)
 322:     throws SAXException
 323:     {
 324:         if (!startedDoc)
 325:             fatalError ("callback outside of document?");
 326:         if (startedCDATA)
 327:             fatalError ("comments can't nest in CDATA");
 328:         int here = start, end = start + length;
 329:         while (here < end) {
 330:             if (ch [here] == '\r')
 331:                 fatalError ("comments can't contain CR");
 332:             if (ch [here++] != '-')
 333:                 continue;
 334:             if (here == end)
 335:                 fatalError ("comments can't end with \"--->\"");
 336:             if (ch [here++] == '-')
 337:                 fatalError ("comments can't contain \"--\"");
 338:         }
 339:         super.comment (ch, start, length);
 340:     }
 341: 
 342:     public void startCDATA ()
 343:     throws SAXException
 344:     {
 345:         if (!startedDoc)
 346:             fatalError ("callback outside of document?");
 347:         if (startedCDATA)
 348:             fatalError ("CDATA starts can't nest");
 349:         startedCDATA = true;
 350:         super.startCDATA ();
 351:     }
 352: 
 353:     public void endCDATA ()
 354:     throws SAXException
 355:     {
 356:         if (!startedDoc)
 357:             fatalError ("callback outside of document?");
 358:         if (!startedCDATA)
 359:             fatalError ("CDATA end without start?");
 360:         startedCDATA = false;
 361:         super.endCDATA ();
 362:     }
 363: }