Source for gnu.javax.swing.text.html.css.CSSParser

   1: /* CSSParser.java -- A parser for CSS stylesheets
   2:    Copyright (C) 2006 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.javax.swing.text.html.css;
  40: 
  41: import java.io.BufferedInputStream;
  42: import java.io.File;
  43: import java.io.FileInputStream;
  44: import java.io.IOException;
  45: import java.io.InputStream;
  46: import java.io.InputStreamReader;
  47: import java.io.Reader;
  48: import java.util.StringTokenizer;
  49: 
  50: /**
  51:  * A parser for CSS stylesheets.
  52:  *
  53:  * This parser is based on the simple CSS grammar describe in
  54:  *
  55:  * http://www.w3.org/TR/CSS21/syndata.html .
  56:  *
  57:  * @author Roman Kennke (kennke@aicas.com)
  58:  */
  59: // TODO: Maybe use more restrictive grammar:
  60: // http://www.w3.org/TR/CSS21/grammar.html#q1
  61: public class CSSParser
  62: {
  63: 
  64:   /**
  65:    * The scanner used to read the input streams into more usable tokens.
  66:    */
  67:   private CSSScanner scanner;
  68: 
  69:   /**
  70:    * The parser callback.
  71:    */
  72:   private CSSParserCallback callback;
  73: 
  74:   /**
  75:    * One lookahead token.
  76:    */
  77:   private int lookahead;
  78: 
  79:   /**
  80:    * The parse error.
  81:    */
  82:   private String error;
  83: 
  84:   /**
  85:    * Creates a new CSSParser that parses the specified input.
  86:    *
  87:    * @param in the source to parse
  88:    */
  89:   public CSSParser(Reader in, CSSParserCallback cb)
  90:   {
  91:     scanner = new CSSScanner(in);
  92:     callback = cb;
  93:     lookahead = -1;
  94:   }
  95: 
  96:   /**
  97:    * Parses the input source specified in the constructor.
  98:    *
  99:    * @throws IOException if an IO or parse error occurs
 100:    */
 101:   public void parse()
 102:     throws IOException
 103:   {
 104:     boolean success = parseStylesheet();
 105:     if (! success)
 106:       {
 107:         throw new CSSParserException(error);
 108:       }
 109:   }
 110: 
 111:   /**
 112:    * Parses a stylesheet.
 113:    *
 114:    * @return <code>true</code> if the stylesheet could be parsed successfully,
 115:    *         <code>false</code> otherwise
 116:    *
 117:    * @throws IOException if an IO or parse error occurs
 118:    */
 119:   private boolean parseStylesheet()
 120:     throws IOException
 121:   {
 122:     int token = peekToken();
 123:     while (token != CSSScanner.EOF && (token == CSSScanner.CDC
 124:            || token == CSSScanner.CDO || token == CSSScanner.S
 125:            || parseStatement()))
 126:       {
 127:         if (token == CSSScanner.CDC || token == CSSScanner.CDO
 128:             || token == CSSScanner.S)
 129:           readToken();
 130:         token = peekToken();
 131:       }
 132:     // Last token must be EOF for valid stylesheets, I'd think.
 133:     return token == CSSScanner.EOF;
 134:   }
 135: 
 136:   /**
 137:    * Parses a CSS statement.
 138:    * @return <code>true</code> if the stylesheet could be parsed successfully,
 139:    *         <code>false</code> otherwise
 140:    *
 141:    * @throws IOException if an IO or parse error occurs
 142:    */
 143:   private boolean parseStatement()
 144:     throws IOException
 145:   {
 146:     return parseRuleset() || parseAtRule();
 147:   }
 148: 
 149:   /**
 150:    * Parses a CSS rule set.
 151:    *
 152:    * @return <code>true</code> if the ruleset could be parsed successfully,
 153:    *         <code>false</code> otherwise
 154:    *
 155:    * @throws IOException if an IO or parse error occurs
 156:    */
 157:   private boolean parseRuleset()
 158:     throws IOException
 159:   {
 160:     StringBuilder selector = new StringBuilder();
 161:     parseSelector(selector);
 162:     StringTokenizer selSplitter =
 163:       new StringTokenizer(selector.toString(), ",");
 164:     Selector[] sels = new Selector[selSplitter.countTokens()];
 165:     for (int i = 0; selSplitter.hasMoreTokens(); i++)
 166:       {
 167:         String sel = selSplitter.nextToken().trim();
 168:         sels[i] = new Selector(sel);
 169:       }
 170:     callback.startStatement(sels);
 171:     // Read any number of whitespace.
 172:     int token;
 173:     do
 174:       {
 175:         token = readToken();
 176:       } while (token == CSSScanner.S);
 177:     boolean ret = true;
 178: 
 179:     if (token == CSSScanner.CURLY_LEFT)
 180:       {
 181:         // Read any number of whitespace.
 182:         do
 183:           {
 184:             token = readToken();
 185:           } while (token == CSSScanner.S);
 186:         lookahead = token;
 187: 
 188:         // Maybe read declaration.
 189:         boolean decl = parseDeclaration();
 190:         token = peekToken();
 191:         while (token == CSSScanner.SEMICOLON)
 192:           {
 193:             readToken(); // Read the semicolon.
 194:             // Read any number of whitespace.
 195:             do
 196:               {
 197:                 token = readToken();
 198:               } while (token == CSSScanner.S);
 199:             lookahead = token;
 200: 
 201:             // Maybe read declaration.
 202:             parseDeclaration();
 203:             token = peekToken();
 204:           }
 205:         if (token != CSSScanner.CURLY_RIGHT)
 206:           {
 207:             error = "Expected right curly brace";
 208:             ret = false;
 209:           }
 210:         else
 211:           {
 212:             readToken();
 213:             // Read any number of whitespace.
 214:             do
 215:               {
 216:                 token = readToken();
 217:               } while (token == CSSScanner.S);
 218:             lookahead = token;
 219:             callback.endStatement();
 220:           }
 221:       }
 222:     else
 223:       {
 224:         ret = false;
 225:         error = "Expected left curly brace";
 226:       }
 227:     return ret;
 228:   }
 229: 
 230:   /**
 231:    * Parses a CSS declaration.
 232:    *
 233:    * @return <code>true</code> if the ruleset could be parsed successfully,
 234:    *         <code>false</code> otherwise
 235:    *
 236:    * @throws IOException if an IO or parse error occurs
 237:    */
 238:   private boolean parseDeclaration()
 239:    throws IOException
 240:   {
 241:     // Maybe fetch one DELIM.
 242:     int token = readToken();
 243:     if (token == CSSScanner.DELIM)
 244:       token = readToken();
 245: 
 246:     boolean ret = true;
 247: 
 248:     // Parse property
 249:     String property = null;
 250:     if (token == CSSScanner.IDENT)
 251:       {
 252:         property = new String(scanner.parseBuffer, 0, scanner.tokenEnd);
 253:         // Read any number of whitespace.
 254:         do
 255:           {
 256:             token = readToken();
 257:           } while (token == CSSScanner.S);
 258: 
 259:         // Read ':'.
 260:         if (token == CSSScanner.DELIM && scanner.parseBuffer[0] == ':')
 261:           {
 262:             // Read any number of whitespace.
 263:             do
 264:               {
 265:                 token = readToken();
 266:               } while (token == CSSScanner.S);
 267:             lookahead = token;
 268: 
 269:             StringBuilder value = new StringBuilder();
 270:             if (parseValue(value))
 271:               {
 272:                 callback.declaration(property, value.toString().trim());
 273:               }
 274:             else
 275:               {
 276:                 ret = false;
 277:                 error = "Error while reading the property value";
 278:               }
 279:           }
 280:         else
 281:           {
 282:             ret = false;
 283:             error = "Expected colon to separate property and value";
 284:           }
 285: 
 286:       }
 287:     else
 288:       {
 289:         lookahead = token;
 290:         ret = false;
 291:         error = "Expected IDENT token for property";
 292:       }
 293:     return ret;
 294:   }
 295: 
 296:   /**
 297:    * Parses a property value.
 298:    *
 299:    * @param s the string builder to read the value into
 300:    *
 301:    * @return <code>true</code> if the ruleset could be parsed successfully,
 302:    *         <code>false</code> otherwise
 303:    *
 304:    * @throws IOException if an IO or parse error occurs
 305:    */
 306:   private boolean parseValue(StringBuilder s)
 307:     throws IOException
 308:   {
 309:     // FIXME: Handle block and ATKEYWORD.
 310:     boolean success = parseAny(s);
 311:     while (parseAny(s))
 312:       ;
 313: 
 314:     return success;
 315:   }
 316: 
 317:   /**
 318:    * Parses a selector.
 319:    *
 320:    * @param sel the string buffer to put the selector into
 321:    *
 322:    * @return <code>true</code> if the ruleset could be parsed successfully,
 323:    *         <code>false</code> otherwise
 324:    *
 325:    * @throws IOException if an IO or parse error occurs
 326:    */
 327:   private boolean parseSelector(StringBuilder sel)
 328:     throws IOException
 329:   {
 330:     // At least one any needs to be parsed.
 331:     boolean ret = parseAny(sel);
 332:     if (ret)
 333:       {
 334:         while (parseAny(sel))
 335:           ;
 336:       }
 337:     return ret;
 338:   }
 339: 
 340:   /**
 341:    * Parses the any rule. If s is not null, then the contents of the
 342:    * tokens is appended verbatim.
 343:    *
 344:    * @param s the string builder to append to
 345:    *
 346:    * @return <code>true</code> if the ruleset could be parsed successfully,
 347:    *         <code>false</code> otherwise
 348:    *
 349:    * @throws IOException if an IO or parse error occurs
 350:    */
 351:   private boolean parseAny(StringBuilder s)
 352:     throws IOException
 353:   {
 354:     int token = peekToken();
 355:     boolean ret = false;
 356:     if (token == CSSScanner.IDENT || token == CSSScanner.NUMBER
 357:         || token == CSSScanner.PERCENTAGE || token == CSSScanner.DIMENSION
 358:         || token == CSSScanner.STRING || token == CSSScanner.DELIM
 359:         || token == CSSScanner.URI || token == CSSScanner.HASH
 360:         || token == CSSScanner.UNICODE_RANGE || token == CSSScanner.INCLUDES
 361:         || token == CSSScanner.DASHMATCH)
 362:       {
 363:         if (s != null)
 364:           s.append(scanner.parseBuffer, 0, scanner.tokenEnd);
 365:         readToken();
 366:         ret = true;
 367:       }
 368:     else if (token == CSSScanner.FUNCTION)
 369:       System.err.println("Implement parseAny for FUNCTION");
 370:     else if (token == CSSScanner.PAREN_LEFT)
 371:       System.err.println("Implement parseAny for (");
 372:     else if (token == CSSScanner.BRACE_LEFT)
 373:       System.err.println("Implement parseAny for [");
 374: 
 375:     // Parse any following whitespace too.
 376:     token = peekToken();
 377:     while (token == CSSScanner.S)
 378:       {
 379:         if (s != null)
 380:           s.append(scanner.parseBuffer, 0, scanner.tokenEnd);
 381:         readToken();
 382:         token = peekToken();
 383:       }
 384:     return ret;
 385:   }
 386: 
 387:   /**
 388:    * Parses a CSS at-rule.
 389:    *
 390:    * @return <code>true</code> if the at-rule could be parsed successfully,
 391:    *         <code>false</code> otherwise
 392:    *
 393:    * @throws IOException if an IO or parse error occurs
 394:    */
 395:   private boolean parseAtRule()
 396:     throws IOException
 397:   {
 398:     // FIXME: Implement.
 399:     return false;
 400:   }
 401: 
 402:   /**
 403:    * Reads the next token, and skips the comments.
 404:    *
 405:    * @return the next non-comment token
 406:    */
 407:   private int readToken()
 408:     throws IOException
 409:   {
 410:     int token;
 411:     if (lookahead == -1)
 412:       {
 413:         do
 414:           {
 415:             token = scanner.nextToken();
 416:           } while (token == CSSScanner.COMMENT);
 417:       }
 418:     else
 419:       {
 420:         token = lookahead;
 421:         lookahead = -1;
 422:       }
 423:     return token;
 424:   }
 425: 
 426:   /**
 427:    * Returns the next token to be read, without really reading it. The next
 428:    * call to readToken() will return the same token again.
 429:    *
 430:    * @return the next token to be read, without really reading it
 431:    */
 432:   private int peekToken()
 433:     throws IOException
 434:   {
 435:     int token;
 436:     if (lookahead == -1)
 437:       {
 438:         do
 439:           {
 440:             token = scanner.nextToken();
 441:           } while (token == CSSScanner.COMMENT);
 442:         lookahead = token;
 443:       }
 444:     else
 445:       token = lookahead;
 446:     return token;
 447:   }
 448: 
 449:   /**
 450:    * For testing, we read in the default.css in javax/swing/text/html
 451:    *
 452:    * @param args
 453:    */
 454:   public static void main(String[] args)
 455:   {
 456:     try
 457:       {
 458:         InputStream in;
 459:         if (args.length > 0)
 460:           {
 461:             File file = new File(args[0]);
 462:             in = new FileInputStream(file);
 463:           }
 464:         else
 465:           {
 466:             String name = "/javax/swing/text/html/default.css";
 467:             in = CSSScanner.class.getResourceAsStream(name);
 468:           }
 469:         BufferedInputStream bin = new BufferedInputStream(in);
 470:         InputStreamReader r = new InputStreamReader(bin);
 471:         CSSParserCallback cb = new CSSParserCallback()
 472:         {
 473:           public void startStatement(Selector[] selector)
 474:           {
 475:             System.out.print("startStatement: ");
 476:             for (int i = 0; i < selector.length; i++)
 477:               {
 478:                 System.out.print(selector[i]);
 479:                 if (i < selector.length - 1)
 480:                   System.out.print(',');
 481:                 else
 482:                   System.out.println();
 483:               }
 484:           }
 485:           public void endStatement()
 486:           {
 487:             System.out.println("endStatement");
 488:           }
 489:           public void declaration(String property, String value)
 490:           {
 491:             System.out.println("declaration: " + property + ", " + value);
 492:           }
 493:         };
 494:         CSSParser p = new CSSParser(r, cb);
 495:         p.parse();
 496:       }
 497:     catch (IOException ex)
 498:       {
 499:         ex.printStackTrace();
 500:       }
 501:   }
 502: 
 503: }