Source for java.text.CollationElementIterator

   1: /* CollationElementIterator.java -- Walks through collation elements
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012  Free Software Foundation
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import gnu.java.lang.CPStringBuilder;
  42: 
  43: import java.util.ArrayList;
  44: 
  45: /* Written using "Java Class Libraries", 2nd edition, plus online
  46:  * API docs for JDK 1.2 from http://www.javasoft.com.
  47:  * Status: Believed complete and correct to JDK 1.1.
  48:  */
  49: 
  50: /**
  51:  * This class walks through the character collation elements of a
  52:  * <code>String</code> as defined by the collation rules in an instance of
  53:  * <code>RuleBasedCollator</code>.  There is no public constructor for
  54:  * this class.  An instance is created by calling the
  55:  * <code>getCollationElementIterator</code> method on
  56:  * <code>RuleBasedCollator</code>.
  57:  *
  58:  * @author Aaron M. Renn (arenn@urbanophile.com)
  59:  * @author Tom Tromey (tromey@cygnus.com)
  60:  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  61:  */
  62: public final class CollationElementIterator
  63: {
  64:   /**
  65:    * This is a constant value that is returned to indicate that the end of
  66:    * the string was encountered.
  67:    */
  68:   public static final int NULLORDER = -1;
  69: 
  70:   /**
  71:    * This is the RuleBasedCollator this object was created from.
  72:    */
  73:   RuleBasedCollator collator;
  74: 
  75:   /**
  76:    * This is the String that is being iterated over.
  77:    */
  78:   CharacterIterator text;
  79: 
  80:   /**
  81:    * This is the index into the collation decomposition where we are currently scanning.
  82:    */
  83:   int index;
  84: 
  85:   /**
  86:    * This is the index into the String where we are currently scanning.
  87:    */
  88:   int textIndex;
  89: 
  90:   /**
  91:    * Array containing the collation decomposition of the
  92:    * text given to the constructor.
  93:    */
  94:   private RuleBasedCollator.CollationElement[] textDecomposition;
  95: 
  96:   /**
  97:    * Array containing the index of the specified block.
  98:    */
  99:   private int[] textIndexes;
 100: 
 101:   /**
 102:    * This method initializes a new instance of <code>CollationElementIterator</code>
 103:    * to iterate over the specified <code>String</code> using the rules in the
 104:    * specified <code>RuleBasedCollator</code>.
 105:    *
 106:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 107:    * @param text The <code>String</code> to iterate over.
 108:    */
 109:   CollationElementIterator(RuleBasedCollator collator, String text)
 110:   {
 111:     this.collator = collator;
 112: 
 113:     setText (text);
 114:   }
 115: 
 116:   /**
 117:    * This method initializes a new instance of <code>CollationElementIterator</code>
 118:    * to iterate over the specified <code>String</code> using the rules in the
 119:    * specified <code>RuleBasedCollator</code>.
 120:    *
 121:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 122:    * @param text The character iterator to iterate over.
 123:    */
 124:   CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
 125:   {
 126:     this.collator = collator;
 127: 
 128:     setText (text);
 129:   }
 130: 
 131:   RuleBasedCollator.CollationElement nextBlock()
 132:   {
 133:     if (index >= textDecomposition.length)
 134:       return null;
 135: 
 136:     RuleBasedCollator.CollationElement e = textDecomposition[index];
 137: 
 138:     textIndex = textIndexes[index+1];
 139: 
 140:     index++;
 141: 
 142:     return e;
 143:   }
 144: 
 145:   RuleBasedCollator.CollationElement previousBlock()
 146:   {
 147:     if (index == 0)
 148:       return null;
 149: 
 150:     index--;
 151:     RuleBasedCollator.CollationElement e = textDecomposition[index];
 152: 
 153:     textIndex = textIndexes[index+1];
 154: 
 155:     return e;
 156:   }
 157: 
 158:   /**
 159:    * This method returns the collation ordering value of the next character sequence
 160:    * in the string (it may be an extended character following collation rules).
 161:    * This method will return <code>NULLORDER</code> if the
 162:    * end of the string was reached.
 163:    *
 164:    * @return The collation ordering value.
 165:    */
 166:   public int next()
 167:   {
 168:     RuleBasedCollator.CollationElement e = nextBlock();
 169: 
 170:     if (e == null)
 171:       return NULLORDER;
 172: 
 173:     return e.getValue();
 174:   }
 175: 
 176:   /**
 177:    * This method returns the collation ordering value of the previous character
 178:    * in the string.  This method will return <code>NULLORDER</code> if the
 179:    * beginning of the string was reached.
 180:    *
 181:    * @return The collation ordering value.
 182:    */
 183:   public int previous()
 184:   {
 185:     RuleBasedCollator.CollationElement e = previousBlock();
 186: 
 187:     if (e == null)
 188:       return NULLORDER;
 189: 
 190:     return e.getValue();
 191:   }
 192: 
 193:   /**
 194:    * This method returns the primary order value for the given collation
 195:    * value.
 196:    *
 197:    * @param order The collation value returned from <code>next()</code> or
 198:    *              <code>previous()</code>.
 199:    *
 200:    * @return The primary order value of the specified collation value.  This is
 201:    *         the high 16 bits.
 202:    */
 203:   public static int primaryOrder(int order)
 204:   {
 205:     // From the JDK 1.2 spec.
 206:     return order >>> 16;
 207:   }
 208: 
 209:   /**
 210:    * This method resets the internal position pointer to read from the
 211:    * beginning of the <code>String</code> again.
 212:    */
 213:   public void reset()
 214:   {
 215:     index = 0;
 216:     textIndex = 0;
 217:   }
 218: 
 219:   /**
 220:    * This method returns the secondary order value for the given collation
 221:    * value.
 222:    *
 223:    * @param order The collation value returned from <code>next()</code> or
 224:    *              <code>previous()</code>.
 225:    *
 226:    * @return The secondary order value of the specified collation value.  This
 227:    *         is the bits 8-15.
 228:    */
 229:   public static short secondaryOrder(int order)
 230:   {
 231:     // From the JDK 1.2 spec.
 232:     return (short) ((order >>> 8) & 255);
 233:   }
 234: 
 235:   /**
 236:    * This method returns the tertiary order value for the given collation
 237:    * value.
 238:    *
 239:    * @param order The collation value returned from <code>next()</code> or
 240:    *              <code>previous()</code>.
 241:    *
 242:    * @return The tertiary order value of the specified collation value.  This
 243:    *         is the low eight bits.
 244:    */
 245:   public static short tertiaryOrder(int order)
 246:   {
 247:     // From the JDK 1.2 spec.
 248:     return (short) (order & 255);
 249:   }
 250: 
 251:   /**
 252:    * This method sets the <code>String</code> that it is iterating over
 253:    * to the specified <code>String</code>.
 254:    *
 255:    * @param text The new <code>String</code> to iterate over.
 256:    *
 257:    * @since 1.2
 258:    */
 259:   public void setText(String text)
 260:   {
 261:     int idx = 0;
 262:     int idx_idx = 0;
 263:     int alreadyExpanded = 0;
 264:     int idxToMove = 0;
 265: 
 266:     this.text = new StringCharacterIterator(text);
 267:     this.index = 0;
 268: 
 269:     String work_text = text.intern();
 270: 
 271:     ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>();
 272:     ArrayList<Integer> aIdx = new ArrayList<Integer>();
 273: 
 274:     // Build element collection ordered as they come in "text".
 275:     while (idx < work_text.length())
 276:       {
 277:         String key, keyOld;
 278: 
 279:         Object object = null;
 280:         int p = 1;
 281: 
 282:         // IMPROVE: use a TreeMap with a prefix-ordering rule.
 283:         keyOld = key = null;
 284:         do
 285:           {
 286:             if (object != null)
 287:               keyOld = key;
 288:             key = work_text.substring (idx, idx+p);
 289:             object = collator.prefix_tree.get (key);
 290:             if (object != null && idx < alreadyExpanded)
 291:               {
 292:                 RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 293:                 if (prefix.expansion != null &&
 294:                     prefix.expansion.startsWith(work_text.substring(0, idx)))
 295:                 {
 296:                   object = null;
 297:                   key = keyOld;
 298:                 }
 299:               }
 300:             p++;
 301:           }
 302:         while (idx+p <= work_text.length());
 303: 
 304:         if (object == null)
 305:           key = keyOld;
 306: 
 307:         RuleBasedCollator.CollationElement prefix =
 308:           (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 309: 
 310:         /*
 311:          * First case: There is no such sequence in the database.
 312:          * We will have to build one from the context.
 313:          */
 314:         if (prefix == null)
 315:           {
 316:             /*
 317:              * We are dealing with sequences in an expansion. They
 318:              * are treated as accented characters (tertiary order).
 319:              */
 320:             if (alreadyExpanded > 0)
 321:               {
 322:                 RuleBasedCollator.CollationElement e =
 323:                   collator.getDefaultAccentedElement (work_text.charAt (idx));
 324: 
 325:                 aElement.add (e);
 326:                 aIdx.add (Integer.valueOf(idx_idx));
 327:                 idx++;
 328:                 alreadyExpanded--;
 329:                 if (alreadyExpanded == 0)
 330:                   {
 331:                     /* There is not any characters left in the expansion set.
 332:                      * We can increase the pointer in the source string.
 333:                      */
 334:                     idx_idx += idxToMove;
 335:                     idxToMove = 0;
 336:                   }
 337:                 else
 338:                   idx_idx++;
 339:               }
 340:             else
 341:               {
 342:                 /* This is a normal character. */
 343:                 RuleBasedCollator.CollationElement e =
 344:                   collator.getDefaultElement (work_text.charAt (idx));
 345:                 Integer iRef = Integer.valueOf(idx_idx);
 346: 
 347:                 /* Don't forget to mark it as a special sequence so the
 348:                  * string can be ordered.
 349:                  */
 350:                 aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 351:                 aIdx.add (iRef);
 352:                 aElement.add (e);
 353:                 aIdx.add (iRef);
 354:                 idx_idx++;
 355:                 idx++;
 356:               }
 357:             continue;
 358:           }
 359: 
 360:         /*
 361:          * Second case: Here we have found a matching sequence.
 362:          * Here we have an expansion string prepend it to the "work text" and
 363:          * add the corresponding sorting element. We must also mark
 364:          */
 365:         if (prefix.expansion != null)
 366:           {
 367:             work_text = prefix.expansion
 368:               + work_text.substring (idx+prefix.key.length());
 369:             idx = 0;
 370:             aElement.add (prefix);
 371:             aIdx.add (Integer.valueOf(idx_idx));
 372:             if (alreadyExpanded == 0)
 373:               idxToMove = prefix.key.length();
 374:             alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 375:           }
 376:         else
 377:           {
 378:             /* Third case: the simplest. We have got the prefix and it
 379:              * has not to be expanded.
 380:              */
 381:             aElement.add (prefix);
 382:             aIdx.add (Integer.valueOf(idx_idx));
 383:             idx += prefix.key.length();
 384:             /* If the sequence is in an expansion, we must decrease the
 385:              * counter.
 386:              */
 387:             if (alreadyExpanded > 0)
 388:               {
 389:                 alreadyExpanded -= prefix.key.length();
 390:                 if (alreadyExpanded == 0)
 391:                   {
 392:                     idx_idx += idxToMove;
 393:                     idxToMove = 0;
 394:                   }
 395:               }
 396:             else
 397:               idx_idx += prefix.key.length();
 398:           }
 399:       }
 400: 
 401:     textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]);
 402:     textIndexes = new int[aIdx.size()+1];
 403:     for (int i = 0; i < aIdx.size(); i++)
 404:       {
 405:         textIndexes[i] = aIdx.get(i).intValue();
 406:       }
 407:     textIndexes[aIdx.size()] = text.length();
 408:   }
 409: 
 410:   /**
 411:    * This method sets the <code>String</code> that it is iterating over
 412:    * to the <code>String</code> represented by the specified
 413:    * <code>CharacterIterator</code>.
 414:    *
 415:    * @param source The <code>CharacterIterator</code> containing the new
 416:    * <code>String</code> to iterate over.
 417:    */
 418:   public void setText(CharacterIterator source)
 419:   {
 420:     CPStringBuilder expand = new CPStringBuilder();
 421: 
 422:     // For now assume we read from the beginning of the string.
 423:     for (char c = source.first();
 424:          c != CharacterIterator.DONE;
 425:          c = source.next())
 426:       expand.append(c);
 427: 
 428:     setText(expand.toString());
 429:   }
 430: 
 431:   /**
 432:    * This method returns the current offset into the <code>String</code>
 433:    * that is being iterated over.
 434:    *
 435:    * @return The iteration index position.
 436:    *
 437:    * @since 1.2
 438:    */
 439:   public int getOffset()
 440:   {
 441:     return textIndex;
 442:   }
 443: 
 444:   /**
 445:    * This method sets the iteration index position into the current
 446:    * <code>String</code> to the specified value.  This value must not
 447:    * be negative and must not be greater than the last index position
 448:    * in the <code>String</code>.
 449:    *
 450:    * @param offset The new iteration index position.
 451:    *
 452:    * @exception IllegalArgumentException If the new offset is not valid.
 453:    */
 454:   public void setOffset(int offset)
 455:   {
 456:     if (offset < 0)
 457:       throw new IllegalArgumentException("Negative offset: " + offset);
 458: 
 459:     if (offset > (text.getEndIndex() - 1))
 460:       throw new IllegalArgumentException("Offset too large: " + offset);
 461: 
 462:     for (index = 0; index < textDecomposition.length; index++)
 463:       {
 464:         if (offset <= textIndexes[index])
 465:           break;
 466:       }
 467:     /*
 468:      * As textIndexes[0] == 0, we should not have to take care whether index is
 469:      * greater than 0. It is always.
 470:      */
 471:     if (textIndexes[index] == offset)
 472:       textIndex = offset;
 473:     else
 474:       textIndex = textIndexes[index-1];
 475:   }
 476: 
 477:   /**
 478:    * This method returns the maximum length of any expansion sequence that
 479:    * ends with the specified collation order value.  (Whatever that means).
 480:    *
 481:    * @param value The collation order value
 482:    *
 483:    * @return The maximum length of an expansion sequence.
 484:    */
 485:   public int getMaxExpansion(int value)
 486:   {
 487:     return 1;
 488:   }
 489: }