Source for gnu.java.text.CharacterBreakIterator

   1: /* CharacterBreakIterator.java - Default character BreakIterator.
   2:    Copyright (C) 1999, 2001, 2004 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.java.text;
  40: 
  41: import java.text.CharacterIterator;
  42: 
  43: /**
  44:  * @author Tom Tromey <tromey@cygnus.com>
  45:  * @date March 19, 1999
  46:  * Written using The Unicode Standard, Version 2.0.
  47:  */
  48: 
  49: public class CharacterBreakIterator extends BaseBreakIterator
  50: {
  51:   // Hangul Jamo constants from Unicode book.
  52:   private static final int LBase = 0x1100;
  53:   private static final int VBase = 0x1161;
  54:   private static final int TBase = 0x11a7;
  55:   private static final int LCount = 19;
  56:   private static final int VCount = 21;
  57:   private static final int TCount = 28;
  58: 
  59:   // Information about surrogates.
  60:   private static final int highSurrogateStart = 0xD800;
  61:   private static final int highSurrogateEnd = 0xDBFF;
  62:   private static final int lowSurrogateStart = 0xDC00;
  63:   private static final int lowSurrogateEnd = 0xDFFF;
  64: 
  65:   public Object clone ()
  66:   {
  67:     return new CharacterBreakIterator (this);
  68:   }
  69: 
  70:   public CharacterBreakIterator ()
  71:   {
  72:   }
  73: 
  74:   private CharacterBreakIterator (CharacterBreakIterator other)
  75:   {
  76:     iter = (CharacterIterator) other.iter.clone();
  77:   }
  78: 
  79:   // Some methods to tell us different properties of characters.
  80:   private final boolean isL (char c)
  81:   {
  82:     return c >= LBase && c <= LBase + LCount;
  83:   }
  84:   private final boolean isV (char c)
  85:   {
  86:     return c >= VBase && c <= VBase + VCount;
  87:   }
  88:   private final boolean isT (char c)
  89:   {
  90:     return c >= TBase && c <= TBase + TCount;
  91:   }
  92:   private final boolean isLVT (char c)
  93:   {
  94:     return isL (c) || isV (c) || isT (c);
  95:   }
  96:   private final boolean isHighSurrogate (char c)
  97:   {
  98:     return c >= highSurrogateStart && c <= highSurrogateEnd;
  99:   }
 100:   private final boolean isLowSurrogate (char c)
 101:   {
 102:     return c >= lowSurrogateStart && c <= lowSurrogateEnd;
 103:   }
 104: 
 105:   public int next ()
 106:   {
 107:     int end = iter.getEndIndex();
 108:     if (iter.getIndex() == end)
 109:       return DONE;
 110: 
 111:     char c;
 112:     for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c)
 113:       {
 114:         c = iter.next();
 115:         if (c == CharacterIterator.DONE)
 116:           break;
 117:         int type = Character.getType(c);
 118: 
 119:         // Break after paragraph separators.
 120:         if (type == Character.PARAGRAPH_SEPARATOR)
 121:           break;
 122: 
 123:         // Now we need some lookahead.
 124:         char ahead = iter.next();
 125:         iter.previous();
 126:         if (ahead == CharacterIterator.DONE)
 127:           break;
 128:         int aheadType = Character.getType(ahead);
 129: 
 130:         if (aheadType != Character.NON_SPACING_MARK
 131:             && ! isLowSurrogate (ahead)
 132:             && ! isLVT (ahead))
 133:           break;
 134:         if (! isLVT (c) && isLVT (ahead))
 135:           break;
 136:         if (isL (c) && ! isLVT (ahead)
 137:             && aheadType != Character.NON_SPACING_MARK)
 138:           break;
 139:         if (isV (c) && ! isV (ahead) && !isT (ahead)
 140:             && aheadType != Character.NON_SPACING_MARK)
 141:           break;
 142:         if (isT (c) && ! isT (ahead)
 143:             && aheadType != Character.NON_SPACING_MARK)
 144:           break;
 145: 
 146:         if (! isHighSurrogate (c) && isLowSurrogate (ahead))
 147:           break;
 148:         if (isHighSurrogate (c) && ! isLowSurrogate (ahead))
 149:           break;
 150:         if (! isHighSurrogate (prev) && isLowSurrogate (c))
 151:           break;
 152:       }
 153: 
 154:     return iter.getIndex();
 155:   }
 156: 
 157:   public int previous ()
 158:   {
 159:     if (iter.getIndex() == iter.getBeginIndex())
 160:       return DONE;
 161: 
 162:     while (iter.getIndex() >= iter.getBeginIndex())
 163:       {
 164:         char c = iter.previous();
 165:         if (c == CharacterIterator.DONE)
 166:           break;
 167:         int type = Character.getType(c);
 168: 
 169:         if (type != Character.NON_SPACING_MARK
 170:             && ! isLowSurrogate (c)
 171:             && ! isLVT (c))
 172:           break;
 173: 
 174:         // Now we need some lookahead.
 175:         char ahead = iter.previous();
 176:         if (ahead == CharacterIterator.DONE)
 177:           {
 178:             iter.next();
 179:             break;
 180:           }
 181:         char ahead2 = iter.previous();
 182:         iter.next();
 183:         iter.next();
 184:         if (ahead2 == CharacterIterator.DONE)
 185:           break;
 186:         int aheadType = Character.getType(ahead);
 187: 
 188:         if (aheadType == Character.PARAGRAPH_SEPARATOR)
 189:           break;
 190: 
 191:         if (isLVT (c) && ! isLVT (ahead))
 192:           break;
 193:         if (! isLVT (c) && type != Character.NON_SPACING_MARK
 194:             && isL (ahead))
 195:           break;
 196:         if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK
 197:             && isV (ahead))
 198:           break;
 199:         if (! isT (c) && type != Character.NON_SPACING_MARK
 200:             && isT (ahead))
 201:           break;
 202: 
 203:         if (isLowSurrogate (c) && ! isHighSurrogate (ahead))
 204:           break;
 205:         if (! isLowSurrogate (c) && isHighSurrogate (ahead))
 206:           break;
 207:         if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2))
 208:           break;
 209:       }
 210: 
 211:     return iter.getIndex();
 212:   }
 213: }