Frames | No Frames |
/* RuleBasedCollator.java -- Concrete Collator Class
   Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package java.text;

import gnu.classpath.NotImplementedException;

import java.util.ArrayList;
import java.util.HashMap;

/* Written using "Java Class Libraries", 2nd edition, plus online
 * API docs for JDK 1.2 from http://www.javasoft.com.
 * Status: Believed complete and correct
 */

/**
 * This class is a concrete subclass of <code>Collator</code> suitable
 * for string collation in a wide variety of languages.  An instance of
 * this class is normally returned by the <code>getInstance</code> method
 * of <code>Collator</code> with rules predefined for the requested
 * locale.  However, an instance of this class can be created manually
 * with any desired rules.
 * <p>
 * Rules take the form of a <code>String</code> with the following syntax
 * <ul>
 * <li> Modifier: '@'</li>
 * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
 * <li> Reset: '&amp;' : &lt;text&gt;</li>
 * </ul>
 * The modifier character indicates that accents sort backward as is the
 * case with French.  The modifier applies to all rules <b>after</b>
 * the modifier but before the next primary sequence.  If placed at the end
 * of the sequence it applies to all unknown accented characters.
 * The relational operators specify how the text
 * argument relates to the previous term.  The relation characters have
 * the following meanings:
 * <ul>
 * <li>'&lt;' - The text argument is greater than the prior term at the primary
 * difference level.</li>
 * <li>';' - The text argument is greater than the prior term at the secondary
 * difference level.</li>
 * <li>',' - The text argument is greater than the prior term at the tertiary
 * difference level.</li>
 * <li>'=' - The text argument is equal to the prior term</li>
 * </ul>
 * <p>
 * As for the text argument itself, this is any sequence of Unicode
 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E.  If these characters are
 * desired, they must be enclosed in single quotes.  If any whitespace is
 * encountered, it is ignored.  (For example, "a b" is equal to "ab").
 * <p>
 * The reset operation inserts the following rule at the point where the
 * text argument to it exists in the previously declared rule string.  This
 * makes it easy to add new rules to an existing string by simply including
 * them in a reset sequence at the end.  Note that the text argument, or
 * at least the first character of it, must be present somewhere in the
 * previously declared rules in order to be inserted properly.  If this
 * is not satisfied, a <code>ParseException</code> will be thrown.
 * <p>
 * This system of configuring <code>RuleBasedCollator</code> is needlessly
 * complex and the people at Taligent who developed it (along with the folks
 * at Sun who accepted it into the Java standard library) deserve a slow
 * and agonizing death.
 * <p>
 * Here are a couple of examples of rule strings:
 * <p>
 * "&lt; a &lt; b &lt; c" - This string says that b is greater than a and
 * c is greater than b, with all differences being primary differences.
 * <p>
 * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
 * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
 * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
 * under a tertiary strength comparison.
 * <p>
 * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the
 * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
 * the rule "&lt; b" is to be inserted after the text argument "a" in the
 * previous rule string segment.
 * <p>
 * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
 * anywhere in the previous rule string segment so the rule following the
 * reset rule cannot be inserted.
 * <p>
 * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
 * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
 * <p>
 * For a description of the various comparison strength types, see the
 * documentation for the <code>Collator</code> class.
 * <p>
 * As an additional complication to this already overly complex rule scheme,
 * if any characters precede the first rule, these characters are considered
 * ignorable.  They will be treated as if they did not exist during
 * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
 * character such that the strings "high-tech" and "hightech" would
 * be considered identical.
 * <p>
 * A <code>ParseException</code> will be thrown for any of the following
 * conditions:
 * <ul>
 * <li>Unquoted punctuation characters in a text argument.</li>
 * <li>A relational or reset operator not followed by a text argument</li>
 * <li>A reset operator where the text argument is not present in
 * the previous rule string section.</li>
 * </ul>
 *
 * @author Aaron M. Renn (arenn@urbanophile.com)
 * @author Tom Tromey (tromey@cygnus.com)
 * @author Guilhem Lavaux (guilhem@kaffe.org)
 */
public class RuleBasedCollator extends Collator
{
  /**
   * This class describes what rank has a character (or a sequence of characters)
   * in the lexicographic order. Each element in a rule has a collation element.
   */
  static final class CollationElement
  {
    final String key;
    final int primary;
    final short secondary;
    final short tertiary;
    final short equality;
    final boolean ignore;
    final String expansion;

    CollationElement(String key, int primary, short secondary, short tertiary,
                     short equality, String expansion, boolean ignore)
    {
      this.key = key;
      this.primary = primary;
      this.secondary = secondary;
      this.tertiary = tertiary;
      this.equality = equality;
      this.ignore = ignore;
      this.expansion = expansion;
    }

    /**
     * Packs the three ordering weights into one integer: the primary
     * weight shifted left by 16, the secondary by 8, plus the tertiary.
     *
     * @return The packed ordering value of this element.
     */
    int getValue()
    {
      return (primary << 16) + (secondary << 8) + tertiary;
    }
  }

  /**
   * Basic collation instruction (internal format) to build the series of
   * collation elements. It contains an instruction which specifies the new
   * state of the generator. The sequence of instruction should not contain
   * RESET (it is used by
   * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
   * as a temporary state while merging two sets of instructions.
   */
  private static final class CollationSorter
  {
    static final int GREATERP = 0;
    static final int GREATERS = 1;
    static final int GREATERT = 2;
    static final int EQUAL = 3;
    static final int RESET = 4;
    static final int INVERSE_SECONDARY = 5;

    final int comparisonType;
    final String textElement;
    final int hashText;
    final int offset;
    final boolean ignore;

    String expansionOrdering;

    private CollationSorter(final int comparisonType, final String textElement,
                            final int offset, final boolean ignore)
    {
      this.comparisonType = comparisonType;
      this.textElement = textElement;
      this.offset = offset;
      this.ignore = ignore;
      hashText = textElement.hashCode();
    }
  }

  /**
   * This is the original rule string.
   */
  private String rules;

  /**
   * This is the table of collation element values
   */
  private CollationElement[] ce_table;

  /**
   * Quick-prefix finder.
   */
  HashMap<String,CollationElement> prefix_tree;

  /**
   * This is the value of the last sequence entered into
   * <code>ce_table</code>. It is used to compute the
   * ordering value of unspecified character.
   */
  private int last_primary_value;

  /**
   * This is the value of the last secondary sequence of the
   * primary 0, entered into
   * <code>ce_table</code>. It is used to compute the
   * ordering value of an unspecified accented character.
   */
  private int last_tertiary_value;

  /**
   * This variable is true if accents need to be sorted
   * in the other direction.
   */
  private boolean inverseAccentComparison;

  /**
   * This collation element is special to unknown sequence.
   * The JDK uses it to mark and sort the characters which has
   * no collation rules.
   */
  static final CollationElement SPECIAL_UNKNOWN_SEQ =
    new CollationElement("", (short) 32767, (short) 0, (short) 0,
                         (short) 0, null, false);

  /**
   * This method initializes a new instance of <code>RuleBasedCollator</code>
   * with the specified collation rules.  Note that an application normally
   * obtains an instance of <code>RuleBasedCollator</code> by calling the
   * <code>getInstance</code> method of <code>Collator</code>.  That method
   * automatically loads the proper set of rules for the desired locale.
   *
   * @param rules The collation rule string.
   *
   * @exception ParseException If the rule string contains syntax errors.
   */
  public RuleBasedCollator(String rules) throws ParseException
  {
    if (rules.equals(""))
      throw new ParseException("empty rule set", 0);

    this.rules = rules;

    buildCollationVector(parseString(rules));
    buildPrefixAccess();
  }

  /**
   * This method returns the number of common characters at the beginning
   * of the string of the two parameters.
   *
   * @param prefix A string considered as a prefix to test against
   * the other string.
   * @param s A string to test the prefix against.
   * @return The number of common characters.
   */
  static int findPrefixLength(String prefix, String s)
  {
    int index;
    int len = prefix.length();

    for (index = 0; index < len && index < s.length(); ++index)
      {
        if (prefix.charAt(index) != s.charAt(index))
          return index;
      }

    return index;
  }

  /**
   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
   * checks whether it is possible to find an anchor point for the rules to be merged and
   * then insert them at that precise point.
   *
   * @param offset Offset in the string containing rules of the beginning of the rules
   * being merged in.
   * @param starter Text of the rules being merged.
   * @param main Repository of all already parsed rules.
   * @param patch Rules to be merged into the repository.
   * @throws ParseException if it is impossible to find an anchor point for the new rules.
   */
  private void mergeRules(int offset, String starter, ArrayList<CollationSorter> main,
                          ArrayList<CollationSorter> patch)
    throws ParseException
  {
    int insertion_point = -1;
    int max_length = 0;

    /* We must check that no rules conflict with another already present. If it
     * is the case delete the old rule.
     */

    /* For the moment good old O(N^2) algorithm.
     */
    for (int i = 0; i < patch.size(); i++)
      {
        String patchText = patch.get(i).textElement;
        int j = 0;

        while (j < main.size())
          {
            // Do not advance after a removal: the next rule has shifted
            // into slot j.
            if (patchText.equals(main.get(j).textElement))
              main.remove(j);
            else
              j++;
          }
      }

    // Find the insertion point... O(N)
    // The anchor is the rule sharing the longest prefix with 'starter';
    // on a tie the earliest such rule wins.
    for (int i = 0; i < main.size(); i++)
      {
        CollationSorter sorter = main.get(i);
        int length = findPrefixLength(starter, sorter.textElement);

        if (length > max_length)
          {
            max_length = length;
            insertion_point = i+1;
          }
      }

    if (insertion_point < 0)
      throw new ParseException("no insertion point found for " + starter, offset);

    // The patch is empty when a reset is directly followed by another
    // reset; there is then no first entry to expand.
    if (max_length < starter.length() && !patch.isEmpty())
      {
        /*
         * We need to expand the first entry. It must be sorted
         * like if it was the reference key itself (like the spec
         * said. So the first entry is special: the element is
         * replaced by the specified text element for the sorting.
         * This text replace the old one for comparisons. However
         * to preserve the behaviour we replace the first key (corresponding
         * to the found prefix) by a new code rightly ordered in the
         * sequence. The rest of the subsequence must be appended
         * to the end of the sequence.
         */
        CollationSorter sorter = patch.get(0);

        sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element

        main.add(insertion_point, sorter);

        /*
         * This is a new set of rules. Append to the list.
         */
        patch.remove(0);
        insertion_point++;
      }

    // Now insert all elements of patch at the insertion point.
    main.addAll(insertion_point, patch);
  }

  /**
   * This method parses a string and build a set of sorting instructions. The parsing
   * may only be partial on the case the rules are to be merged sometime later.
   *
   * @param stop_on_reset If this parameter is true then the parser stops when it
   * encounters a reset instruction. In the other case, it tries to parse the subrules
   * and merged it in the same repository.
   * @param v Output vector for the set of instructions.
   * @param base_offset Offset in the string to begin parsing.
   * @param rules Rules to be parsed.
   * @return -1 if the parser reached the end of the string, an integer representing the
   * offset in the string at which it stopped parsing.
   * @throws ParseException if something turned wrong during the parsing. To get details
   * decode the message.
   */
  private int subParseString(boolean stop_on_reset, ArrayList<CollationSorter> v,
                             int base_offset, String rules)
    throws ParseException
  {
    // Text met before the first primary relation of the whole rule string
    // is flagged as ignorable.
    boolean ignoreChars = (base_offset == 0);
    int operator = -1;
    StringBuilder sb = new StringBuilder();
    boolean doubleQuote = false;
    boolean eatingChars = false;
    boolean nextIsModifier = false;
    boolean isModifier = false;
    int i;

main_parse_loop:
    for (i = 0; i < rules.length(); i++)
      {
        char c = rules.charAt(i);
        int type = -1;

        // Unquoted whitespace is skipped.
        if (!eatingChars &&
            ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
          continue;

        isModifier = nextIsModifier;
        nextIsModifier = false;

        // Inside quotes every character but the quote itself is literal.
        if (eatingChars && c != '\'')
          {
            doubleQuote = false;
            sb.append(c);
            continue;
          }
        // A quote directly following the opening quote is a literal quote.
        if (doubleQuote && eatingChars)
          {
            sb.append(c);
            doubleQuote = false;
            continue;
          }

        switch (c)
          {
          case '!':
            throw new ParseException
              ("Modifier '!' is not yet supported by Classpath", i + base_offset);
          case '<':
            type = CollationSorter.GREATERP;
            break;
          case ';':
            type = CollationSorter.GREATERS;
            break;
          case ',':
            type = CollationSorter.GREATERT;
            break;
          case '=':
            type = CollationSorter.EQUAL;
            break;
          case '\'':
            eatingChars = !eatingChars;
            doubleQuote = true;
            break;
          case '@':
            if (ignoreChars)
              throw new ParseException
                ("comparison list has not yet been started. You may only use"
                 + "(<,;=&)", i + base_offset);
            // Inverse the order of secondaries from now on.
            nextIsModifier = true;
            type = CollationSorter.INVERSE_SECONDARY;
            break;
          case '&':
            type = CollationSorter.RESET;
            if (stop_on_reset)
              break main_parse_loop;
            break;
          default:
            if (operator < 0)
              throw new ParseException
                ("operator missing at " + (i + base_offset), i + base_offset);
            if (! eatingChars
                && ((c >= 0x21 && c <= 0x2F)
                    || (c >= 0x3A && c <= 0x40)
                    || (c >= 0x5B && c <= 0x60)
                    || (c >= 0x7B && c <= 0x7E)))
              throw new ParseException
                ("unquoted punctuation character '" + c + "'", i + base_offset);

            //type = ignoreChars ? CollationSorter.IGNORE : -1;
            sb.append(c);
            break;
          }

        // Not an operator: keep accumulating the text element.
        if (type < 0)
          continue;

        // First operator: nothing to emit yet, just remember it.
        if (operator < 0)
          {
            operator = type;
            continue;
          }

        if (sb.length() == 0 && !isModifier)
          throw new ParseException
            ("text element empty at " + (i+base_offset), i+base_offset);

        if (operator == CollationSorter.RESET)
          {
            /* Reposition in the sorting list at the position
             * indicated by the text element.
             */
            String subrules = rules.substring(i);
            ArrayList<CollationSorter> sorted_rules = new ArrayList<CollationSorter>();
            int idx;

            // Parse the subrules but do not iterate through all
            // sublist. This is the privilege of the first call.
            idx = subParseString(true, sorted_rules, base_offset+i, subrules);

            // Merge new parsed rules into the list.
            mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
            sb.setLength(0);

            // Reset state to none.
            operator = -1;
            // We have found a new subrule at 'idx' but it has not been parsed.
            if (idx >= 0)
              {
                // The loop increment brings i back to the reset operator.
                i += idx-1;
                continue main_parse_loop;
              }
            else
              // No more rules.
              break main_parse_loop;
          }

        String textElement = sb.toString();
        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;
        CollationSorter sorter = new CollationSorter(operator, textElement,
                                                     base_offset + rules.length(),
                                                     ignoreChars);
        sb.setLength(0);

        v.add(sorter);
        operator = type;
      }

    // Emit the rule still pending when the loop ended.
    if (operator >= 0)
      {
        int pos = rules.length() + base_offset;

        if ((sb.length() != 0 && nextIsModifier)
            || (sb.length() == 0 && !nextIsModifier && !eatingChars))
          throw new ParseException("text element empty at " + pos, pos);

        if (operator == CollationSorter.GREATERP)
          ignoreChars = false;

        // 'pos' already includes base_offset; do not add it a second time.
        CollationSorter sorter = new CollationSorter(operator, sb.toString(),
                                                     pos, ignoreChars);
        v.add(sorter);
      }

    if (i == rules.length())
      return -1;
    else
      return i;
  }

  /**
   * This method creates a copy of this object.
   *
   * @return A copy of this object.
   */
  public Object clone()
  {
    return super.clone();
  }

  /**
   * This method completely parses a string 'rules' containing sorting rules.
   *
   * @param rules String containing the rules to be parsed.
   * @return A set of sorting instructions stored in an ArrayList.
   * @throws ParseException if something turned wrong during the parsing. To get details
   * decode the message.
   */
  private ArrayList<CollationSorter> parseString(String rules)
    throws ParseException
  {
    ArrayList<CollationSorter> v = new ArrayList<CollationSorter>();

    // result of the first subParseString is not absolute (may be -1 or a
    // positive integer). But we do not care.
    subParseString(false, v, 0, rules);

    return v;
  }

  /**
   * This method uses the sorting instructions built by {@link #parseString}
   * to build collation elements which can be directly used to sort strings.
   *
   * @param parsedElements Parsed instructions stored in a ArrayList.
   * @throws ParseException if the order of the instructions are not valid.
   */
  private void buildCollationVector(ArrayList<CollationSorter> parsedElements)
    throws ParseException
  {
    int primary_seq = 0;
    int last_tertiary_seq = 0;
    short secondary_seq = 0;
    short tertiary_seq = 0;
    short equality_seq = 0;
    boolean inverseComparisons = false;
    final boolean DECREASING = false;
    final boolean INCREASING = true;
    boolean secondaryType = INCREASING;
    ArrayList<CollationElement> v = new ArrayList<CollationElement>();

    // elts is completely sorted.
element_loop:
    for (int i = 0; i < parsedElements.size(); i++)
      {
        CollationSorter elt = parsedElements.get(i);

        switch (elt.comparisonType)
          {
          case CollationSorter.GREATERP:
            primary_seq++;
            // A pending '@' makes the secondaries of this primary count
            // downwards from the top of the range.
            if (inverseComparisons)
              {
                secondary_seq = Short.MAX_VALUE;
                secondaryType = DECREASING;
              }
            else
              {
                secondary_seq = 0;
                secondaryType = INCREASING;
              }
            tertiary_seq = 0;
            equality_seq = 0;
            inverseComparisons = false;
            break;
          case CollationSorter.GREATERS:
            if (secondaryType == DECREASING)
              secondary_seq--;
            else
              secondary_seq++;
            tertiary_seq = 0;
            equality_seq = 0;
            break;
          case CollationSorter.INVERSE_SECONDARY:
            // The modifier only changes state; it produces no element.
            inverseComparisons = true;
            continue element_loop;
          case CollationSorter.GREATERT:
            tertiary_seq++;
            if (primary_seq == 0)
              last_tertiary_seq = tertiary_seq;
            equality_seq = 0;
            break;
          case CollationSorter.EQUAL:
            equality_seq++;
            break;
          case CollationSorter.RESET:
            throw new ParseException
              ("Invalid reached state 'RESET'. Internal error", elt.offset);
          default:
            throw new ParseException
              ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
          }

        v.add(new CollationElement(elt.textElement, primary_seq,
                                   secondary_seq, tertiary_seq,
                                   equality_seq, elt.expansionOrdering, elt.ignore));
      }

    // A modifier still pending at the end applies to unknown accents.
    this.inverseAccentComparison = inverseComparisons;

    ce_table = v.toArray(new CollationElement[v.size()]);

    last_primary_value = primary_seq+1;
    last_tertiary_value = last_tertiary_seq+1;
  }

  /**
   * Build a tree where all keys are the texts of collation elements and data is
   * the collation element itself. The tree is used when extracting all prefix
   * for a given text.
   */
  private void buildPrefixAccess()
  {
    prefix_tree = new HashMap<String,CollationElement>();

    for (int i = 0; i < ce_table.length; i++)
      {
        CollationElement e = ce_table[i];

        prefix_tree.put(e.key, e);
      }
  }

  /**
   * This method returns an integer which indicates whether the first
   * specified <code>String</code> is less than, greater than, or equal to
   * the second.  The value depends not only on the collation rules in
   * effect, but also the strength and decomposition settings of this object.
   *
   * @param source The first <code>String</code> to compare.
   * @param target A second <code>String</code> to compare to the first.
   *
   * @return A negative integer if source &lt; target, a positive integer
   * if source &gt; target, or 0 if source == target.
   */
  public int compare(String source, String target)
  {
    CollationElementIterator cs, ct;
    CollationElement ord1block = null;
    CollationElement ord2block = null;
    boolean advance_block_1 = true;
    boolean advance_block_2 = true;

    cs = getCollationElementIterator(source);
    ct = getCollationElementIterator(target);

    for(;;)
      {
        int ord1;
        int ord2;

        /*
         * We have to check whether the characters are ignorable.
         * If it is the case then forget them.
         */
        if (advance_block_1)
          {
            ord1block = cs.nextBlock();
            if (ord1block != null && ord1block.ignore)
              continue;
          }

        if (advance_block_2)
          {
            ord2block = ct.nextBlock();
            if (ord2block != null && ord2block.ignore)
              {
                // Keep the current source block while skipping in target.
                advance_block_1 = false;
                continue;
              }
          }
        else
          advance_block_2 = true;

        if (!advance_block_1)
          advance_block_1 = true;

        if (ord1block != null)
          ord1 = ord1block.getValue();
        else
          {
            // Source is exhausted: equal if target is too, else smaller.
            if (ord2block == null)
              return 0;
            return -1;
          }

        if (ord2block == null)
          return 1;

        ord2 = ord2block.getValue();

        // We know chars are totally equal, so skip
        if (ord1 == ord2)
          {
            if (getStrength() == IDENTICAL)
              if (!ord1block.key.equals(ord2block.key))
                return ord1block.key.compareTo(ord2block.key);
            continue;
          }

        // Check for primary strength differences
        int prim1 = CollationElementIterator.primaryOrder(ord1);
        int prim2 = CollationElementIterator.primaryOrder(ord2);

        // Below tertiary strength an element with a null primary is
        // skipped on its own side only.
        if (prim1 == 0 && getStrength() < TERTIARY)
          {
            advance_block_2 = false;
            continue;
          }
        else if (prim2 == 0 && getStrength() < TERTIARY)
          {
            advance_block_1 = false;
            continue;
          }

        if (prim1 < prim2)
          return -1;
        else if (prim1 > prim2)
          return 1;
        else if (getStrength() == PRIMARY)
          continue;

        // Check for secondary strength differences
        int sec1 = CollationElementIterator.secondaryOrder(ord1);
        int sec2 = CollationElementIterator.secondaryOrder(ord2);

        if (sec1 < sec2)
          return -1;
        else if (sec1 > sec2)
          return 1;
        else if (getStrength() == SECONDARY)
          continue;

        // Check for tertiary differences
        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
        int tert2 = CollationElementIterator.tertiaryOrder(ord2);

        if (tert1 < tert2)
          return -1;
        else if (tert1 > tert2)
          return 1;
        else if (getStrength() == TERTIARY)
          continue;

        // Apparently JDK does this (at least for my test case).
        return ord1block.key.compareTo(ord2block.key);
      }
  }

  /**
   * This method tests this object for equality against the specified
   * object.  This will be true if and only if the specified object is
   * another reference to this object.
   *
   * @param obj The <code>Object</code> to compare against this object.
   *
   * @return <code>true</code> if the specified object is equal to this object,
   * <code>false</code> otherwise.
   */
  public boolean equals(Object obj)
  {
    return obj == this;
  }

  /**
   * This method builds a default collation element without invoking
   * the database created from the rules passed to the constructor.
   *
   * @param c Character which needs a collation element.
   * @return A valid brand new CollationElement instance.
   */
  CollationElement getDefaultElement(char c)
  {
    int v;

    // Preliminary support for generic accent sorting inversion (I don't know if all
    // characters in the range should be sorted backward). This is the place
    // to fix this if needed.
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
      v = 0x0361 - ((int) c - 0x02B9);
    else
      // NOTE(review): the (short) cast yields a negative value for
      // c >= 0x8000 -- confirm this is intended.
      v = (short) c;
    return new CollationElement("" + c, last_primary_value + v,
                                (short) 0, (short) 0, (short) 0, null, false);
  }

  /**
   * This method builds a default collation element for an accented character
   * without invoking the database created from the rules passed to the constructor.
   *
   * @param c Character which needs a collation element.
   * @return A valid brand new CollationElement instance.
   */
  CollationElement getDefaultAccentedElement(char c)
  {
    int v;

    // Preliminary support for generic accent sorting inversion (I don't know if all
    // characters in the range should be sorted backward). This is the place
    // to fix this if needed.
    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
      v = 0x0361 - ((int) c - 0x02B9);
    else
      // NOTE(review): the (short) cast yields a negative value for
      // c >= 0x8000 -- confirm this is intended.
      v = (short) c;
    return new CollationElement("" + c, (short) 0,
                                (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
  }

  /**
   * This method returns an instance for <code>CollationElementIterator</code>
   * for the specified <code>String</code> under the collation rules for this
   * object.
   *
   * @param source The <code>String</code> to return the
   * <code>CollationElementIterator</code> instance for.
   *
   * @return A <code>CollationElementIterator</code> for the specified
   * <code>String</code>.
   */
  public CollationElementIterator getCollationElementIterator(String source)
  {
    return new CollationElementIterator(this, source);
  }

  /**
   * This method returns an instance of <code>CollationElementIterator</code>
   * for the <code>String</code> represented by the specified
   * <code>CharacterIterator</code>.
   *
   * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
   *
   * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
   */
  public CollationElementIterator getCollationElementIterator(CharacterIterator source)
  {
    return new CollationElementIterator(this, source);
  }

  /**
   * This method returns an instance of <code>CollationKey</code> for the
   * specified <code>String</code>.  The object returned will have a
   * more efficient mechanism for its comparison function that could
   * provide speed benefits if multiple comparisons are performed, such
   * as during a sort.
   *
   * @param source The <code>String</code> to create a <code>CollationKey</code> for.
   *
   * @return A <code>CollationKey</code> for the specified <code>String</code>.
   */
  public CollationKey getCollationKey(String source)
  {
    CollationElementIterator cei = getCollationElementIterator(source);
    ArrayList<Integer> vect = new ArrayList<Integer>();
    final int strength = getStrength();

    // The iterator has just been created, so the first next() yields the
    // first element; rewinding afterwards would store that element twice.
    for (int ord = cei.next();
         ord != CollationElementIterator.NULLORDER;
         ord = cei.next())
      {
        // If the primary order is null, it means this is an ignorable
        // character.
        if (CollationElementIterator.primaryOrder(ord) == 0)
          continue;

        switch (strength)
          {
          case PRIMARY:
            ord = CollationElementIterator.primaryOrder(ord);
            break;

          case SECONDARY:
            // Both weights are read from the original order before it is
            // overwritten.
            ord = (CollationElementIterator.primaryOrder(ord) << 8)
                  | CollationElementIterator.secondaryOrder(ord);
            break;

          default:
            break;
          }

        vect.add(Integer.valueOf(ord));
      }

    // Each order is stored as four bytes, most significant byte first.
    int count = vect.size();
    byte[] key = new byte[count * 4];

    for (int i = 0; i < count; i++)
      {
        int j = vect.get(i).intValue();
        key [i * 4] = (byte) (j >>> 24);
        key [i * 4 + 1] = (byte) (j >>> 16);
        key [i * 4 + 2] = (byte) (j >>> 8);
        key [i * 4 + 3] = (byte) j;
      }

    return new CollationKey(this, source, key);
  }

  /**
   * This method returns a <code>String</code> containing the collation rules
   * for this object.
   *
   * @return The collation rules for this object.
   */
  public String getRules()
  {
    return rules;
  }

  /**
   * This method returns a hash value for this object.
   *
   * @return A hash value for this object.
   */
  public int hashCode()
  {
    return System.identityHashCode(this);
  }
}