1 /* 2 * Copyright (c) 2002, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import jdk.internal.misc.CDS; 29 import jdk.internal.value.DeserializeConstructor; 30 import jdk.internal.vm.annotation.IntrinsicCandidate; 31 import jdk.internal.vm.annotation.Stable; 32 33 import java.lang.constant.Constable; 34 import java.lang.constant.DynamicConstantDesc; 35 import java.util.Arrays; 36 import java.util.HashMap; 37 import java.util.Locale; 38 import java.util.Map; 39 import java.util.Objects; 40 import java.util.Optional; 41 42 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 43 import static java.lang.constant.ConstantDescs.CD_char; 44 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 45 46 /** 47 * The {@code Character} class is the {@linkplain 48 * java.lang##wrapperClass wrapper class} for values of the primitive 49 * type {@code char}. An object of type {@code Character} contains a 50 * single field whose type is {@code char}. 51 * 52 * <p>In addition, this class provides a large number of static methods for 53 * determining a character's category (lowercase letter, digit, etc.) 54 * and for converting characters from uppercase to lowercase and vice 55 * versa. 56 * 57 * <h2><a id="conformance">Unicode Conformance</a></h2> 58 * <p> 59 * The fields and methods of class {@code Character} are defined in terms 60 * of character information from the Unicode Standard, specifically the 61 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 62 * This file specifies properties including name and category for every 63 * assigned Unicode code point or character range. The file is available 64 * from the Unicode Consortium at 65 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 66 * <p> 67 * Character information is based on the Unicode Standard, version 16.0. 68 * <p> 69 * The Java platform has supported different versions of the Unicode 70 * Standard over time. 
Upgrades to newer versions of the Unicode Standard 71 * occurred in the following Java releases, each indicating the new version: 72 * <table class="striped"> 73 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 74 * <thead> 75 * <tr><th scope="col">Java release</th> 76 * <th scope="col">Unicode version</th></tr> 77 * </thead> 78 * <tbody> 79 * <tr><th scope="row" style="text-align:left">Java SE 24</th> 80 * <td>Unicode 16.0</td></tr> 81 * <tr><th scope="row" style="text-align:left">Java SE 22</th> 82 * <td>Unicode 15.1</td></tr> 83 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 84 * <td>Unicode 15.0</td></tr> 85 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 86 * <td>Unicode 14.0</td></tr> 87 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 88 * <td>Unicode 13.0</td></tr> 89 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 90 * <td>Unicode 12.1</td></tr> 91 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 92 * <td>Unicode 11.0</td></tr> 93 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 94 * <td>Unicode 10.0</td></tr> 95 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 96 * <td>Unicode 8.0</td></tr> 97 * <tr><th scope="row" style="text-align:left">Java SE 8</th> 98 * <td>Unicode 6.2</td></tr> 99 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 100 * <td>Unicode 6.0</td></tr> 101 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 102 * <td>Unicode 4.0</td></tr> 103 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 104 * <td>Unicode 3.0</td></tr> 105 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 106 * <td>Unicode 2.0</td></tr> 107 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 108 * <td>Unicode 1.1.5</td></tr> 109 * </tbody> 110 * </table> 111 * Variations from these base Unicode versions, such as recognized appendixes, 112 * are documented elsewhere. 
113 * <h2><a id="unicode">Unicode Character Representations</a></h2> 114 * 115 * <p>The {@code char} data type (and therefore the value that a 116 * {@code Character} object encapsulates) is based on the 117 * original Unicode specification, which defined characters as 118 * fixed-width 16-bit entities. The Unicode Standard has since been 119 * changed to allow for characters whose representation requires more 120 * than 16 bits. The range of legal <em>code point</em>s is now 121 * U+0000 to U+10FFFF, known as 122 * <em><a href="https://www.unicode.org/glossary/#unicode_scalar_value"> 123 * Unicode scalar value</a></em>. 124 * 125 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 126 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 127 * <a id="supplementary">Characters</a> whose code points are greater 128 * than U+FFFF are called <em>supplementary character</em>s. The Java 129 * platform uses the UTF-16 representation in {@code char} arrays and 130 * in the {@code String} and {@code StringBuffer} classes. In 131 * this representation, supplementary characters are represented as a pair 132 * of {@code char} values, the first from the <em>high-surrogates</em> 133 * range (\uD800-\uDBFF), the second from the 134 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 135 * 136 * <p>A {@code char} value, therefore, represents Basic 137 * Multilingual Plane (BMP) code points, including the surrogate 138 * code points, or code units of the UTF-16 encoding. An 139 * {@code int} value represents all Unicode code points, 140 * including supplementary code points. The lower (least significant) 141 * 21 bits of {@code int} are used to represent Unicode code 142 * points and the upper (most significant) 11 bits must be zero. 
143 * Unless otherwise specified, the behavior with respect to 144 * supplementary characters and surrogate {@code char} values is 145 * as follows: 146 * 147 * <ul> 148 * <li>The methods that only accept a {@code char} value cannot support 149 * supplementary characters. They treat {@code char} values from the 150 * surrogate ranges as undefined characters. For example, 151 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 152 * this specific value if followed by any low-surrogate value in a string 153 * would represent a letter. 154 * 155 * <li>The methods that accept an {@code int} value support all 156 * Unicode characters, including supplementary characters. For 157 * example, {@code Character.isLetter(0x2F81A)} returns 158 * {@code true} because the code point value represents a letter 159 * (a CJK ideograph). 160 * </ul> 161 * 162 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 163 * used for character values in the range between U+0000 and U+10FFFF, 164 * and <em>Unicode code unit</em> is used for 16-bit 165 * {@code char} values that are code units of the <em>UTF-16</em> 166 * encoding. For more information on Unicode terminology, refer to the 167 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 168 * 169 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 170 * class; programmers should treat instances that are {@linkplain #equals(Object) equal} 171 * as interchangeable and should not use instances for synchronization, mutexes, or 172 * with {@linkplain java.lang.ref.Reference object references}. 173 * 174 * <div class="preview-block"> 175 * <div class="preview-comment"> 176 * When preview features are enabled, {@code Character} is a {@linkplain Class#isValue value class}. 
177 * Use of value class instances for synchronization, mutexes, or with 178 * {@linkplain java.lang.ref.Reference object references} result in 179 * {@link IdentityException}. 180 * </div> 181 * </div> 182 * 183 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 184 * @author Lee Boynton 185 * @author Guy Steele 186 * @author Akira Tanaka 187 * @author Martin Buchholz 188 * @author Ulf Zibis 189 * @since 1.0 190 */ 191 @jdk.internal.MigratedValueClass 192 @jdk.internal.ValueBased 193 public final class Character implements java.io.Serializable, Comparable<Character>, Constable { 194 /** 195 * The minimum radix available for conversion to and from strings. 196 * The constant value of this field is the smallest value permitted 197 * for the radix argument in radix-conversion methods such as the 198 * {@code digit} method, the {@code forDigit} method, and the 199 * {@code toString} method of class {@code Integer}. 200 * 201 * @see Character#digit(char, int) 202 * @see Character#forDigit(int, int) 203 * @see Integer#toString(int, int) 204 * @see Integer#valueOf(String) 205 */ 206 public static final int MIN_RADIX = 2; 207 208 /** 209 * The maximum radix available for conversion to and from strings. 210 * The constant value of this field is the largest value permitted 211 * for the radix argument in radix-conversion methods such as the 212 * {@code digit} method, the {@code forDigit} method, and the 213 * {@code toString} method of class {@code Integer}. 214 * 215 * @see Character#digit(char, int) 216 * @see Character#forDigit(int, int) 217 * @see Integer#toString(int, int) 218 * @see Integer#valueOf(String) 219 */ 220 public static final int MAX_RADIX = 36; 221 222 /** 223 * The constant value of this field is the smallest value of type 224 * {@code char}, {@code '\u005Cu0000'}. 
225 * 226 * @since 1.0.2 227 */ 228 public static final char MIN_VALUE = '\u0000'; 229 230 /** 231 * The constant value of this field is the largest value of type 232 * {@code char}, {@code '\u005CuFFFF'}. 233 * 234 * @since 1.0.2 235 */ 236 public static final char MAX_VALUE = '\uFFFF'; 237 238 /** 239 * The {@code Class} instance representing the primitive type 240 * {@code char}. 241 * 242 * @since 1.1 243 */ 244 public static final Class<Character> TYPE = Class.getPrimitiveClass("char"); 245 246 /* 247 * Normative general types 248 */ 249 250 /* 251 * General character types 252 */ 253 254 /** 255 * General category "Cn" in the Unicode specification. 256 * @since 1.1 257 */ 258 public static final byte UNASSIGNED = 0; 259 260 /** 261 * General category "Lu" in the Unicode specification. 262 * @since 1.1 263 */ 264 public static final byte UPPERCASE_LETTER = 1; 265 266 /** 267 * General category "Ll" in the Unicode specification. 268 * @since 1.1 269 */ 270 public static final byte LOWERCASE_LETTER = 2; 271 272 /** 273 * General category "Lt" in the Unicode specification. 274 * @since 1.1 275 */ 276 public static final byte TITLECASE_LETTER = 3; 277 278 /** 279 * General category "Lm" in the Unicode specification. 280 * @since 1.1 281 */ 282 public static final byte MODIFIER_LETTER = 4; 283 284 /** 285 * General category "Lo" in the Unicode specification. 286 * @since 1.1 287 */ 288 public static final byte OTHER_LETTER = 5; 289 290 /** 291 * General category "Mn" in the Unicode specification. 292 * @since 1.1 293 */ 294 public static final byte NON_SPACING_MARK = 6; 295 296 /** 297 * General category "Me" in the Unicode specification. 298 * @since 1.1 299 */ 300 public static final byte ENCLOSING_MARK = 7; 301 302 /** 303 * General category "Mc" in the Unicode specification. 304 * @since 1.1 305 */ 306 public static final byte COMBINING_SPACING_MARK = 8; 307 308 /** 309 * General category "Nd" in the Unicode specification. 
310 * @since 1.1 311 */ 312 public static final byte DECIMAL_DIGIT_NUMBER = 9; 313 314 /** 315 * General category "Nl" in the Unicode specification. 316 * @since 1.1 317 */ 318 public static final byte LETTER_NUMBER = 10; 319 320 /** 321 * General category "No" in the Unicode specification. 322 * @since 1.1 323 */ 324 public static final byte OTHER_NUMBER = 11; 325 326 /** 327 * General category "Zs" in the Unicode specification. 328 * @since 1.1 329 */ 330 public static final byte SPACE_SEPARATOR = 12; 331 332 /** 333 * General category "Zl" in the Unicode specification. 334 * @since 1.1 335 */ 336 public static final byte LINE_SEPARATOR = 13; 337 338 /** 339 * General category "Zp" in the Unicode specification. 340 * @since 1.1 341 */ 342 public static final byte PARAGRAPH_SEPARATOR = 14; 343 344 /** 345 * General category "Cc" in the Unicode specification. 346 * @since 1.1 347 */ 348 public static final byte CONTROL = 15; 349 350 /** 351 * General category "Cf" in the Unicode specification. 352 * @since 1.1 353 */ 354 public static final byte FORMAT = 16; 355 356 /** 357 * General category "Co" in the Unicode specification. 358 * @since 1.1 359 */ 360 public static final byte PRIVATE_USE = 18; 361 362 /** 363 * General category "Cs" in the Unicode specification. 364 * @since 1.1 365 */ 366 public static final byte SURROGATE = 19; 367 368 /** 369 * General category "Pd" in the Unicode specification. 370 * @since 1.1 371 */ 372 public static final byte DASH_PUNCTUATION = 20; 373 374 /** 375 * General category "Ps" in the Unicode specification. 376 * @since 1.1 377 */ 378 public static final byte START_PUNCTUATION = 21; 379 380 /** 381 * General category "Pe" in the Unicode specification. 382 * @since 1.1 383 */ 384 public static final byte END_PUNCTUATION = 22; 385 386 /** 387 * General category "Pc" in the Unicode specification. 
388 * @since 1.1 389 */ 390 public static final byte CONNECTOR_PUNCTUATION = 23; 391 392 /** 393 * General category "Po" in the Unicode specification. 394 * @since 1.1 395 */ 396 public static final byte OTHER_PUNCTUATION = 24; 397 398 /** 399 * General category "Sm" in the Unicode specification. 400 * @since 1.1 401 */ 402 public static final byte MATH_SYMBOL = 25; 403 404 /** 405 * General category "Sc" in the Unicode specification. 406 * @since 1.1 407 */ 408 public static final byte CURRENCY_SYMBOL = 26; 409 410 /** 411 * General category "Sk" in the Unicode specification. 412 * @since 1.1 413 */ 414 public static final byte MODIFIER_SYMBOL = 27; 415 416 /** 417 * General category "So" in the Unicode specification. 418 * @since 1.1 419 */ 420 public static final byte OTHER_SYMBOL = 28; 421 422 /** 423 * General category "Pi" in the Unicode specification. 424 * @since 1.4 425 */ 426 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 427 428 /** 429 * General category "Pf" in the Unicode specification. 430 * @since 1.4 431 */ 432 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 433 434 /** 435 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 436 */ 437 static final int ERROR = 0xFFFFFFFF; 438 439 440 /** 441 * Undefined bidirectional character type. Undefined {@code char} 442 * values have undefined directionality in the Unicode specification. 443 * @since 1.4 444 */ 445 public static final byte DIRECTIONALITY_UNDEFINED = -1; 446 447 /** 448 * Strong bidirectional character type "L" in the Unicode specification. 449 * @since 1.4 450 */ 451 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 452 453 /** 454 * Strong bidirectional character type "R" in the Unicode specification. 455 * @since 1.4 456 */ 457 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 458 459 /** 460 * Strong bidirectional character type "AL" in the Unicode specification. 
461 * @since 1.4 462 */ 463 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 464 465 /** 466 * Weak bidirectional character type "EN" in the Unicode specification. 467 * @since 1.4 468 */ 469 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 470 471 /** 472 * Weak bidirectional character type "ES" in the Unicode specification. 473 * @since 1.4 474 */ 475 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 476 477 /** 478 * Weak bidirectional character type "ET" in the Unicode specification. 479 * @since 1.4 480 */ 481 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 482 483 /** 484 * Weak bidirectional character type "AN" in the Unicode specification. 485 * @since 1.4 486 */ 487 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 488 489 /** 490 * Weak bidirectional character type "CS" in the Unicode specification. 491 * @since 1.4 492 */ 493 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 494 495 /** 496 * Weak bidirectional character type "NSM" in the Unicode specification. 497 * @since 1.4 498 */ 499 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 500 501 /** 502 * Weak bidirectional character type "BN" in the Unicode specification. 503 * @since 1.4 504 */ 505 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 506 507 /** 508 * Neutral bidirectional character type "B" in the Unicode specification. 509 * @since 1.4 510 */ 511 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 512 513 /** 514 * Neutral bidirectional character type "S" in the Unicode specification. 515 * @since 1.4 516 */ 517 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 518 519 /** 520 * Neutral bidirectional character type "WS" in the Unicode specification. 521 * @since 1.4 522 */ 523 public static final byte DIRECTIONALITY_WHITESPACE = 12; 524 525 /** 526 * Neutral bidirectional character type "ON" in the Unicode specification. 
527 * @since 1.4 528 */ 529 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 530 531 /** 532 * Strong bidirectional character type "LRE" in the Unicode specification. 533 * @since 1.4 534 */ 535 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 536 537 /** 538 * Strong bidirectional character type "LRO" in the Unicode specification. 539 * @since 1.4 540 */ 541 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 542 543 /** 544 * Strong bidirectional character type "RLE" in the Unicode specification. 545 * @since 1.4 546 */ 547 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 548 549 /** 550 * Strong bidirectional character type "RLO" in the Unicode specification. 551 * @since 1.4 552 */ 553 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 554 555 /** 556 * Weak bidirectional character type "PDF" in the Unicode specification. 557 * @since 1.4 558 */ 559 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 560 561 /** 562 * Weak bidirectional character type "LRI" in the Unicode specification. 563 * @since 9 564 */ 565 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 566 567 /** 568 * Weak bidirectional character type "RLI" in the Unicode specification. 569 * @since 9 570 */ 571 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 572 573 /** 574 * Weak bidirectional character type "FSI" in the Unicode specification. 575 * @since 9 576 */ 577 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 578 579 /** 580 * Weak bidirectional character type "PDI" in the Unicode specification. 581 * @since 9 582 */ 583 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 584 585 /** 586 * The minimum value of a 587 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 588 * Unicode high-surrogate code unit</a> 589 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 
590 * A high-surrogate is also known as a <i>leading-surrogate</i>. 591 * 592 * @since 1.5 593 */ 594 public static final char MIN_HIGH_SURROGATE = '\uD800'; 595 596 /** 597 * The maximum value of a 598 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 599 * Unicode high-surrogate code unit</a> 600 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 601 * A high-surrogate is also known as a <i>leading-surrogate</i>. 602 * 603 * @since 1.5 604 */ 605 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 606 607 /** 608 * The minimum value of a 609 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 610 * Unicode low-surrogate code unit</a> 611 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 612 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 613 * 614 * @since 1.5 615 */ 616 public static final char MIN_LOW_SURROGATE = '\uDC00'; 617 618 /** 619 * The maximum value of a 620 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 621 * Unicode low-surrogate code unit</a> 622 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 623 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 624 * 625 * @since 1.5 626 */ 627 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 628 629 /** 630 * The minimum value of a Unicode surrogate code unit in the 631 * UTF-16 encoding, constant {@code '\u005CuD800'}. 632 * 633 * @since 1.5 634 */ 635 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 636 637 /** 638 * The maximum value of a Unicode surrogate code unit in the 639 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 640 * 641 * @since 1.5 642 */ 643 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 644 645 /** 646 * The minimum value of a 647 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 648 * Unicode supplementary code point</a>, constant {@code U+10000}. 
649 * 650 * @since 1.5 651 */ 652 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 653 654 /** 655 * The minimum value of a 656 * <a href="http://www.unicode.org/glossary/#code_point"> 657 * Unicode code point</a>, constant {@code U+0000}. 658 * 659 * @since 1.5 660 */ 661 public static final int MIN_CODE_POINT = 0x000000; 662 663 /** 664 * The maximum value of a 665 * <a href="http://www.unicode.org/glossary/#code_point"> 666 * Unicode code point</a>, constant {@code U+10FFFF}. 667 * 668 * @since 1.5 669 */ 670 public static final int MAX_CODE_POINT = 0X10FFFF; 671 672 /** 673 * Returns an {@link Optional} containing the nominal descriptor for this 674 * instance. 675 * 676 * @return an {@link Optional} describing the {@linkplain Character} instance 677 * @since 15 678 */ 679 @Override 680 public Optional<DynamicConstantDesc<Character>> describeConstable() { 681 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 682 } 683 684 /** 685 * Instances of this class represent particular subsets of the Unicode 686 * character set. The only family of subsets defined in the 687 * {@code Character} class is {@link Character.UnicodeBlock}. 688 * Other portions of the Java API may define other subsets for their 689 * own purposes. 690 * 691 * @since 1.2 692 */ 693 public static class Subset { 694 695 private String name; 696 697 /** 698 * Constructs a new {@code Subset} instance. 699 * 700 * @param name The name of this subset 701 * @throws NullPointerException if name is {@code null} 702 */ 703 protected Subset(String name) { 704 if (name == null) { 705 throw new NullPointerException("name"); 706 } 707 this.name = name; 708 } 709 710 /** 711 * Compares two {@code Subset} objects for equality. 712 * This method returns {@code true} if and only if 713 * {@code this} and the argument refer to the same 714 * object; since this method is {@code final}, this 715 * guarantee holds for all subclasses. 
716 */ 717 public final boolean equals(Object obj) { 718 return (this == obj); 719 } 720 721 /** 722 * Returns the standard hash code as defined by the 723 * {@link Object#hashCode} method. This method 724 * is {@code final} in order to ensure that the 725 * {@code equals} and {@code hashCode} methods will 726 * be consistent in all subclasses. 727 */ 728 public final int hashCode() { 729 return super.hashCode(); 730 } 731 732 /** 733 * Returns the name of this subset. 734 */ 735 public final String toString() { 736 return name; 737 } 738 } 739 740 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 741 // for the latest specification of Unicode Blocks. 742 743 /** 744 * A family of character subsets representing the character blocks in the 745 * Unicode specification. Character blocks generally define characters 746 * used for a specific script or purpose. A character is contained by 747 * at most one Unicode block. 748 * 749 * @since 1.2 750 */ 751 public static final class UnicodeBlock extends Subset { 752 /** 753 * NUM_ENTITIES should match the total number of UnicodeBlocks. 754 * It should be adjusted whenever the Unicode Character Database 755 * is upgraded. 756 */ 757 private static final int NUM_ENTITIES = 782; 758 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 759 760 /** 761 * Creates a UnicodeBlock with the given identifier name. 762 * This name must be the same as the block identifier. 763 */ 764 private UnicodeBlock(String idName) { 765 super(idName); 766 map.put(idName, this); 767 } 768 769 /** 770 * Creates a UnicodeBlock with the given identifier name and 771 * alias name. 772 */ 773 private UnicodeBlock(String idName, String alias) { 774 this(idName); 775 map.put(alias, this); 776 } 777 778 /** 779 * Creates a UnicodeBlock with the given identifier name and 780 * alias names. 781 */ 782 private UnicodeBlock(String idName, String... 
aliases) { 783 this(idName); 784 for (String alias : aliases) 785 map.put(alias, this); 786 } 787 788 /** 789 * Constant for the "Basic Latin" Unicode character block. 790 * @since 1.2 791 */ 792 public static final UnicodeBlock BASIC_LATIN = 793 new UnicodeBlock("BASIC_LATIN", 794 "BASIC LATIN", 795 "BASICLATIN"); 796 797 /** 798 * Constant for the "Latin-1 Supplement" Unicode character block. 799 * @since 1.2 800 */ 801 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 802 new UnicodeBlock("LATIN_1_SUPPLEMENT", 803 "LATIN-1 SUPPLEMENT", 804 "LATIN-1SUPPLEMENT"); 805 806 /** 807 * Constant for the "Latin Extended-A" Unicode character block. 808 * @since 1.2 809 */ 810 public static final UnicodeBlock LATIN_EXTENDED_A = 811 new UnicodeBlock("LATIN_EXTENDED_A", 812 "LATIN EXTENDED-A", 813 "LATINEXTENDED-A"); 814 815 /** 816 * Constant for the "Latin Extended-B" Unicode character block. 817 * @since 1.2 818 */ 819 public static final UnicodeBlock LATIN_EXTENDED_B = 820 new UnicodeBlock("LATIN_EXTENDED_B", 821 "LATIN EXTENDED-B", 822 "LATINEXTENDED-B"); 823 824 /** 825 * Constant for the "IPA Extensions" Unicode character block. 826 * @since 1.2 827 */ 828 public static final UnicodeBlock IPA_EXTENSIONS = 829 new UnicodeBlock("IPA_EXTENSIONS", 830 "IPA EXTENSIONS", 831 "IPAEXTENSIONS"); 832 833 /** 834 * Constant for the "Spacing Modifier Letters" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 838 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 839 "SPACING MODIFIER LETTERS", 840 "SPACINGMODIFIERLETTERS"); 841 842 /** 843 * Constant for the "Combining Diacritical Marks" Unicode character block. 844 * @since 1.2 845 */ 846 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 847 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 848 "COMBINING DIACRITICAL MARKS", 849 "COMBININGDIACRITICALMARKS"); 850 851 /** 852 * Constant for the "Greek and Coptic" Unicode character block. 
853 * <p> 854 * This block was previously known as the "Greek" block. 855 * 856 * @since 1.2 857 */ 858 public static final UnicodeBlock GREEK = 859 new UnicodeBlock("GREEK", 860 "GREEK AND COPTIC", 861 "GREEKANDCOPTIC"); 862 863 /** 864 * Constant for the "Cyrillic" Unicode character block. 865 * @since 1.2 866 */ 867 public static final UnicodeBlock CYRILLIC = 868 new UnicodeBlock("CYRILLIC"); 869 870 /** 871 * Constant for the "Armenian" Unicode character block. 872 * @since 1.2 873 */ 874 public static final UnicodeBlock ARMENIAN = 875 new UnicodeBlock("ARMENIAN"); 876 877 /** 878 * Constant for the "Hebrew" Unicode character block. 879 * @since 1.2 880 */ 881 public static final UnicodeBlock HEBREW = 882 new UnicodeBlock("HEBREW"); 883 884 /** 885 * Constant for the "Arabic" Unicode character block. 886 * @since 1.2 887 */ 888 public static final UnicodeBlock ARABIC = 889 new UnicodeBlock("ARABIC"); 890 891 /** 892 * Constant for the "Devanagari" Unicode character block. 893 * @since 1.2 894 */ 895 public static final UnicodeBlock DEVANAGARI = 896 new UnicodeBlock("DEVANAGARI"); 897 898 /** 899 * Constant for the "Bengali" Unicode character block. 900 * @since 1.2 901 */ 902 public static final UnicodeBlock BENGALI = 903 new UnicodeBlock("BENGALI"); 904 905 /** 906 * Constant for the "Gurmukhi" Unicode character block. 907 * @since 1.2 908 */ 909 public static final UnicodeBlock GURMUKHI = 910 new UnicodeBlock("GURMUKHI"); 911 912 /** 913 * Constant for the "Gujarati" Unicode character block. 914 * @since 1.2 915 */ 916 public static final UnicodeBlock GUJARATI = 917 new UnicodeBlock("GUJARATI"); 918 919 /** 920 * Constant for the "Oriya" Unicode character block. 921 * @since 1.2 922 */ 923 public static final UnicodeBlock ORIYA = 924 new UnicodeBlock("ORIYA"); 925 926 /** 927 * Constant for the "Tamil" Unicode character block. 
928 * @since 1.2 929 */ 930 public static final UnicodeBlock TAMIL = 931 new UnicodeBlock("TAMIL"); 932 933 /** 934 * Constant for the "Telugu" Unicode character block. 935 * @since 1.2 936 */ 937 public static final UnicodeBlock TELUGU = 938 new UnicodeBlock("TELUGU"); 939 940 /** 941 * Constant for the "Kannada" Unicode character block. 942 * @since 1.2 943 */ 944 public static final UnicodeBlock KANNADA = 945 new UnicodeBlock("KANNADA"); 946 947 /** 948 * Constant for the "Malayalam" Unicode character block. 949 * @since 1.2 950 */ 951 public static final UnicodeBlock MALAYALAM = 952 new UnicodeBlock("MALAYALAM"); 953 954 /** 955 * Constant for the "Thai" Unicode character block. 956 * @since 1.2 957 */ 958 public static final UnicodeBlock THAI = 959 new UnicodeBlock("THAI"); 960 961 /** 962 * Constant for the "Lao" Unicode character block. 963 * @since 1.2 964 */ 965 public static final UnicodeBlock LAO = 966 new UnicodeBlock("LAO"); 967 968 /** 969 * Constant for the "Tibetan" Unicode character block. 970 * @since 1.2 971 */ 972 public static final UnicodeBlock TIBETAN = 973 new UnicodeBlock("TIBETAN"); 974 975 /** 976 * Constant for the "Georgian" Unicode character block. 977 * @since 1.2 978 */ 979 public static final UnicodeBlock GEORGIAN = 980 new UnicodeBlock("GEORGIAN"); 981 982 /** 983 * Constant for the "Hangul Jamo" Unicode character block. 984 * @since 1.2 985 */ 986 public static final UnicodeBlock HANGUL_JAMO = 987 new UnicodeBlock("HANGUL_JAMO", 988 "HANGUL JAMO", 989 "HANGULJAMO"); 990 991 /** 992 * Constant for the "Latin Extended Additional" Unicode character block. 993 * @since 1.2 994 */ 995 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 996 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 997 "LATIN EXTENDED ADDITIONAL", 998 "LATINEXTENDEDADDITIONAL"); 999 1000 /** 1001 * Constant for the "Greek Extended" Unicode character block. 
1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock GREEK_EXTENDED = 1005 new UnicodeBlock("GREEK_EXTENDED", 1006 "GREEK EXTENDED", 1007 "GREEKEXTENDED"); 1008 1009 /** 1010 * Constant for the "General Punctuation" Unicode character block. 1011 * @since 1.2 1012 */ 1013 public static final UnicodeBlock GENERAL_PUNCTUATION = 1014 new UnicodeBlock("GENERAL_PUNCTUATION", 1015 "GENERAL PUNCTUATION", 1016 "GENERALPUNCTUATION"); 1017 1018 /** 1019 * Constant for the "Superscripts and Subscripts" Unicode character 1020 * block. 1021 * @since 1.2 1022 */ 1023 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1024 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1025 "SUPERSCRIPTS AND SUBSCRIPTS", 1026 "SUPERSCRIPTSANDSUBSCRIPTS"); 1027 1028 /** 1029 * Constant for the "Currency Symbols" Unicode character block. 1030 * @since 1.2 1031 */ 1032 public static final UnicodeBlock CURRENCY_SYMBOLS = 1033 new UnicodeBlock("CURRENCY_SYMBOLS", 1034 "CURRENCY SYMBOLS", 1035 "CURRENCYSYMBOLS"); 1036 1037 /** 1038 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1039 * character block. 1040 * <p> 1041 * This block was previously known as "Combining Marks for Symbols". 1042 * @since 1.2 1043 */ 1044 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1045 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1046 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1047 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1048 "COMBINING MARKS FOR SYMBOLS", 1049 "COMBININGMARKSFORSYMBOLS"); 1050 1051 /** 1052 * Constant for the "Letterlike Symbols" Unicode character block. 1053 * @since 1.2 1054 */ 1055 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1056 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1057 "LETTERLIKE SYMBOLS", 1058 "LETTERLIKESYMBOLS"); 1059 1060 /** 1061 * Constant for the "Number Forms" Unicode character block. 
1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock NUMBER_FORMS = 1065 new UnicodeBlock("NUMBER_FORMS", 1066 "NUMBER FORMS", 1067 "NUMBERFORMS"); 1068 1069 /** 1070 * Constant for the "Arrows" Unicode character block. 1071 * @since 1.2 1072 */ 1073 public static final UnicodeBlock ARROWS = 1074 new UnicodeBlock("ARROWS"); 1075 1076 /** 1077 * Constant for the "Mathematical Operators" Unicode character block. 1078 * @since 1.2 1079 */ 1080 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1081 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1082 "MATHEMATICAL OPERATORS", 1083 "MATHEMATICALOPERATORS"); 1084 1085 /** 1086 * Constant for the "Miscellaneous Technical" Unicode character block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1090 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1091 "MISCELLANEOUS TECHNICAL", 1092 "MISCELLANEOUSTECHNICAL"); 1093 1094 /** 1095 * Constant for the "Control Pictures" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock CONTROL_PICTURES = 1099 new UnicodeBlock("CONTROL_PICTURES", 1100 "CONTROL PICTURES", 1101 "CONTROLPICTURES"); 1102 1103 /** 1104 * Constant for the "Optical Character Recognition" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1108 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1109 "OPTICAL CHARACTER RECOGNITION", 1110 "OPTICALCHARACTERRECOGNITION"); 1111 1112 /** 1113 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1117 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1118 "ENCLOSED ALPHANUMERICS", 1119 "ENCLOSEDALPHANUMERICS"); 1120 1121 /** 1122 * Constant for the "Box Drawing" Unicode character block. 
1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock BOX_DRAWING = 1126 new UnicodeBlock("BOX_DRAWING", 1127 "BOX DRAWING", 1128 "BOXDRAWING"); 1129 1130 /** 1131 * Constant for the "Block Elements" Unicode character block. 1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock BLOCK_ELEMENTS = 1135 new UnicodeBlock("BLOCK_ELEMENTS", 1136 "BLOCK ELEMENTS", 1137 "BLOCKELEMENTS"); 1138 1139 /** 1140 * Constant for the "Geometric Shapes" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock GEOMETRIC_SHAPES = 1144 new UnicodeBlock("GEOMETRIC_SHAPES", 1145 "GEOMETRIC SHAPES", 1146 "GEOMETRICSHAPES"); 1147 1148 /** 1149 * Constant for the "Miscellaneous Symbols" Unicode character block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1153 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1154 "MISCELLANEOUS SYMBOLS", 1155 "MISCELLANEOUSSYMBOLS"); 1156 1157 /** 1158 * Constant for the "Dingbats" Unicode character block. 1159 * @since 1.2 1160 */ 1161 public static final UnicodeBlock DINGBATS = 1162 new UnicodeBlock("DINGBATS"); 1163 1164 /** 1165 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1166 * @since 1.2 1167 */ 1168 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1169 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1170 "CJK SYMBOLS AND PUNCTUATION", 1171 "CJKSYMBOLSANDPUNCTUATION"); 1172 1173 /** 1174 * Constant for the "Hiragana" Unicode character block. 1175 * @since 1.2 1176 */ 1177 public static final UnicodeBlock HIRAGANA = 1178 new UnicodeBlock("HIRAGANA"); 1179 1180 /** 1181 * Constant for the "Katakana" Unicode character block. 1182 * @since 1.2 1183 */ 1184 public static final UnicodeBlock KATAKANA = 1185 new UnicodeBlock("KATAKANA"); 1186 1187 /** 1188 * Constant for the "Bopomofo" Unicode character block. 
1189 * @since 1.2 1190 */ 1191 public static final UnicodeBlock BOPOMOFO = 1192 new UnicodeBlock("BOPOMOFO"); 1193 1194 /** 1195 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1196 * @since 1.2 1197 */ 1198 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1199 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1200 "HANGUL COMPATIBILITY JAMO", 1201 "HANGULCOMPATIBILITYJAMO"); 1202 1203 /** 1204 * Constant for the "Kanbun" Unicode character block. 1205 * @since 1.2 1206 */ 1207 public static final UnicodeBlock KANBUN = 1208 new UnicodeBlock("KANBUN"); 1209 1210 /** 1211 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 1212 * @since 1.2 1213 */ 1214 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1215 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1216 "ENCLOSED CJK LETTERS AND MONTHS", 1217 "ENCLOSEDCJKLETTERSANDMONTHS"); 1218 1219 /** 1220 * Constant for the "CJK Compatibility" Unicode character block. 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock CJK_COMPATIBILITY = 1224 new UnicodeBlock("CJK_COMPATIBILITY", 1225 "CJK COMPATIBILITY", 1226 "CJKCOMPATIBILITY"); 1227 1228 /** 1229 * Constant for the "CJK Unified Ideographs" Unicode character block. 1230 * @since 1.2 1231 */ 1232 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1233 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1234 "CJK UNIFIED IDEOGRAPHS", 1235 "CJKUNIFIEDIDEOGRAPHS"); 1236 1237 /** 1238 * Constant for the "Hangul Syllables" Unicode character block. 1239 * @since 1.2 1240 */ 1241 public static final UnicodeBlock HANGUL_SYLLABLES = 1242 new UnicodeBlock("HANGUL_SYLLABLES", 1243 "HANGUL SYLLABLES", 1244 "HANGULSYLLABLES"); 1245 1246 /** 1247 * Constant for the "Private Use Area" Unicode character block. 
1248 * @since 1.2 1249 */ 1250 public static final UnicodeBlock PRIVATE_USE_AREA = 1251 new UnicodeBlock("PRIVATE_USE_AREA", 1252 "PRIVATE USE AREA", 1253 "PRIVATEUSEAREA"); 1254 1255 /** 1256 * Constant for the "CJK Compatibility Ideographs" Unicode character 1257 * block. 1258 * @since 1.2 1259 */ 1260 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1261 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1262 "CJK COMPATIBILITY IDEOGRAPHS", 1263 "CJKCOMPATIBILITYIDEOGRAPHS"); 1264 1265 /** 1266 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 1267 * @since 1.2 1268 */ 1269 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1270 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1271 "ALPHABETIC PRESENTATION FORMS", 1272 "ALPHABETICPRESENTATIONFORMS"); 1273 1274 /** 1275 * Constant for the "Arabic Presentation Forms-A" Unicode character 1276 * block. 1277 * @since 1.2 1278 */ 1279 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1280 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1281 "ARABIC PRESENTATION FORMS-A", 1282 "ARABICPRESENTATIONFORMS-A"); 1283 1284 /** 1285 * Constant for the "Combining Half Marks" Unicode character block. 1286 * @since 1.2 1287 */ 1288 public static final UnicodeBlock COMBINING_HALF_MARKS = 1289 new UnicodeBlock("COMBINING_HALF_MARKS", 1290 "COMBINING HALF MARKS", 1291 "COMBININGHALFMARKS"); 1292 1293 /** 1294 * Constant for the "CJK Compatibility Forms" Unicode character block. 1295 * @since 1.2 1296 */ 1297 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1298 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1299 "CJK COMPATIBILITY FORMS", 1300 "CJKCOMPATIBILITYFORMS"); 1301 1302 /** 1303 * Constant for the "Small Form Variants" Unicode character block. 
1304 * @since 1.2 1305 */ 1306 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1307 new UnicodeBlock("SMALL_FORM_VARIANTS", 1308 "SMALL FORM VARIANTS", 1309 "SMALLFORMVARIANTS"); 1310 1311 /** 1312 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1313 * @since 1.2 1314 */ 1315 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1316 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1317 "ARABIC PRESENTATION FORMS-B", 1318 "ARABICPRESENTATIONFORMS-B"); 1319 1320 /** 1321 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1322 * block. 1323 * @since 1.2 1324 */ 1325 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1326 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1327 "HALFWIDTH AND FULLWIDTH FORMS", 1328 "HALFWIDTHANDFULLWIDTHFORMS"); 1329 1330 /** 1331 * Constant for the "Specials" Unicode character block. 1332 * @since 1.2 1333 */ 1334 public static final UnicodeBlock SPECIALS = 1335 new UnicodeBlock("SPECIALS"); 1336 1337 /** 1338 * @deprecated 1339 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1340 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1341 * These constants match the block definitions of the Unicode Standard. 1342 * The {@link #of(char)} and {@link #of(int)} methods return the 1343 * standard constants. 1344 */ 1345 @Deprecated(since="1.5") 1346 public static final UnicodeBlock SURROGATES_AREA = 1347 new UnicodeBlock("SURROGATES_AREA"); 1348 1349 /** 1350 * Constant for the "Syriac" Unicode character block. 1351 * @since 1.4 1352 */ 1353 public static final UnicodeBlock SYRIAC = 1354 new UnicodeBlock("SYRIAC"); 1355 1356 /** 1357 * Constant for the "Thaana" Unicode character block. 1358 * @since 1.4 1359 */ 1360 public static final UnicodeBlock THAANA = 1361 new UnicodeBlock("THAANA"); 1362 1363 /** 1364 * Constant for the "Sinhala" Unicode character block. 
1365 * @since 1.4 1366 */ 1367 public static final UnicodeBlock SINHALA = 1368 new UnicodeBlock("SINHALA"); 1369 1370 /** 1371 * Constant for the "Myanmar" Unicode character block. 1372 * @since 1.4 1373 */ 1374 public static final UnicodeBlock MYANMAR = 1375 new UnicodeBlock("MYANMAR"); 1376 1377 /** 1378 * Constant for the "Ethiopic" Unicode character block. 1379 * @since 1.4 1380 */ 1381 public static final UnicodeBlock ETHIOPIC = 1382 new UnicodeBlock("ETHIOPIC"); 1383 1384 /** 1385 * Constant for the "Cherokee" Unicode character block. 1386 * @since 1.4 1387 */ 1388 public static final UnicodeBlock CHEROKEE = 1389 new UnicodeBlock("CHEROKEE"); 1390 1391 /** 1392 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1393 * @since 1.4 1394 */ 1395 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1396 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1397 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1398 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1399 1400 /** 1401 * Constant for the "Ogham" Unicode character block. 1402 * @since 1.4 1403 */ 1404 public static final UnicodeBlock OGHAM = 1405 new UnicodeBlock("OGHAM"); 1406 1407 /** 1408 * Constant for the "Runic" Unicode character block. 1409 * @since 1.4 1410 */ 1411 public static final UnicodeBlock RUNIC = 1412 new UnicodeBlock("RUNIC"); 1413 1414 /** 1415 * Constant for the "Khmer" Unicode character block. 1416 * @since 1.4 1417 */ 1418 public static final UnicodeBlock KHMER = 1419 new UnicodeBlock("KHMER"); 1420 1421 /** 1422 * Constant for the "Mongolian" Unicode character block. 1423 * @since 1.4 1424 */ 1425 public static final UnicodeBlock MONGOLIAN = 1426 new UnicodeBlock("MONGOLIAN"); 1427 1428 /** 1429 * Constant for the "Braille Patterns" Unicode character block. 
1430 * @since 1.4 1431 */ 1432 public static final UnicodeBlock BRAILLE_PATTERNS = 1433 new UnicodeBlock("BRAILLE_PATTERNS", 1434 "BRAILLE PATTERNS", 1435 "BRAILLEPATTERNS"); 1436 1437 /** 1438 * Constant for the "CJK Radicals Supplement" Unicode character block. 1439 * @since 1.4 1440 */ 1441 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1442 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1443 "CJK RADICALS SUPPLEMENT", 1444 "CJKRADICALSSUPPLEMENT"); 1445 1446 /** 1447 * Constant for the "Kangxi Radicals" Unicode character block. 1448 * @since 1.4 1449 */ 1450 public static final UnicodeBlock KANGXI_RADICALS = 1451 new UnicodeBlock("KANGXI_RADICALS", 1452 "KANGXI RADICALS", 1453 "KANGXIRADICALS"); 1454 1455 /** 1456 * Constant for the "Ideographic Description Characters" Unicode character block. 1457 * @since 1.4 1458 */ 1459 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1460 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1461 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1462 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1463 1464 /** 1465 * Constant for the "Bopomofo Extended" Unicode character block. 1466 * @since 1.4 1467 */ 1468 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1469 new UnicodeBlock("BOPOMOFO_EXTENDED", 1470 "BOPOMOFO EXTENDED", 1471 "BOPOMOFOEXTENDED"); 1472 1473 /** 1474 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1475 * @since 1.4 1476 */ 1477 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1478 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1479 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1480 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1481 1482 /** 1483 * Constant for the "Yi Syllables" Unicode character block. 1484 * @since 1.4 1485 */ 1486 public static final UnicodeBlock YI_SYLLABLES = 1487 new UnicodeBlock("YI_SYLLABLES", 1488 "YI SYLLABLES", 1489 "YISYLLABLES"); 1490 1491 /** 1492 * Constant for the "Yi Radicals" Unicode character block. 
1493 * @since 1.4 1494 */ 1495 public static final UnicodeBlock YI_RADICALS = 1496 new UnicodeBlock("YI_RADICALS", 1497 "YI RADICALS", 1498 "YIRADICALS"); 1499 1500 /** 1501 * Constant for the "Cyrillic Supplement" Unicode character block. 1502 * This block was previously known as the "Cyrillic Supplementary" block. 1503 * @since 1.5 1504 */ 1505 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1506 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1507 "CYRILLIC SUPPLEMENTARY", 1508 "CYRILLICSUPPLEMENTARY", 1509 "CYRILLIC SUPPLEMENT", 1510 "CYRILLICSUPPLEMENT"); 1511 1512 /** 1513 * Constant for the "Tagalog" Unicode character block. 1514 * @since 1.5 1515 */ 1516 public static final UnicodeBlock TAGALOG = 1517 new UnicodeBlock("TAGALOG"); 1518 1519 /** 1520 * Constant for the "Hanunoo" Unicode character block. 1521 * @since 1.5 1522 */ 1523 public static final UnicodeBlock HANUNOO = 1524 new UnicodeBlock("HANUNOO"); 1525 1526 /** 1527 * Constant for the "Buhid" Unicode character block. 1528 * @since 1.5 1529 */ 1530 public static final UnicodeBlock BUHID = 1531 new UnicodeBlock("BUHID"); 1532 1533 /** 1534 * Constant for the "Tagbanwa" Unicode character block. 1535 * @since 1.5 1536 */ 1537 public static final UnicodeBlock TAGBANWA = 1538 new UnicodeBlock("TAGBANWA"); 1539 1540 /** 1541 * Constant for the "Limbu" Unicode character block. 1542 * @since 1.5 1543 */ 1544 public static final UnicodeBlock LIMBU = 1545 new UnicodeBlock("LIMBU"); 1546 1547 /** 1548 * Constant for the "Tai Le" Unicode character block. 1549 * @since 1.5 1550 */ 1551 public static final UnicodeBlock TAI_LE = 1552 new UnicodeBlock("TAI_LE", 1553 "TAI LE", 1554 "TAILE"); 1555 1556 /** 1557 * Constant for the "Khmer Symbols" Unicode character block. 
1558 * @since 1.5 1559 */ 1560 public static final UnicodeBlock KHMER_SYMBOLS = 1561 new UnicodeBlock("KHMER_SYMBOLS", 1562 "KHMER SYMBOLS", 1563 "KHMERSYMBOLS"); 1564 1565 /** 1566 * Constant for the "Phonetic Extensions" Unicode character block. 1567 * @since 1.5 1568 */ 1569 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1570 new UnicodeBlock("PHONETIC_EXTENSIONS", 1571 "PHONETIC EXTENSIONS", 1572 "PHONETICEXTENSIONS"); 1573 1574 /** 1575 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 1576 * @since 1.5 1577 */ 1578 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1579 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1580 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1581 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1582 1583 /** 1584 * Constant for the "Supplemental Arrows-A" Unicode character block. 1585 * @since 1.5 1586 */ 1587 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1588 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1589 "SUPPLEMENTAL ARROWS-A", 1590 "SUPPLEMENTALARROWS-A"); 1591 1592 /** 1593 * Constant for the "Supplemental Arrows-B" Unicode character block. 1594 * @since 1.5 1595 */ 1596 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1597 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1598 "SUPPLEMENTAL ARROWS-B", 1599 "SUPPLEMENTALARROWS-B"); 1600 1601 /** 1602 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1603 * character block. 1604 * @since 1.5 1605 */ 1606 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1607 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1608 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1609 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1610 1611 /** 1612 * Constant for the "Supplemental Mathematical Operators" Unicode 1613 * character block. 
1614 * @since 1.5 1615 */ 1616 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1617 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1618 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1619 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1620 1621 /** 1622 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1623 * block. 1624 * @since 1.5 1625 */ 1626 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1627 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1628 "MISCELLANEOUS SYMBOLS AND ARROWS", 1629 "MISCELLANEOUSSYMBOLSANDARROWS"); 1630 1631 /** 1632 * Constant for the "Katakana Phonetic Extensions" Unicode character 1633 * block. 1634 * @since 1.5 1635 */ 1636 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1637 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1638 "KATAKANA PHONETIC EXTENSIONS", 1639 "KATAKANAPHONETICEXTENSIONS"); 1640 1641 /** 1642 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1643 * @since 1.5 1644 */ 1645 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1646 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1647 "YIJING HEXAGRAM SYMBOLS", 1648 "YIJINGHEXAGRAMSYMBOLS"); 1649 1650 /** 1651 * Constant for the "Variation Selectors" Unicode character block. 1652 * @since 1.5 1653 */ 1654 public static final UnicodeBlock VARIATION_SELECTORS = 1655 new UnicodeBlock("VARIATION_SELECTORS", 1656 "VARIATION SELECTORS", 1657 "VARIATIONSELECTORS"); 1658 1659 /** 1660 * Constant for the "Linear B Syllabary" Unicode character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1664 new UnicodeBlock("LINEAR_B_SYLLABARY", 1665 "LINEAR B SYLLABARY", 1666 "LINEARBSYLLABARY"); 1667 1668 /** 1669 * Constant for the "Linear B Ideograms" Unicode character block. 
1670 * @since 1.5 1671 */ 1672 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1673 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1674 "LINEAR B IDEOGRAMS", 1675 "LINEARBIDEOGRAMS"); 1676 1677 /** 1678 * Constant for the "Aegean Numbers" Unicode character block. 1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock AEGEAN_NUMBERS = 1682 new UnicodeBlock("AEGEAN_NUMBERS", 1683 "AEGEAN NUMBERS", 1684 "AEGEANNUMBERS"); 1685 1686 /** 1687 * Constant for the "Old Italic" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock OLD_ITALIC = 1691 new UnicodeBlock("OLD_ITALIC", 1692 "OLD ITALIC", 1693 "OLDITALIC"); 1694 1695 /** 1696 * Constant for the "Gothic" Unicode character block. 1697 * @since 1.5 1698 */ 1699 public static final UnicodeBlock GOTHIC = 1700 new UnicodeBlock("GOTHIC"); 1701 1702 /** 1703 * Constant for the "Ugaritic" Unicode character block. 1704 * @since 1.5 1705 */ 1706 public static final UnicodeBlock UGARITIC = 1707 new UnicodeBlock("UGARITIC"); 1708 1709 /** 1710 * Constant for the "Deseret" Unicode character block. 1711 * @since 1.5 1712 */ 1713 public static final UnicodeBlock DESERET = 1714 new UnicodeBlock("DESERET"); 1715 1716 /** 1717 * Constant for the "Shavian" Unicode character block. 1718 * @since 1.5 1719 */ 1720 public static final UnicodeBlock SHAVIAN = 1721 new UnicodeBlock("SHAVIAN"); 1722 1723 /** 1724 * Constant for the "Osmanya" Unicode character block. 1725 * @since 1.5 1726 */ 1727 public static final UnicodeBlock OSMANYA = 1728 new UnicodeBlock("OSMANYA"); 1729 1730 /** 1731 * Constant for the "Cypriot Syllabary" Unicode character block. 1732 * @since 1.5 1733 */ 1734 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1735 new UnicodeBlock("CYPRIOT_SYLLABARY", 1736 "CYPRIOT SYLLABARY", 1737 "CYPRIOTSYLLABARY"); 1738 1739 /** 1740 * Constant for the "Byzantine Musical Symbols" Unicode character block. 
1741 * @since 1.5 1742 */ 1743 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1744 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1745 "BYZANTINE MUSICAL SYMBOLS", 1746 "BYZANTINEMUSICALSYMBOLS"); 1747 1748 /** 1749 * Constant for the "Musical Symbols" Unicode character block. 1750 * @since 1.5 1751 */ 1752 public static final UnicodeBlock MUSICAL_SYMBOLS = 1753 new UnicodeBlock("MUSICAL_SYMBOLS", 1754 "MUSICAL SYMBOLS", 1755 "MUSICALSYMBOLS"); 1756 1757 /** 1758 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 1759 * @since 1.5 1760 */ 1761 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1762 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1763 "TAI XUAN JING SYMBOLS", 1764 "TAIXUANJINGSYMBOLS"); 1765 1766 /** 1767 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1768 * character block. 1769 * @since 1.5 1770 */ 1771 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1772 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1773 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1774 "MATHEMATICALALPHANUMERICSYMBOLS"); 1775 1776 /** 1777 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1778 * character block. 1779 * @since 1.5 1780 */ 1781 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1782 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1783 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1784 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1785 1786 /** 1787 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1788 * @since 1.5 1789 */ 1790 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1791 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1792 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1793 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1794 1795 /** 1796 * Constant for the "Tags" Unicode character block. 
1797 * @since 1.5 1798 */ 1799 public static final UnicodeBlock TAGS = 1800 new UnicodeBlock("TAGS"); 1801 1802 /** 1803 * Constant for the "Variation Selectors Supplement" Unicode character 1804 * block. 1805 * @since 1.5 1806 */ 1807 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1808 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1809 "VARIATION SELECTORS SUPPLEMENT", 1810 "VARIATIONSELECTORSSUPPLEMENT"); 1811 1812 /** 1813 * Constant for the "Supplementary Private Use Area-A" Unicode character 1814 * block. 1815 * @since 1.5 1816 */ 1817 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1818 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1819 "SUPPLEMENTARY PRIVATE USE AREA-A", 1820 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1821 1822 /** 1823 * Constant for the "Supplementary Private Use Area-B" Unicode character 1824 * block. 1825 * @since 1.5 1826 */ 1827 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1828 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1829 "SUPPLEMENTARY PRIVATE USE AREA-B", 1830 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1831 1832 /** 1833 * Constant for the "High Surrogates" Unicode character block. 1834 * This block represents codepoint values in the high surrogate 1835 * range: U+D800 through U+DB7F 1836 * 1837 * @since 1.5 1838 */ 1839 public static final UnicodeBlock HIGH_SURROGATES = 1840 new UnicodeBlock("HIGH_SURROGATES", 1841 "HIGH SURROGATES", 1842 "HIGHSURROGATES"); 1843 1844 /** 1845 * Constant for the "High Private Use Surrogates" Unicode character 1846 * block. 
1847 * This block represents codepoint values in the private use high 1848 * surrogate range: U+DB80 through U+DBFF 1849 * 1850 * @since 1.5 1851 */ 1852 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1853 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1854 "HIGH PRIVATE USE SURROGATES", 1855 "HIGHPRIVATEUSESURROGATES"); 1856 1857 /** 1858 * Constant for the "Low Surrogates" Unicode character block. 1859 * This block represents codepoint values in the low surrogate 1860 * range: U+DC00 through U+DFFF 1861 * 1862 * @since 1.5 1863 */ 1864 public static final UnicodeBlock LOW_SURROGATES = 1865 new UnicodeBlock("LOW_SURROGATES", 1866 "LOW SURROGATES", 1867 "LOWSURROGATES"); 1868 1869 /** 1870 * Constant for the "Arabic Supplement" Unicode character block. 1871 * @since 1.7 1872 */ 1873 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1874 new UnicodeBlock("ARABIC_SUPPLEMENT", 1875 "ARABIC SUPPLEMENT", 1876 "ARABICSUPPLEMENT"); 1877 1878 /** 1879 * Constant for the "NKo" Unicode character block. 1880 * @since 1.7 1881 */ 1882 public static final UnicodeBlock NKO = 1883 new UnicodeBlock("NKO"); 1884 1885 /** 1886 * Constant for the "Samaritan" Unicode character block. 1887 * @since 1.7 1888 */ 1889 public static final UnicodeBlock SAMARITAN = 1890 new UnicodeBlock("SAMARITAN"); 1891 1892 /** 1893 * Constant for the "Mandaic" Unicode character block. 1894 * @since 1.7 1895 */ 1896 public static final UnicodeBlock MANDAIC = 1897 new UnicodeBlock("MANDAIC"); 1898 1899 /** 1900 * Constant for the "Ethiopic Supplement" Unicode character block. 1901 * @since 1.7 1902 */ 1903 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1904 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1905 "ETHIOPIC SUPPLEMENT", 1906 "ETHIOPICSUPPLEMENT"); 1907 1908 /** 1909 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1910 * Unicode character block. 
1911 * @since 1.7 1912 */ 1913 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1914 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1915 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1916 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1917 1918 /** 1919 * Constant for the "New Tai Lue" Unicode character block. 1920 * @since 1.7 1921 */ 1922 public static final UnicodeBlock NEW_TAI_LUE = 1923 new UnicodeBlock("NEW_TAI_LUE", 1924 "NEW TAI LUE", 1925 "NEWTAILUE"); 1926 1927 /** 1928 * Constant for the "Buginese" Unicode character block. 1929 * @since 1.7 1930 */ 1931 public static final UnicodeBlock BUGINESE = 1932 new UnicodeBlock("BUGINESE"); 1933 1934 /** 1935 * Constant for the "Tai Tham" Unicode character block. 1936 * @since 1.7 1937 */ 1938 public static final UnicodeBlock TAI_THAM = 1939 new UnicodeBlock("TAI_THAM", 1940 "TAI THAM", 1941 "TAITHAM"); 1942 1943 /** 1944 * Constant for the "Balinese" Unicode character block. 1945 * @since 1.7 1946 */ 1947 public static final UnicodeBlock BALINESE = 1948 new UnicodeBlock("BALINESE"); 1949 1950 /** 1951 * Constant for the "Sundanese" Unicode character block. 1952 * @since 1.7 1953 */ 1954 public static final UnicodeBlock SUNDANESE = 1955 new UnicodeBlock("SUNDANESE"); 1956 1957 /** 1958 * Constant for the "Batak" Unicode character block. 1959 * @since 1.7 1960 */ 1961 public static final UnicodeBlock BATAK = 1962 new UnicodeBlock("BATAK"); 1963 1964 /** 1965 * Constant for the "Lepcha" Unicode character block. 1966 * @since 1.7 1967 */ 1968 public static final UnicodeBlock LEPCHA = 1969 new UnicodeBlock("LEPCHA"); 1970 1971 /** 1972 * Constant for the "Ol Chiki" Unicode character block. 1973 * @since 1.7 1974 */ 1975 public static final UnicodeBlock OL_CHIKI = 1976 new UnicodeBlock("OL_CHIKI", 1977 "OL CHIKI", 1978 "OLCHIKI"); 1979 1980 /** 1981 * Constant for the "Vedic Extensions" Unicode character block. 
1982 * @since 1.7 1983 */ 1984 public static final UnicodeBlock VEDIC_EXTENSIONS = 1985 new UnicodeBlock("VEDIC_EXTENSIONS", 1986 "VEDIC EXTENSIONS", 1987 "VEDICEXTENSIONS"); 1988 1989 /** 1990 * Constant for the "Phonetic Extensions Supplement" Unicode character 1991 * block. 1992 * @since 1.7 1993 */ 1994 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1995 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1996 "PHONETIC EXTENSIONS SUPPLEMENT", 1997 "PHONETICEXTENSIONSSUPPLEMENT"); 1998 1999 /** 2000 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2001 * character block. 2002 * @since 1.7 2003 */ 2004 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2005 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2006 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2007 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2008 2009 /** 2010 * Constant for the "Glagolitic" Unicode character block. 2011 * @since 1.7 2012 */ 2013 public static final UnicodeBlock GLAGOLITIC = 2014 new UnicodeBlock("GLAGOLITIC"); 2015 2016 /** 2017 * Constant for the "Latin Extended-C" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock LATIN_EXTENDED_C = 2021 new UnicodeBlock("LATIN_EXTENDED_C", 2022 "LATIN EXTENDED-C", 2023 "LATINEXTENDED-C"); 2024 2025 /** 2026 * Constant for the "Coptic" Unicode character block. 2027 * @since 1.7 2028 */ 2029 public static final UnicodeBlock COPTIC = 2030 new UnicodeBlock("COPTIC"); 2031 2032 /** 2033 * Constant for the "Georgian Supplement" Unicode character block. 2034 * @since 1.7 2035 */ 2036 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2037 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2038 "GEORGIAN SUPPLEMENT", 2039 "GEORGIANSUPPLEMENT"); 2040 2041 /** 2042 * Constant for the "Tifinagh" Unicode character block. 
2043 * @since 1.7 2044 */ 2045 public static final UnicodeBlock TIFINAGH = 2046 new UnicodeBlock("TIFINAGH"); 2047 2048 /** 2049 * Constant for the "Ethiopic Extended" Unicode character block. 2050 * @since 1.7 2051 */ 2052 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2053 new UnicodeBlock("ETHIOPIC_EXTENDED", 2054 "ETHIOPIC EXTENDED", 2055 "ETHIOPICEXTENDED"); 2056 2057 /** 2058 * Constant for the "Cyrillic Extended-A" Unicode character block. 2059 * @since 1.7 2060 */ 2061 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2062 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2063 "CYRILLIC EXTENDED-A", 2064 "CYRILLICEXTENDED-A"); 2065 2066 /** 2067 * Constant for the "Supplemental Punctuation" Unicode character block. 2068 * @since 1.7 2069 */ 2070 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2071 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2072 "SUPPLEMENTAL PUNCTUATION", 2073 "SUPPLEMENTALPUNCTUATION"); 2074 2075 /** 2076 * Constant for the "CJK Strokes" Unicode character block. 2077 * @since 1.7 2078 */ 2079 public static final UnicodeBlock CJK_STROKES = 2080 new UnicodeBlock("CJK_STROKES", 2081 "CJK STROKES", 2082 "CJKSTROKES"); 2083 2084 /** 2085 * Constant for the "Lisu" Unicode character block. 2086 * @since 1.7 2087 */ 2088 public static final UnicodeBlock LISU = 2089 new UnicodeBlock("LISU"); 2090 2091 /** 2092 * Constant for the "Vai" Unicode character block. 2093 * @since 1.7 2094 */ 2095 public static final UnicodeBlock VAI = 2096 new UnicodeBlock("VAI"); 2097 2098 /** 2099 * Constant for the "Cyrillic Extended-B" Unicode character block. 2100 * @since 1.7 2101 */ 2102 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2103 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2104 "CYRILLIC EXTENDED-B", 2105 "CYRILLICEXTENDED-B"); 2106 2107 /** 2108 * Constant for the "Bamum" Unicode character block. 
2109 * @since 1.7 2110 */ 2111 public static final UnicodeBlock BAMUM = 2112 new UnicodeBlock("BAMUM"); 2113 2114 /** 2115 * Constant for the "Modifier Tone Letters" Unicode character block. 2116 * @since 1.7 2117 */ 2118 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2119 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2120 "MODIFIER TONE LETTERS", 2121 "MODIFIERTONELETTERS"); 2122 2123 /** 2124 * Constant for the "Latin Extended-D" Unicode character block. 2125 * @since 1.7 2126 */ 2127 public static final UnicodeBlock LATIN_EXTENDED_D = 2128 new UnicodeBlock("LATIN_EXTENDED_D", 2129 "LATIN EXTENDED-D", 2130 "LATINEXTENDED-D"); 2131 2132 /** 2133 * Constant for the "Syloti Nagri" Unicode character block. 2134 * @since 1.7 2135 */ 2136 public static final UnicodeBlock SYLOTI_NAGRI = 2137 new UnicodeBlock("SYLOTI_NAGRI", 2138 "SYLOTI NAGRI", 2139 "SYLOTINAGRI"); 2140 2141 /** 2142 * Constant for the "Common Indic Number Forms" Unicode character block. 2143 * @since 1.7 2144 */ 2145 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2146 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2147 "COMMON INDIC NUMBER FORMS", 2148 "COMMONINDICNUMBERFORMS"); 2149 2150 /** 2151 * Constant for the "Phags-pa" Unicode character block. 2152 * @since 1.7 2153 */ 2154 public static final UnicodeBlock PHAGS_PA = 2155 new UnicodeBlock("PHAGS_PA", 2156 "PHAGS-PA"); 2157 2158 /** 2159 * Constant for the "Saurashtra" Unicode character block. 2160 * @since 1.7 2161 */ 2162 public static final UnicodeBlock SAURASHTRA = 2163 new UnicodeBlock("SAURASHTRA"); 2164 2165 /** 2166 * Constant for the "Devanagari Extended" Unicode character block. 2167 * @since 1.7 2168 */ 2169 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2170 new UnicodeBlock("DEVANAGARI_EXTENDED", 2171 "DEVANAGARI EXTENDED", 2172 "DEVANAGARIEXTENDED"); 2173 2174 /** 2175 * Constant for the "Kayah Li" Unicode character block. 
2176 * @since 1.7 2177 */ 2178 public static final UnicodeBlock KAYAH_LI = 2179 new UnicodeBlock("KAYAH_LI", 2180 "KAYAH LI", 2181 "KAYAHLI"); 2182 2183 /** 2184 * Constant for the "Rejang" Unicode character block. 2185 * @since 1.7 2186 */ 2187 public static final UnicodeBlock REJANG = 2188 new UnicodeBlock("REJANG"); 2189 2190 /** 2191 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2192 * @since 1.7 2193 */ 2194 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2195 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2196 "HANGUL JAMO EXTENDED-A", 2197 "HANGULJAMOEXTENDED-A"); 2198 2199 /** 2200 * Constant for the "Javanese" Unicode character block. 2201 * @since 1.7 2202 */ 2203 public static final UnicodeBlock JAVANESE = 2204 new UnicodeBlock("JAVANESE"); 2205 2206 /** 2207 * Constant for the "Cham" Unicode character block. 2208 * @since 1.7 2209 */ 2210 public static final UnicodeBlock CHAM = 2211 new UnicodeBlock("CHAM"); 2212 2213 /** 2214 * Constant for the "Myanmar Extended-A" Unicode character block. 2215 * @since 1.7 2216 */ 2217 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2218 new UnicodeBlock("MYANMAR_EXTENDED_A", 2219 "MYANMAR EXTENDED-A", 2220 "MYANMAREXTENDED-A"); 2221 2222 /** 2223 * Constant for the "Tai Viet" Unicode character block. 2224 * @since 1.7 2225 */ 2226 public static final UnicodeBlock TAI_VIET = 2227 new UnicodeBlock("TAI_VIET", 2228 "TAI VIET", 2229 "TAIVIET"); 2230 2231 /** 2232 * Constant for the "Ethiopic Extended-A" Unicode character block. 2233 * @since 1.7 2234 */ 2235 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2236 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2237 "ETHIOPIC EXTENDED-A", 2238 "ETHIOPICEXTENDED-A"); 2239 2240 /** 2241 * Constant for the "Meetei Mayek" Unicode character block. 
2242 * @since 1.7 2243 */ 2244 public static final UnicodeBlock MEETEI_MAYEK = 2245 new UnicodeBlock("MEETEI_MAYEK", 2246 "MEETEI MAYEK", 2247 "MEETEIMAYEK"); 2248 2249 /** 2250 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2251 * @since 1.7 2252 */ 2253 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2254 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2255 "HANGUL JAMO EXTENDED-B", 2256 "HANGULJAMOEXTENDED-B"); 2257 2258 /** 2259 * Constant for the "Vertical Forms" Unicode character block. 2260 * @since 1.7 2261 */ 2262 public static final UnicodeBlock VERTICAL_FORMS = 2263 new UnicodeBlock("VERTICAL_FORMS", 2264 "VERTICAL FORMS", 2265 "VERTICALFORMS"); 2266 2267 /** 2268 * Constant for the "Ancient Greek Numbers" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2272 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2273 "ANCIENT GREEK NUMBERS", 2274 "ANCIENTGREEKNUMBERS"); 2275 2276 /** 2277 * Constant for the "Ancient Symbols" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock ANCIENT_SYMBOLS = 2281 new UnicodeBlock("ANCIENT_SYMBOLS", 2282 "ANCIENT SYMBOLS", 2283 "ANCIENTSYMBOLS"); 2284 2285 /** 2286 * Constant for the "Phaistos Disc" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock PHAISTOS_DISC = 2290 new UnicodeBlock("PHAISTOS_DISC", 2291 "PHAISTOS DISC", 2292 "PHAISTOSDISC"); 2293 2294 /** 2295 * Constant for the "Lycian" Unicode character block. 2296 * @since 1.7 2297 */ 2298 public static final UnicodeBlock LYCIAN = 2299 new UnicodeBlock("LYCIAN"); 2300 2301 /** 2302 * Constant for the "Carian" Unicode character block. 2303 * @since 1.7 2304 */ 2305 public static final UnicodeBlock CARIAN = 2306 new UnicodeBlock("CARIAN"); 2307 2308 /** 2309 * Constant for the "Old Persian" Unicode character block. 
2310 * @since 1.7 2311 */ 2312 public static final UnicodeBlock OLD_PERSIAN = 2313 new UnicodeBlock("OLD_PERSIAN", 2314 "OLD PERSIAN", 2315 "OLDPERSIAN"); 2316 2317 /** 2318 * Constant for the "Imperial Aramaic" Unicode character block. 2319 * @since 1.7 2320 */ 2321 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2322 new UnicodeBlock("IMPERIAL_ARAMAIC", 2323 "IMPERIAL ARAMAIC", 2324 "IMPERIALARAMAIC"); 2325 2326 /** 2327 * Constant for the "Phoenician" Unicode character block. 2328 * @since 1.7 2329 */ 2330 public static final UnicodeBlock PHOENICIAN = 2331 new UnicodeBlock("PHOENICIAN"); 2332 2333 /** 2334 * Constant for the "Lydian" Unicode character block. 2335 * @since 1.7 2336 */ 2337 public static final UnicodeBlock LYDIAN = 2338 new UnicodeBlock("LYDIAN"); 2339 2340 /** 2341 * Constant for the "Kharoshthi" Unicode character block. 2342 * @since 1.7 2343 */ 2344 public static final UnicodeBlock KHAROSHTHI = 2345 new UnicodeBlock("KHAROSHTHI"); 2346 2347 /** 2348 * Constant for the "Old South Arabian" Unicode character block. 2349 * @since 1.7 2350 */ 2351 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2352 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2353 "OLD SOUTH ARABIAN", 2354 "OLDSOUTHARABIAN"); 2355 2356 /** 2357 * Constant for the "Avestan" Unicode character block. 2358 * @since 1.7 2359 */ 2360 public static final UnicodeBlock AVESTAN = 2361 new UnicodeBlock("AVESTAN"); 2362 2363 /** 2364 * Constant for the "Inscriptional Parthian" Unicode character block. 2365 * @since 1.7 2366 */ 2367 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2368 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2369 "INSCRIPTIONAL PARTHIAN", 2370 "INSCRIPTIONALPARTHIAN"); 2371 2372 /** 2373 * Constant for the "Inscriptional Pahlavi" Unicode character block. 
2374 * @since 1.7 2375 */ 2376 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2377 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2378 "INSCRIPTIONAL PAHLAVI", 2379 "INSCRIPTIONALPAHLAVI"); 2380 2381 /** 2382 * Constant for the "Old Turkic" Unicode character block. 2383 * @since 1.7 2384 */ 2385 public static final UnicodeBlock OLD_TURKIC = 2386 new UnicodeBlock("OLD_TURKIC", 2387 "OLD TURKIC", 2388 "OLDTURKIC"); 2389 2390 /** 2391 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2392 * @since 1.7 2393 */ 2394 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2395 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2396 "RUMI NUMERAL SYMBOLS", 2397 "RUMINUMERALSYMBOLS"); 2398 2399 /** 2400 * Constant for the "Brahmi" Unicode character block. 2401 * @since 1.7 2402 */ 2403 public static final UnicodeBlock BRAHMI = 2404 new UnicodeBlock("BRAHMI"); 2405 2406 /** 2407 * Constant for the "Kaithi" Unicode character block. 2408 * @since 1.7 2409 */ 2410 public static final UnicodeBlock KAITHI = 2411 new UnicodeBlock("KAITHI"); 2412 2413 /** 2414 * Constant for the "Cuneiform" Unicode character block. 2415 * @since 1.7 2416 */ 2417 public static final UnicodeBlock CUNEIFORM = 2418 new UnicodeBlock("CUNEIFORM"); 2419 2420 /** 2421 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2422 * character block. 2423 * @since 1.7 2424 */ 2425 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2426 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2427 "CUNEIFORM NUMBERS AND PUNCTUATION", 2428 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2429 2430 /** 2431 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2432 * @since 1.7 2433 */ 2434 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2435 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2436 "EGYPTIAN HIEROGLYPHS", 2437 "EGYPTIANHIEROGLYPHS"); 2438 2439 /** 2440 * Constant for the "Bamum Supplement" Unicode character block. 
2441 * @since 1.7 2442 */ 2443 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2444 new UnicodeBlock("BAMUM_SUPPLEMENT", 2445 "BAMUM SUPPLEMENT", 2446 "BAMUMSUPPLEMENT"); 2447 2448 /** 2449 * Constant for the "Kana Supplement" Unicode character block. 2450 * @since 1.7 2451 */ 2452 public static final UnicodeBlock KANA_SUPPLEMENT = 2453 new UnicodeBlock("KANA_SUPPLEMENT", 2454 "KANA SUPPLEMENT", 2455 "KANASUPPLEMENT"); 2456 2457 /** 2458 * Constant for the "Ancient Greek Musical Notation" Unicode character 2459 * block. 2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2463 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2464 "ANCIENT GREEK MUSICAL NOTATION", 2465 "ANCIENTGREEKMUSICALNOTATION"); 2466 2467 /** 2468 * Constant for the "Counting Rod Numerals" Unicode character block. 2469 * @since 1.7 2470 */ 2471 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2472 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2473 "COUNTING ROD NUMERALS", 2474 "COUNTINGRODNUMERALS"); 2475 2476 /** 2477 * Constant for the "Mahjong Tiles" Unicode character block. 2478 * @since 1.7 2479 */ 2480 public static final UnicodeBlock MAHJONG_TILES = 2481 new UnicodeBlock("MAHJONG_TILES", 2482 "MAHJONG TILES", 2483 "MAHJONGTILES"); 2484 2485 /** 2486 * Constant for the "Domino Tiles" Unicode character block. 2487 * @since 1.7 2488 */ 2489 public static final UnicodeBlock DOMINO_TILES = 2490 new UnicodeBlock("DOMINO_TILES", 2491 "DOMINO TILES", 2492 "DOMINOTILES"); 2493 2494 /** 2495 * Constant for the "Playing Cards" Unicode character block. 2496 * @since 1.7 2497 */ 2498 public static final UnicodeBlock PLAYING_CARDS = 2499 new UnicodeBlock("PLAYING_CARDS", 2500 "PLAYING CARDS", 2501 "PLAYINGCARDS"); 2502 2503 /** 2504 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2505 * block. 
2506 * @since 1.7 2507 */ 2508 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2509 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2510 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2511 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2512 2513 /** 2514 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2515 * block. 2516 * @since 1.7 2517 */ 2518 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2519 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2520 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2521 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2522 2523 /** 2524 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2525 * character block. 2526 * @since 1.7 2527 */ 2528 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2529 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2530 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2531 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2532 2533 /** 2534 * Constant for the "Emoticons" Unicode character block. 2535 * @since 1.7 2536 */ 2537 public static final UnicodeBlock EMOTICONS = 2538 new UnicodeBlock("EMOTICONS"); 2539 2540 /** 2541 * Constant for the "Transport And Map Symbols" Unicode character block. 2542 * @since 1.7 2543 */ 2544 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2545 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2546 "TRANSPORT AND MAP SYMBOLS", 2547 "TRANSPORTANDMAPSYMBOLS"); 2548 2549 /** 2550 * Constant for the "Alchemical Symbols" Unicode character block. 2551 * @since 1.7 2552 */ 2553 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2554 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2555 "ALCHEMICAL SYMBOLS", 2556 "ALCHEMICALSYMBOLS"); 2557 2558 /** 2559 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2560 * character block. 
2561 * @since 1.7 2562 */ 2563 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2564 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2565 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2566 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2567 2568 /** 2569 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2570 * character block. 2571 * @since 1.7 2572 */ 2573 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2574 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2575 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2576 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2577 2578 /** 2579 * Constant for the "Arabic Extended-A" Unicode character block. 2580 * @since 1.8 2581 */ 2582 public static final UnicodeBlock ARABIC_EXTENDED_A = 2583 new UnicodeBlock("ARABIC_EXTENDED_A", 2584 "ARABIC EXTENDED-A", 2585 "ARABICEXTENDED-A"); 2586 2587 /** 2588 * Constant for the "Sundanese Supplement" Unicode character block. 2589 * @since 1.8 2590 */ 2591 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2592 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2593 "SUNDANESE SUPPLEMENT", 2594 "SUNDANESESUPPLEMENT"); 2595 2596 /** 2597 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2598 * @since 1.8 2599 */ 2600 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2601 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2602 "MEETEI MAYEK EXTENSIONS", 2603 "MEETEIMAYEKEXTENSIONS"); 2604 2605 /** 2606 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2607 * @since 1.8 2608 */ 2609 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2610 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2611 "MEROITIC HIEROGLYPHS", 2612 "MEROITICHIEROGLYPHS"); 2613 2614 /** 2615 * Constant for the "Meroitic Cursive" Unicode character block. 
2616 * @since 1.8 2617 */ 2618 public static final UnicodeBlock MEROITIC_CURSIVE = 2619 new UnicodeBlock("MEROITIC_CURSIVE", 2620 "MEROITIC CURSIVE", 2621 "MEROITICCURSIVE"); 2622 2623 /** 2624 * Constant for the "Sora Sompeng" Unicode character block. 2625 * @since 1.8 2626 */ 2627 public static final UnicodeBlock SORA_SOMPENG = 2628 new UnicodeBlock("SORA_SOMPENG", 2629 "SORA SOMPENG", 2630 "SORASOMPENG"); 2631 2632 /** 2633 * Constant for the "Chakma" Unicode character block. 2634 * @since 1.8 2635 */ 2636 public static final UnicodeBlock CHAKMA = 2637 new UnicodeBlock("CHAKMA"); 2638 2639 /** 2640 * Constant for the "Sharada" Unicode character block. 2641 * @since 1.8 2642 */ 2643 public static final UnicodeBlock SHARADA = 2644 new UnicodeBlock("SHARADA"); 2645 2646 /** 2647 * Constant for the "Takri" Unicode character block. 2648 * @since 1.8 2649 */ 2650 public static final UnicodeBlock TAKRI = 2651 new UnicodeBlock("TAKRI"); 2652 2653 /** 2654 * Constant for the "Miao" Unicode character block. 2655 * @since 1.8 2656 */ 2657 public static final UnicodeBlock MIAO = 2658 new UnicodeBlock("MIAO"); 2659 2660 /** 2661 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2662 * character block. 2663 * @since 1.8 2664 */ 2665 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2666 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2667 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2668 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2669 2670 /** 2671 * Constant for the "Combining Diacritical Marks Extended" Unicode 2672 * character block. 2673 * @since 9 2674 */ 2675 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2676 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2677 "COMBINING DIACRITICAL MARKS EXTENDED", 2678 "COMBININGDIACRITICALMARKSEXTENDED"); 2679 2680 /** 2681 * Constant for the "Myanmar Extended-B" Unicode character block. 
2682 * @since 9 2683 */ 2684 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2685 new UnicodeBlock("MYANMAR_EXTENDED_B", 2686 "MYANMAR EXTENDED-B", 2687 "MYANMAREXTENDED-B"); 2688 2689 /** 2690 * Constant for the "Latin Extended-E" Unicode character block. 2691 * @since 9 2692 */ 2693 public static final UnicodeBlock LATIN_EXTENDED_E = 2694 new UnicodeBlock("LATIN_EXTENDED_E", 2695 "LATIN EXTENDED-E", 2696 "LATINEXTENDED-E"); 2697 2698 /** 2699 * Constant for the "Coptic Epact Numbers" Unicode character block. 2700 * @since 9 2701 */ 2702 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2703 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2704 "COPTIC EPACT NUMBERS", 2705 "COPTICEPACTNUMBERS"); 2706 2707 /** 2708 * Constant for the "Old Permic" Unicode character block. 2709 * @since 9 2710 */ 2711 public static final UnicodeBlock OLD_PERMIC = 2712 new UnicodeBlock("OLD_PERMIC", 2713 "OLD PERMIC", 2714 "OLDPERMIC"); 2715 2716 /** 2717 * Constant for the "Elbasan" Unicode character block. 2718 * @since 9 2719 */ 2720 public static final UnicodeBlock ELBASAN = 2721 new UnicodeBlock("ELBASAN"); 2722 2723 /** 2724 * Constant for the "Caucasian Albanian" Unicode character block. 2725 * @since 9 2726 */ 2727 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2728 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2729 "CAUCASIAN ALBANIAN", 2730 "CAUCASIANALBANIAN"); 2731 2732 /** 2733 * Constant for the "Linear A" Unicode character block. 2734 * @since 9 2735 */ 2736 public static final UnicodeBlock LINEAR_A = 2737 new UnicodeBlock("LINEAR_A", 2738 "LINEAR A", 2739 "LINEARA"); 2740 2741 /** 2742 * Constant for the "Palmyrene" Unicode character block. 2743 * @since 9 2744 */ 2745 public static final UnicodeBlock PALMYRENE = 2746 new UnicodeBlock("PALMYRENE"); 2747 2748 /** 2749 * Constant for the "Nabataean" Unicode character block. 
2750 * @since 9 2751 */ 2752 public static final UnicodeBlock NABATAEAN = 2753 new UnicodeBlock("NABATAEAN"); 2754 2755 /** 2756 * Constant for the "Old North Arabian" Unicode character block. 2757 * @since 9 2758 */ 2759 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2760 new UnicodeBlock("OLD_NORTH_ARABIAN", 2761 "OLD NORTH ARABIAN", 2762 "OLDNORTHARABIAN"); 2763 2764 /** 2765 * Constant for the "Manichaean" Unicode character block. 2766 * @since 9 2767 */ 2768 public static final UnicodeBlock MANICHAEAN = 2769 new UnicodeBlock("MANICHAEAN"); 2770 2771 /** 2772 * Constant for the "Psalter Pahlavi" Unicode character block. 2773 * @since 9 2774 */ 2775 public static final UnicodeBlock PSALTER_PAHLAVI = 2776 new UnicodeBlock("PSALTER_PAHLAVI", 2777 "PSALTER PAHLAVI", 2778 "PSALTERPAHLAVI"); 2779 2780 /** 2781 * Constant for the "Mahajani" Unicode character block. 2782 * @since 9 2783 */ 2784 public static final UnicodeBlock MAHAJANI = 2785 new UnicodeBlock("MAHAJANI"); 2786 2787 /** 2788 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2789 * @since 9 2790 */ 2791 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2792 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2793 "SINHALA ARCHAIC NUMBERS", 2794 "SINHALAARCHAICNUMBERS"); 2795 2796 /** 2797 * Constant for the "Khojki" Unicode character block. 2798 * @since 9 2799 */ 2800 public static final UnicodeBlock KHOJKI = 2801 new UnicodeBlock("KHOJKI"); 2802 2803 /** 2804 * Constant for the "Khudawadi" Unicode character block. 2805 * @since 9 2806 */ 2807 public static final UnicodeBlock KHUDAWADI = 2808 new UnicodeBlock("KHUDAWADI"); 2809 2810 /** 2811 * Constant for the "Grantha" Unicode character block. 2812 * @since 9 2813 */ 2814 public static final UnicodeBlock GRANTHA = 2815 new UnicodeBlock("GRANTHA"); 2816 2817 /** 2818 * Constant for the "Tirhuta" Unicode character block. 
2819 * @since 9 2820 */ 2821 public static final UnicodeBlock TIRHUTA = 2822 new UnicodeBlock("TIRHUTA"); 2823 2824 /** 2825 * Constant for the "Siddham" Unicode character block. 2826 * @since 9 2827 */ 2828 public static final UnicodeBlock SIDDHAM = 2829 new UnicodeBlock("SIDDHAM"); 2830 2831 /** 2832 * Constant for the "Modi" Unicode character block. 2833 * @since 9 2834 */ 2835 public static final UnicodeBlock MODI = 2836 new UnicodeBlock("MODI"); 2837 2838 /** 2839 * Constant for the "Warang Citi" Unicode character block. 2840 * @since 9 2841 */ 2842 public static final UnicodeBlock WARANG_CITI = 2843 new UnicodeBlock("WARANG_CITI", 2844 "WARANG CITI", 2845 "WARANGCITI"); 2846 2847 /** 2848 * Constant for the "Pau Cin Hau" Unicode character block. 2849 * @since 9 2850 */ 2851 public static final UnicodeBlock PAU_CIN_HAU = 2852 new UnicodeBlock("PAU_CIN_HAU", 2853 "PAU CIN HAU", 2854 "PAUCINHAU"); 2855 2856 /** 2857 * Constant for the "Mro" Unicode character block. 2858 * @since 9 2859 */ 2860 public static final UnicodeBlock MRO = 2861 new UnicodeBlock("MRO"); 2862 2863 /** 2864 * Constant for the "Bassa Vah" Unicode character block. 2865 * @since 9 2866 */ 2867 public static final UnicodeBlock BASSA_VAH = 2868 new UnicodeBlock("BASSA_VAH", 2869 "BASSA VAH", 2870 "BASSAVAH"); 2871 2872 /** 2873 * Constant for the "Pahawh Hmong" Unicode character block. 2874 * @since 9 2875 */ 2876 public static final UnicodeBlock PAHAWH_HMONG = 2877 new UnicodeBlock("PAHAWH_HMONG", 2878 "PAHAWH HMONG", 2879 "PAHAWHHMONG"); 2880 2881 /** 2882 * Constant for the "Duployan" Unicode character block. 2883 * @since 9 2884 */ 2885 public static final UnicodeBlock DUPLOYAN = 2886 new UnicodeBlock("DUPLOYAN"); 2887 2888 /** 2889 * Constant for the "Shorthand Format Controls" Unicode character block. 
2890 * @since 9 2891 */ 2892 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2893 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2894 "SHORTHAND FORMAT CONTROLS", 2895 "SHORTHANDFORMATCONTROLS"); 2896 2897 /** 2898 * Constant for the "Mende Kikakui" Unicode character block. 2899 * @since 9 2900 */ 2901 public static final UnicodeBlock MENDE_KIKAKUI = 2902 new UnicodeBlock("MENDE_KIKAKUI", 2903 "MENDE KIKAKUI", 2904 "MENDEKIKAKUI"); 2905 2906 /** 2907 * Constant for the "Ornamental Dingbats" Unicode character block. 2908 * @since 9 2909 */ 2910 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2911 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2912 "ORNAMENTAL DINGBATS", 2913 "ORNAMENTALDINGBATS"); 2914 2915 /** 2916 * Constant for the "Geometric Shapes Extended" Unicode character block. 2917 * @since 9 2918 */ 2919 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2920 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2921 "GEOMETRIC SHAPES EXTENDED", 2922 "GEOMETRICSHAPESEXTENDED"); 2923 2924 /** 2925 * Constant for the "Supplemental Arrows-C" Unicode character block. 2926 * @since 9 2927 */ 2928 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2929 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2930 "SUPPLEMENTAL ARROWS-C", 2931 "SUPPLEMENTALARROWS-C"); 2932 2933 /** 2934 * Constant for the "Cherokee Supplement" Unicode character block. 2935 * @since 9 2936 */ 2937 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2938 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2939 "CHEROKEE SUPPLEMENT", 2940 "CHEROKEESUPPLEMENT"); 2941 2942 /** 2943 * Constant for the "Hatran" Unicode character block. 2944 * @since 9 2945 */ 2946 public static final UnicodeBlock HATRAN = 2947 new UnicodeBlock("HATRAN"); 2948 2949 /** 2950 * Constant for the "Old Hungarian" Unicode character block. 
2951 * @since 9 2952 */ 2953 public static final UnicodeBlock OLD_HUNGARIAN = 2954 new UnicodeBlock("OLD_HUNGARIAN", 2955 "OLD HUNGARIAN", 2956 "OLDHUNGARIAN"); 2957 2958 /** 2959 * Constant for the "Multani" Unicode character block. 2960 * @since 9 2961 */ 2962 public static final UnicodeBlock MULTANI = 2963 new UnicodeBlock("MULTANI"); 2964 2965 /** 2966 * Constant for the "Ahom" Unicode character block. 2967 * @since 9 2968 */ 2969 public static final UnicodeBlock AHOM = 2970 new UnicodeBlock("AHOM"); 2971 2972 /** 2973 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 2974 * @since 9 2975 */ 2976 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2977 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 2978 "EARLY DYNASTIC CUNEIFORM", 2979 "EARLYDYNASTICCUNEIFORM"); 2980 2981 /** 2982 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 2983 * @since 9 2984 */ 2985 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2986 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 2987 "ANATOLIAN HIEROGLYPHS", 2988 "ANATOLIANHIEROGLYPHS"); 2989 2990 /** 2991 * Constant for the "Sutton SignWriting" Unicode character block. 2992 * @since 9 2993 */ 2994 public static final UnicodeBlock SUTTON_SIGNWRITING = 2995 new UnicodeBlock("SUTTON_SIGNWRITING", 2996 "SUTTON SIGNWRITING", 2997 "SUTTONSIGNWRITING"); 2998 2999 /** 3000 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3001 * character block. 3002 * @since 9 3003 */ 3004 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3005 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3006 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3007 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3008 3009 /** 3010 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3011 * character block. 
3012 * @since 9 3013 */ 3014 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3015 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3016 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3017 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3018 3019 /** 3020 * Constant for the "Syriac Supplement" Unicode 3021 * character block. 3022 * @since 11 3023 */ 3024 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3025 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3026 "SYRIAC SUPPLEMENT", 3027 "SYRIACSUPPLEMENT"); 3028 3029 /** 3030 * Constant for the "Cyrillic Extended-C" Unicode 3031 * character block. 3032 * @since 11 3033 */ 3034 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3035 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3036 "CYRILLIC EXTENDED-C", 3037 "CYRILLICEXTENDED-C"); 3038 3039 /** 3040 * Constant for the "Osage" Unicode 3041 * character block. 3042 * @since 11 3043 */ 3044 public static final UnicodeBlock OSAGE = 3045 new UnicodeBlock("OSAGE"); 3046 3047 /** 3048 * Constant for the "Newa" Unicode 3049 * character block. 3050 * @since 11 3051 */ 3052 public static final UnicodeBlock NEWA = 3053 new UnicodeBlock("NEWA"); 3054 3055 /** 3056 * Constant for the "Mongolian Supplement" Unicode 3057 * character block. 3058 * @since 11 3059 */ 3060 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3061 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3062 "MONGOLIAN SUPPLEMENT", 3063 "MONGOLIANSUPPLEMENT"); 3064 3065 /** 3066 * Constant for the "Marchen" Unicode 3067 * character block. 3068 * @since 11 3069 */ 3070 public static final UnicodeBlock MARCHEN = 3071 new UnicodeBlock("MARCHEN"); 3072 3073 /** 3074 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3075 * character block. 
3076 * @since 11 3077 */ 3078 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3079 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3080 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3081 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3082 3083 /** 3084 * Constant for the "Tangut" Unicode 3085 * character block. 3086 * @since 11 3087 */ 3088 public static final UnicodeBlock TANGUT = 3089 new UnicodeBlock("TANGUT"); 3090 3091 /** 3092 * Constant for the "Tangut Components" Unicode 3093 * character block. 3094 * @since 11 3095 */ 3096 public static final UnicodeBlock TANGUT_COMPONENTS = 3097 new UnicodeBlock("TANGUT_COMPONENTS", 3098 "TANGUT COMPONENTS", 3099 "TANGUTCOMPONENTS"); 3100 3101 /** 3102 * Constant for the "Kana Extended-A" Unicode 3103 * character block. 3104 * @since 11 3105 */ 3106 public static final UnicodeBlock KANA_EXTENDED_A = 3107 new UnicodeBlock("KANA_EXTENDED_A", 3108 "KANA EXTENDED-A", 3109 "KANAEXTENDED-A"); 3110 /** 3111 * Constant for the "Glagolitic Supplement" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3116 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3117 "GLAGOLITIC SUPPLEMENT", 3118 "GLAGOLITICSUPPLEMENT"); 3119 /** 3120 * Constant for the "Adlam" Unicode 3121 * character block. 3122 * @since 11 3123 */ 3124 public static final UnicodeBlock ADLAM = 3125 new UnicodeBlock("ADLAM"); 3126 3127 /** 3128 * Constant for the "Masaram Gondi" Unicode 3129 * character block. 3130 * @since 11 3131 */ 3132 public static final UnicodeBlock MASARAM_GONDI = 3133 new UnicodeBlock("MASARAM_GONDI", 3134 "MASARAM GONDI", 3135 "MASARAMGONDI"); 3136 3137 /** 3138 * Constant for the "Zanabazar Square" Unicode 3139 * character block. 
3140 * @since 11 3141 */ 3142 public static final UnicodeBlock ZANABAZAR_SQUARE = 3143 new UnicodeBlock("ZANABAZAR_SQUARE", 3144 "ZANABAZAR SQUARE", 3145 "ZANABAZARSQUARE"); 3146 3147 /** 3148 * Constant for the "Nushu" Unicode 3149 * character block. 3150 * @since 11 3151 */ 3152 public static final UnicodeBlock NUSHU = 3153 new UnicodeBlock("NUSHU"); 3154 3155 /** 3156 * Constant for the "Soyombo" Unicode 3157 * character block. 3158 * @since 11 3159 */ 3160 public static final UnicodeBlock SOYOMBO = 3161 new UnicodeBlock("SOYOMBO"); 3162 3163 /** 3164 * Constant for the "Bhaiksuki" Unicode 3165 * character block. 3166 * @since 11 3167 */ 3168 public static final UnicodeBlock BHAIKSUKI = 3169 new UnicodeBlock("BHAIKSUKI"); 3170 3171 /** 3172 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3173 * character block. 3174 * @since 11 3175 */ 3176 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3177 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3178 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3179 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3180 /** 3181 * Constant for the "Georgian Extended" Unicode 3182 * character block. 3183 * @since 12 3184 */ 3185 public static final UnicodeBlock GEORGIAN_EXTENDED = 3186 new UnicodeBlock("GEORGIAN_EXTENDED", 3187 "GEORGIAN EXTENDED", 3188 "GEORGIANEXTENDED"); 3189 3190 /** 3191 * Constant for the "Hanifi Rohingya" Unicode 3192 * character block. 3193 * @since 12 3194 */ 3195 public static final UnicodeBlock HANIFI_ROHINGYA = 3196 new UnicodeBlock("HANIFI_ROHINGYA", 3197 "HANIFI ROHINGYA", 3198 "HANIFIROHINGYA"); 3199 3200 /** 3201 * Constant for the "Old Sogdian" Unicode 3202 * character block. 3203 * @since 12 3204 */ 3205 public static final UnicodeBlock OLD_SOGDIAN = 3206 new UnicodeBlock("OLD_SOGDIAN", 3207 "OLD SOGDIAN", 3208 "OLDSOGDIAN"); 3209 3210 /** 3211 * Constant for the "Sogdian" Unicode 3212 * character block. 
3213 * @since 12 3214 */ 3215 public static final UnicodeBlock SOGDIAN = 3216 new UnicodeBlock("SOGDIAN"); 3217 3218 /** 3219 * Constant for the "Dogra" Unicode 3220 * character block. 3221 * @since 12 3222 */ 3223 public static final UnicodeBlock DOGRA = 3224 new UnicodeBlock("DOGRA"); 3225 3226 /** 3227 * Constant for the "Gunjala Gondi" Unicode 3228 * character block. 3229 * @since 12 3230 */ 3231 public static final UnicodeBlock GUNJALA_GONDI = 3232 new UnicodeBlock("GUNJALA_GONDI", 3233 "GUNJALA GONDI", 3234 "GUNJALAGONDI"); 3235 3236 /** 3237 * Constant for the "Makasar" Unicode 3238 * character block. 3239 * @since 12 3240 */ 3241 public static final UnicodeBlock MAKASAR = 3242 new UnicodeBlock("MAKASAR"); 3243 3244 /** 3245 * Constant for the "Medefaidrin" Unicode 3246 * character block. 3247 * @since 12 3248 */ 3249 public static final UnicodeBlock MEDEFAIDRIN = 3250 new UnicodeBlock("MEDEFAIDRIN"); 3251 3252 /** 3253 * Constant for the "Mayan Numerals" Unicode 3254 * character block. 3255 * @since 12 3256 */ 3257 public static final UnicodeBlock MAYAN_NUMERALS = 3258 new UnicodeBlock("MAYAN_NUMERALS", 3259 "MAYAN NUMERALS", 3260 "MAYANNUMERALS"); 3261 3262 /** 3263 * Constant for the "Indic Siyaq Numbers" Unicode 3264 * character block. 3265 * @since 12 3266 */ 3267 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3268 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3269 "INDIC SIYAQ NUMBERS", 3270 "INDICSIYAQNUMBERS"); 3271 3272 /** 3273 * Constant for the "Chess Symbols" Unicode 3274 * character block. 3275 * @since 12 3276 */ 3277 public static final UnicodeBlock CHESS_SYMBOLS = 3278 new UnicodeBlock("CHESS_SYMBOLS", 3279 "CHESS SYMBOLS", 3280 "CHESSSYMBOLS"); 3281 3282 /** 3283 * Constant for the "Elymaic" Unicode 3284 * character block. 3285 * @since 13 3286 */ 3287 public static final UnicodeBlock ELYMAIC = 3288 new UnicodeBlock("ELYMAIC"); 3289 3290 /** 3291 * Constant for the "Nandinagari" Unicode 3292 * character block. 
3293 * @since 13 3294 */ 3295 public static final UnicodeBlock NANDINAGARI = 3296 new UnicodeBlock("NANDINAGARI"); 3297 3298 /** 3299 * Constant for the "Tamil Supplement" Unicode 3300 * character block. 3301 * @since 13 3302 */ 3303 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3304 new UnicodeBlock("TAMIL_SUPPLEMENT", 3305 "TAMIL SUPPLEMENT", 3306 "TAMILSUPPLEMENT"); 3307 3308 /** 3309 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3310 * character block. 3311 * @since 13 3312 */ 3313 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3314 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3315 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3316 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3317 3318 /** 3319 * Constant for the "Small Kana Extension" Unicode 3320 * character block. 3321 * @since 13 3322 */ 3323 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3324 new UnicodeBlock("SMALL_KANA_EXTENSION", 3325 "SMALL KANA EXTENSION", 3326 "SMALLKANAEXTENSION"); 3327 3328 /** 3329 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3330 * character block. 3331 * @since 13 3332 */ 3333 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3334 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3335 "NYIAKENG PUACHUE HMONG", 3336 "NYIAKENGPUACHUEHMONG"); 3337 3338 /** 3339 * Constant for the "Wancho" Unicode 3340 * character block. 3341 * @since 13 3342 */ 3343 public static final UnicodeBlock WANCHO = 3344 new UnicodeBlock("WANCHO"); 3345 3346 /** 3347 * Constant for the "Ottoman Siyaq Numbers" Unicode 3348 * character block. 3349 * @since 13 3350 */ 3351 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3352 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3353 "OTTOMAN SIYAQ NUMBERS", 3354 "OTTOMANSIYAQNUMBERS"); 3355 3356 /** 3357 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3358 * character block. 
3359 * @since 13 3360 */ 3361 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3362 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3363 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3364 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3365 3366 /** 3367 * Constant for the "Yezidi" Unicode 3368 * character block. 3369 * @since 15 3370 */ 3371 public static final UnicodeBlock YEZIDI = 3372 new UnicodeBlock("YEZIDI"); 3373 3374 /** 3375 * Constant for the "Chorasmian" Unicode 3376 * character block. 3377 * @since 15 3378 */ 3379 public static final UnicodeBlock CHORASMIAN = 3380 new UnicodeBlock("CHORASMIAN"); 3381 3382 /** 3383 * Constant for the "Dives Akuru" Unicode 3384 * character block. 3385 * @since 15 3386 */ 3387 public static final UnicodeBlock DIVES_AKURU = 3388 new UnicodeBlock("DIVES_AKURU", 3389 "DIVES AKURU", 3390 "DIVESAKURU"); 3391 3392 /** 3393 * Constant for the "Lisu Supplement" Unicode 3394 * character block. 3395 * @since 15 3396 */ 3397 public static final UnicodeBlock LISU_SUPPLEMENT = 3398 new UnicodeBlock("LISU_SUPPLEMENT", 3399 "LISU SUPPLEMENT", 3400 "LISUSUPPLEMENT"); 3401 3402 /** 3403 * Constant for the "Khitan Small Script" Unicode 3404 * character block. 3405 * @since 15 3406 */ 3407 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3408 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3409 "KHITAN SMALL SCRIPT", 3410 "KHITANSMALLSCRIPT"); 3411 3412 /** 3413 * Constant for the "Tangut Supplement" Unicode 3414 * character block. 3415 * @since 15 3416 */ 3417 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3418 new UnicodeBlock("TANGUT_SUPPLEMENT", 3419 "TANGUT SUPPLEMENT", 3420 "TANGUTSUPPLEMENT"); 3421 3422 /** 3423 * Constant for the "Symbols for Legacy Computing" Unicode 3424 * character block. 
3425 * @since 15 3426 */ 3427 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3428 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3429 "SYMBOLS FOR LEGACY COMPUTING", 3430 "SYMBOLSFORLEGACYCOMPUTING"); 3431 3432 /** 3433 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3434 * character block. 3435 * @since 15 3436 */ 3437 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3438 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3439 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3440 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3441 3442 /** 3443 * Constant for the "Arabic Extended-B" Unicode 3444 * character block. 3445 * @since 19 3446 */ 3447 public static final UnicodeBlock ARABIC_EXTENDED_B = 3448 new UnicodeBlock("ARABIC_EXTENDED_B", 3449 "ARABIC EXTENDED-B", 3450 "ARABICEXTENDED-B"); 3451 3452 /** 3453 * Constant for the "Vithkuqi" Unicode 3454 * character block. 3455 * @since 19 3456 */ 3457 public static final UnicodeBlock VITHKUQI = 3458 new UnicodeBlock("VITHKUQI"); 3459 3460 /** 3461 * Constant for the "Latin Extended-F" Unicode 3462 * character block. 3463 * @since 19 3464 */ 3465 public static final UnicodeBlock LATIN_EXTENDED_F = 3466 new UnicodeBlock("LATIN_EXTENDED_F", 3467 "LATIN EXTENDED-F", 3468 "LATINEXTENDED-F"); 3469 3470 /** 3471 * Constant for the "Old Uyghur" Unicode 3472 * character block. 3473 * @since 19 3474 */ 3475 public static final UnicodeBlock OLD_UYGHUR = 3476 new UnicodeBlock("OLD_UYGHUR", 3477 "OLD UYGHUR", 3478 "OLDUYGHUR"); 3479 3480 /** 3481 * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode 3482 * character block. 
3483 * @since 19 3484 */ 3485 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 3486 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 3487 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A", 3488 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A"); 3489 3490 /** 3491 * Constant for the "Cypro-Minoan" Unicode 3492 * character block. 3493 * @since 19 3494 */ 3495 public static final UnicodeBlock CYPRO_MINOAN = 3496 new UnicodeBlock("CYPRO_MINOAN", 3497 "CYPRO-MINOAN", 3498 "CYPRO-MINOAN"); 3499 3500 /** 3501 * Constant for the "Tangsa" Unicode 3502 * character block. 3503 * @since 19 3504 */ 3505 public static final UnicodeBlock TANGSA = 3506 new UnicodeBlock("TANGSA"); 3507 3508 /** 3509 * Constant for the "Kana Extended-B" Unicode 3510 * character block. 3511 * @since 19 3512 */ 3513 public static final UnicodeBlock KANA_EXTENDED_B = 3514 new UnicodeBlock("KANA_EXTENDED_B", 3515 "KANA EXTENDED-B", 3516 "KANAEXTENDED-B"); 3517 3518 /** 3519 * Constant for the "Znamenny Musical Notation" Unicode 3520 * character block. 3521 * @since 19 3522 */ 3523 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 3524 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 3525 "ZNAMENNY MUSICAL NOTATION", 3526 "ZNAMENNYMUSICALNOTATION"); 3527 3528 /** 3529 * Constant for the "Latin Extended-G" Unicode 3530 * character block. 3531 * @since 19 3532 */ 3533 public static final UnicodeBlock LATIN_EXTENDED_G = 3534 new UnicodeBlock("LATIN_EXTENDED_G", 3535 "LATIN EXTENDED-G", 3536 "LATINEXTENDED-G"); 3537 3538 /** 3539 * Constant for the "Toto" Unicode 3540 * character block. 3541 * @since 19 3542 */ 3543 public static final UnicodeBlock TOTO = 3544 new UnicodeBlock("TOTO"); 3545 3546 /** 3547 * Constant for the "Ethiopic Extended-B" Unicode 3548 * character block. 
3549 * @since 19 3550 */ 3551 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 3552 new UnicodeBlock("ETHIOPIC_EXTENDED_B", 3553 "ETHIOPIC EXTENDED-B", 3554 "ETHIOPICEXTENDED-B"); 3555 3556 /** 3557 * Constant for the "Arabic Extended-C" Unicode 3558 * character block. 3559 * @since 20 3560 */ 3561 public static final UnicodeBlock ARABIC_EXTENDED_C = 3562 new UnicodeBlock("ARABIC_EXTENDED_C", 3563 "ARABIC EXTENDED-C", 3564 "ARABICEXTENDED-C"); 3565 3566 /** 3567 * Constant for the "Devanagari Extended-A" Unicode 3568 * character block. 3569 * @since 20 3570 */ 3571 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 3572 new UnicodeBlock("DEVANAGARI_EXTENDED_A", 3573 "DEVANAGARI EXTENDED-A", 3574 "DEVANAGARIEXTENDED-A"); 3575 3576 /** 3577 * Constant for the "Kawi" Unicode 3578 * character block. 3579 * @since 20 3580 */ 3581 public static final UnicodeBlock KAWI = 3582 new UnicodeBlock("KAWI"); 3583 3584 /** 3585 * Constant for the "Kaktovik Numerals" Unicode 3586 * character block. 3587 * @since 20 3588 */ 3589 public static final UnicodeBlock KAKTOVIK_NUMERALS = 3590 new UnicodeBlock("KAKTOVIK_NUMERALS", 3591 "KAKTOVIK NUMERALS", 3592 "KAKTOVIKNUMERALS"); 3593 3594 /** 3595 * Constant for the "Cyrillic Extended-D" Unicode 3596 * character block. 3597 * @since 20 3598 */ 3599 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 3600 new UnicodeBlock("CYRILLIC_EXTENDED_D", 3601 "CYRILLIC EXTENDED-D", 3602 "CYRILLICEXTENDED-D"); 3603 3604 /** 3605 * Constant for the "Nag Mundari" Unicode 3606 * character block. 3607 * @since 20 3608 */ 3609 public static final UnicodeBlock NAG_MUNDARI = 3610 new UnicodeBlock("NAG_MUNDARI", 3611 "NAG MUNDARI", 3612 "NAGMUNDARI"); 3613 3614 /** 3615 * Constant for the "CJK Unified Ideographs Extension H" Unicode 3616 * character block. 
3617 * @since 20 3618 */ 3619 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 3620 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 3621 "CJK UNIFIED IDEOGRAPHS EXTENSION H", 3622 "CJKUNIFIEDIDEOGRAPHSEXTENSIONH"); 3623 3624 /** 3625 * Constant for the "CJK Unified Ideographs Extension I" Unicode 3626 * character block. 3627 * @since 22 3628 */ 3629 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 3630 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 3631 "CJK UNIFIED IDEOGRAPHS EXTENSION I", 3632 "CJKUNIFIEDIDEOGRAPHSEXTENSIONI"); 3633 3634 /** 3635 * Constant for the "Todhri" Unicode 3636 * character block. 3637 * @since 24 3638 */ 3639 public static final UnicodeBlock TODHRI = 3640 new UnicodeBlock("TODHRI"); 3641 3642 /** 3643 * Constant for the "Garay" Unicode 3644 * character block. 3645 * @since 24 3646 */ 3647 public static final UnicodeBlock GARAY = 3648 new UnicodeBlock("GARAY"); 3649 3650 /** 3651 * Constant for the "Tulu-Tigalari" Unicode 3652 * character block. 3653 * @since 24 3654 */ 3655 public static final UnicodeBlock TULU_TIGALARI = 3656 new UnicodeBlock("TULU_TIGALARI", 3657 "TULU-TIGALARI"); 3658 3659 /** 3660 * Constant for the "Myanmar Extended-C" Unicode 3661 * character block. 3662 * @since 24 3663 */ 3664 public static final UnicodeBlock MYANMAR_EXTENDED_C = 3665 new UnicodeBlock("MYANMAR_EXTENDED_C", 3666 "MYANMAR EXTENDED-C", 3667 "MYANMAREXTENDED-C"); 3668 3669 /** 3670 * Constant for the "Sunuwar" Unicode 3671 * character block. 3672 * @since 24 3673 */ 3674 public static final UnicodeBlock SUNUWAR = 3675 new UnicodeBlock("SUNUWAR"); 3676 3677 /** 3678 * Constant for the "Egyptian Hieroglyphs Extended-A" Unicode 3679 * character block. 
3680 * @since 24 3681 */ 3682 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS_EXTENDED_A = 3683 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS_EXTENDED_A", 3684 "EGYPTIAN HIEROGLYPHS EXTENDED-A", 3685 "EGYPTIANHIEROGLYPHSEXTENDED-A"); 3686 3687 /** 3688 * Constant for the "Gurung Khema" Unicode 3689 * character block. 3690 * @since 24 3691 */ 3692 public static final UnicodeBlock GURUNG_KHEMA = 3693 new UnicodeBlock("GURUNG_KHEMA", 3694 "GURUNG KHEMA", 3695 "GURUNGKHEMA"); 3696 3697 /** 3698 * Constant for the "Kirat Rai" Unicode 3699 * character block. 3700 * @since 24 3701 */ 3702 public static final UnicodeBlock KIRAT_RAI = 3703 new UnicodeBlock("KIRAT_RAI", 3704 "KIRAT RAI", 3705 "KIRATRAI"); 3706 3707 /** 3708 * Constant for the "Symbols for Legacy Computing Supplement" Unicode 3709 * character block. 3710 * @since 24 3711 */ 3712 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT = 3713 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT", 3714 "SYMBOLS FOR LEGACY COMPUTING SUPPLEMENT", 3715 "SYMBOLSFORLEGACYCOMPUTINGSUPPLEMENT"); 3716 3717 /** 3718 * Constant for the "Ol Onal" Unicode 3719 * character block. 
* @since 24
         */
        public static final UnicodeBlock OL_ONAL =
            new UnicodeBlock("OL_ONAL",
                             "OL ONAL",
                             "OLONAL");

        /*
         * Range table searched by of(int). Each entry is the first code point
         * of a contiguous range, and the entries are listed in strictly
         * ascending order. The trailing comment on each entry gives the range
         * and block name; entries marked "unassigned" open a gap that belongs
         * to no block.
         *
         * This array is index-aligned with the blocks array that follows:
         * of(int) uses the index it finds here to read blocks directly, so an
         * entry added or removed here needs the matching change there.
         */
        private static final int[] blockStarts = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   // 0860..086F; Syriac Supplement
            0x0870,   // 0870..089F; Arabic Extended-B
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
            0x1C90,   // 1C90..1CBF; Georgian Extended
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   // unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   // AB30..AB6F; Latin Extended-E
            0xAB70,   // AB70..ABBF; Cherokee Supplement
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  // unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  // 102E0..102FF; Coptic Epact Numbers
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  // 10350..1037F; Old Permic
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  // unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  // 104B0..104FF; Osage
            0x10500,  // 10500..1052F; Elbasan
            0x10530,  // 10530..1056F; Caucasian Albanian
            0x10570,  // 10570..105BF; Vithkuqi
            0x105C0,  // 105C0..105FF; Todhri
            0x10600,  // 10600..1077F; Linear A
            0x10780,  // 10780..107BF; Latin Extended-F
            0x107C0,  // unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  // 10860..1087F; Palmyrene
            0x10880,  // 10880..108AF; Nabataean
            0x108B0,  // unassigned
            0x108E0,  // 108E0..108FF; Hatran
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  // unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  // 10A80..10A9F; Old North Arabian
            0x10AA0,  // unassigned
            0x10AC0,  // 10AC0..10AFF; Manichaean
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  // 10B80..10BAF; Psalter Pahlavi
            0x10BB0,  // unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  // unassigned
            0x10C80,  // 10C80..10CFF; Old Hungarian
            0x10D00,  // 10D00..10D3F; Hanifi Rohingya
            0x10D40,  // 10D40..10D8F; Garay
            0x10D90,  // unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  // 10E80..10EBF; Yezidi
            0x10EC0,  // 10EC0..10EFF; Arabic Extended-C
            0x10F00,  // 10F00..10F2F; Old Sogdian
            0x10F30,  // 10F30..10F6F; Sogdian
            0x10F70,  // 10F70..10FAF; Old Uyghur
            0x10FB0,  // 10FB0..10FDF; Chorasmian
            0x10FE0,  // 10FE0..10FFF; Elymaic
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  // 11150..1117F; Mahajani
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
            0x11200,  // 11200..1124F; Khojki
            0x11250,  // unassigned
            0x11280,  // 11280..112AF; Multani
            0x112B0,  // 112B0..112FF; Khudawadi
            0x11300,  // 11300..1137F; Grantha
            0x11380,  // 11380..113FF; Tulu-Tigalari
            0x11400,  // 11400..1147F; Newa
            0x11480,  // 11480..114DF; Tirhuta
            0x114E0,  // unassigned
            0x11580,  // 11580..115FF; Siddham
            0x11600,  // 11600..1165F; Modi
            0x11660,  // 11660..1167F; Mongolian Supplement
            0x11680,  // 11680..116CF; Takri
            0x116D0,  // 116D0..116FF; Myanmar Extended-C
            0x11700,  // 11700..1174F; Ahom
            0x11750,  // unassigned
            0x11800,  // 11800..1184F; Dogra
            0x11850,  // unassigned
            0x118A0,  // 118A0..118FF; Warang Citi
            0x11900,  // 11900..1195F; Dives Akuru
            0x11960,  // unassigned
            0x119A0,  // 119A0..119FF; Nandinagari
            0x11A00,  // 11A00..11A4F; Zanabazar Square
            0x11A50,  // 11A50..11AAF; Soyombo
            0x11AB0,  // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
            0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
            0x11B00,  // 11B00..11B5F; Devanagari Extended-A
            0x11B60,  // unassigned
            0x11BC0,  // 11BC0..11BFF; Sunuwar
            0x11C00,  // 11C00..11C6F; Bhaiksuki
            0x11C70,  // 11C70..11CBF; Marchen
            0x11CC0,  // unassigned
            0x11D00,  // 11D00..11D5F; Masaram Gondi
            0x11D60,  // 11D60..11DAF; Gunjala Gondi
            0x11DB0,  // unassigned
            0x11EE0,  // 11EE0..11EFF; Makasar
            0x11F00,  // 11F00..11F5F; Kawi
            0x11F60,  // unassigned
            0x11FB0,  // 11FB0..11FBF; Lisu Supplement
            0x11FC0,  // 11FC0..11FFF; Tamil Supplement
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  // 12480..1254F; Early Dynastic Cuneiform
            0x12550,  // unassigned
            0x12F90,  // 12F90..12FFF; Cypro-Minoan
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  // 13430..1345F; Egyptian Hieroglyph Format Controls
            0x13460,  // 13460..143FF; Egyptian Hieroglyphs Extended-A
            0x14400,  // 14400..1467F; Anatolian Hieroglyphs
            0x14680,  // unassigned
            0x16100,  // 16100..1613F; Gurung Khema
            0x16140,  // unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  // 16A40..16A6F; Mro
            0x16A70,  // 16A70..16ACF; Tangsa
            0x16AD0,  // 16AD0..16AFF; Bassa Vah
            0x16B00,  // 16B00..16B8F; Pahawh Hmong
            0x16B90,  // unassigned
            0x16D40,  // 16D40..16D7F; Kirat Rai
            0x16D80,  // unassigned
            0x16E40,  // 16E40..16E9F; Medefaidrin
            0x16EA0,  // unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  // unassigned
            0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
            0x17000,  // 17000..187FF; Tangut
            0x18800,  // 18800..18AFF; Tangut Components
            0x18B00,  // 18B00..18CFF; Khitan Small Script
            0x18D00,  // 18D00..18D7F; Tangut Supplement
            0x18D80,  // unassigned
            0x1AFF0,  // 1AFF0..1AFFF; Kana Extended-B
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  // 1B100..1B12F; Kana Extended-A
            0x1B130,  // 1B130..1B16F; Small Kana Extension
            0x1B170,  // 1B170..1B2FF; Nushu
            0x1B300,  // unassigned
            0x1BC00,  // 1BC00..1BC9F; Duployan
            0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
            0x1BCB0,  // unassigned
            0x1CC00,  // 1CC00..1CEBF; Symbols for Legacy Computing Supplement
            0x1CEC0,  // unassigned
            0x1CF00,  // 1CF00..1CFCF; Znamenny Musical Notation
            0x1CFD0,  // unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  // unassigned
            0x1D2C0,  // 1D2C0..1D2DF; Kaktovik Numerals
            0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  // unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  // 1D800..1DAAF; Sutton SignWriting
            0x1DAB0,  // unassigned
            0x1DF00,  // 1DF00..1DFFF; Latin Extended-G
            0x1E000,  // 1E000..1E02F; Glagolitic Supplement
            0x1E030,  // 1E030..1E08F; Cyrillic Extended-D
            0x1E090,  // unassigned
            0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
            0x1E150,  // unassigned
            0x1E290,  // 1E290..1E2BF; Toto
            0x1E2C0,  // 1E2C0..1E2FF; Wancho
            0x1E300,  // unassigned
            0x1E4D0,  // 1E4D0..1E4FF; Nag Mundari
            0x1E500,  // unassigned
            0x1E5D0,  // 1E5D0..1E5FF; Ol Onal
            0x1E600,  // unassigned
            0x1E7E0,  // 1E7E0..1E7FF; Ethiopic Extended-B
            0x1E800,  // 1E800..1E8DF; Mende Kikakui
            0x1E8E0,  // unassigned
            0x1E900,  // 1E900..1E95F; Adlam
            0x1E960,  // unassigned
            0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
            0x1ECC0,  // unassigned
            0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
            0x1ED50,  // unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  // unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  // 1F650..1F67F; Ornamental Dingbats
            0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
            0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
            0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
            0x1FA00,  // 1FA00..1FA6F; Chess Symbols
            0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
            0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
            0x1FC00,  // unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  // unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0,  // 2EBF0..2EE5F; CJK Unified Ideographs Extension I
            0x2EE60,  // unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  // unassigned
            0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350,  // 31350..323AF; CJK Unified Ideographs Extension H
            0x323B0,  // unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  // unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  // unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };

        /*
         * Block for each range in blockStarts: blocks[i] is the block whose
         * range begins at blockStarts[i], or null where that range is marked
         * unassigned. of(int) returns elements of this array directly, which
         * is how it yields null for a code point that lies in no block.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            ARABIC_EXTENDED_B,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            VITHKUQI,
            TODHRI,
            LINEAR_A,
            LATIN_EXTENDED_F,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            GARAY,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            ARABIC_EXTENDED_C,
            OLD_SOGDIAN,
            SOGDIAN,
            OLD_UYGHUR,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            TULU_TIGALARI,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            MYANMAR_EXTENDED_C,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
            PAU_CIN_HAU,
            DEVANAGARI_EXTENDED_A,
            null,
            SUNUWAR,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            null,
            MAKASAR,
            KAWI,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            CYPRO_MINOAN,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            EGYPTIAN_HIEROGLYPHS_EXTENDED_A,
            ANATOLIAN_HIEROGLYPHS,
            null,
            GURUNG_KHEMA,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            TANGSA,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            KIRAT_RAI,
            null,
            MEDEFAIDRIN,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            null,
            KANA_EXTENDED_B,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            SYMBOLS_FOR_LEGACY_COMPUTING_SUPPLEMENT,
            null,
            ZNAMENNY_MUSICAL_NOTATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            KAKTOVIK_NUMERALS,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            LATIN_EXTENDED_G,
            GLAGOLITIC_SUPPLEMENT,
            CYRILLIC_EXTENDED_D,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            TOTO,
            WANCHO,
            null,
            NAG_MUNDARI,
            null,
            OL_ONAL,
            null,
            ETHIOPIC_EXTENDED_B,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_M_SYMBOLS_PLACEHOLDER,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            CHESS_SYMBOLS,
            SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
            SYMBOLS_FOR_LEGACY_COMPUTING,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B,
        };


        /**
         * Returns the object representing the Unicode block containing the
         * given character, or {@code null} if the character is not a
         * member of a defined block.
         *
         * <p><b>Note:</b> This method cannot handle
         * <a href="Character.html#supplementary"> supplementary
         * characters</a>.
To support all Unicode characters, including 4520 * supplementary characters, use the {@link #of(int)} method. 4521 * 4522 * @param c The character in question 4523 * @return The {@code UnicodeBlock} instance representing the 4524 * Unicode block of which this character is a member, or 4525 * {@code null} if the character is not a member of any 4526 * Unicode block 4527 */ 4528 public static UnicodeBlock of(char c) { 4529 return of((int)c); 4530 } 4531 4532 /** 4533 * Returns the object representing the Unicode block 4534 * containing the given character (Unicode code point), or 4535 * {@code null} if the character is not a member of a 4536 * defined block. 4537 * 4538 * @param codePoint the character (Unicode code point) in question. 4539 * @return The {@code UnicodeBlock} instance representing the 4540 * Unicode block of which this character is a member, or 4541 * {@code null} if the character is not a member of any 4542 * Unicode block 4543 * @throws IllegalArgumentException if the specified 4544 * {@code codePoint} is an invalid Unicode code point. 4545 * @see Character#isValidCodePoint(int) 4546 * @since 1.5 4547 */ 4548 public static UnicodeBlock of(int codePoint) { 4549 if (!isValidCodePoint(codePoint)) { 4550 throw new IllegalArgumentException( 4551 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4552 } 4553 4554 int top, bottom, current; 4555 bottom = 0; 4556 top = blockStarts.length; 4557 current = top/2; 4558 4559 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4560 while (top - bottom > 1) { 4561 if (codePoint >= blockStarts[current]) { 4562 bottom = current; 4563 } else { 4564 top = current; 4565 } 4566 current = (top + bottom) / 2; 4567 } 4568 return blocks[current]; 4569 } 4570 4571 /** 4572 * Returns the UnicodeBlock with the given name. Block 4573 * names are determined by The Unicode Standard. The file 4574 * {@code Blocks.txt} defines blocks for a particular 4575 * version of the standard. 
The {@link Character} class specifies 4576 * the version of the standard that it supports. 4577 * <p> 4578 * This method accepts block names in the following forms: 4579 * <ol> 4580 * <li> Canonical block names as defined by the Unicode Standard. 4581 * For example, the standard defines a "Basic Latin" block. Therefore, this 4582 * method accepts "Basic Latin" as a valid block name. The documentation of 4583 * each UnicodeBlock provides the canonical name. 4584 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4585 * is a valid block name for the "Basic Latin" block. 4586 * <li>The text representation of each constant UnicodeBlock identifier. 4587 * For example, this method will return the {@link #BASIC_LATIN} block if 4588 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4589 * hyphens in the canonical name with underscores. 4590 * </ol> 4591 * Finally, character case is ignored for all of the valid block name forms. 4592 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4593 * The en_US locale's case mapping rules are used to provide case-insensitive 4594 * string comparisons for block name validation. 4595 * <p> 4596 * If the Unicode Standard changes block names, both the previous and 4597 * current names will be accepted. 4598 * 4599 * @param blockName A {@code UnicodeBlock} name. 
4600 * @return The {@code UnicodeBlock} instance identified 4601 * by {@code blockName} 4602 * @throws IllegalArgumentException if {@code blockName} is an 4603 * invalid name 4604 * @throws NullPointerException if {@code blockName} is null 4605 * @since 1.5 4606 */ 4607 public static final UnicodeBlock forName(String blockName) { 4608 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4609 if (block == null) { 4610 throw new IllegalArgumentException("Not a valid block name: " 4611 + blockName); 4612 } 4613 return block; 4614 } 4615 } 4616 4617 4618 /** 4619 * A family of character subsets representing the character scripts 4620 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4621 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4622 * character is assigned to a single Unicode script, either a specific 4623 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4624 * one of the following three special values, 4625 * {@link Character.UnicodeScript#INHERITED Inherited}, 4626 * {@link Character.UnicodeScript#COMMON Common} or 4627 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4628 * 4629 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4630 * @since 1.7 4631 */ 4632 public static enum UnicodeScript { 4633 4634 /** 4635 * Unicode script "Common". 4636 */ 4637 COMMON, 4638 4639 /** 4640 * Unicode script "Latin". 4641 */ 4642 LATIN, 4643 4644 /** 4645 * Unicode script "Greek". 4646 */ 4647 GREEK, 4648 4649 /** 4650 * Unicode script "Cyrillic". 4651 */ 4652 CYRILLIC, 4653 4654 /** 4655 * Unicode script "Armenian". 4656 */ 4657 ARMENIAN, 4658 4659 /** 4660 * Unicode script "Hebrew". 4661 */ 4662 HEBREW, 4663 4664 /** 4665 * Unicode script "Arabic". 4666 */ 4667 ARABIC, 4668 4669 /** 4670 * Unicode script "Syriac". 4671 */ 4672 SYRIAC, 4673 4674 /** 4675 * Unicode script "Thaana". 4676 */ 4677 THAANA, 4678 4679 /** 4680 * Unicode script "Devanagari". 
4681 */ 4682 DEVANAGARI, 4683 4684 /** 4685 * Unicode script "Bengali". 4686 */ 4687 BENGALI, 4688 4689 /** 4690 * Unicode script "Gurmukhi". 4691 */ 4692 GURMUKHI, 4693 4694 /** 4695 * Unicode script "Gujarati". 4696 */ 4697 GUJARATI, 4698 4699 /** 4700 * Unicode script "Oriya". 4701 */ 4702 ORIYA, 4703 4704 /** 4705 * Unicode script "Tamil". 4706 */ 4707 TAMIL, 4708 4709 /** 4710 * Unicode script "Telugu". 4711 */ 4712 TELUGU, 4713 4714 /** 4715 * Unicode script "Kannada". 4716 */ 4717 KANNADA, 4718 4719 /** 4720 * Unicode script "Malayalam". 4721 */ 4722 MALAYALAM, 4723 4724 /** 4725 * Unicode script "Sinhala". 4726 */ 4727 SINHALA, 4728 4729 /** 4730 * Unicode script "Thai". 4731 */ 4732 THAI, 4733 4734 /** 4735 * Unicode script "Lao". 4736 */ 4737 LAO, 4738 4739 /** 4740 * Unicode script "Tibetan". 4741 */ 4742 TIBETAN, 4743 4744 /** 4745 * Unicode script "Myanmar". 4746 */ 4747 MYANMAR, 4748 4749 /** 4750 * Unicode script "Georgian". 4751 */ 4752 GEORGIAN, 4753 4754 /** 4755 * Unicode script "Hangul". 4756 */ 4757 HANGUL, 4758 4759 /** 4760 * Unicode script "Ethiopic". 4761 */ 4762 ETHIOPIC, 4763 4764 /** 4765 * Unicode script "Cherokee". 4766 */ 4767 CHEROKEE, 4768 4769 /** 4770 * Unicode script "Canadian_Aboriginal". 4771 */ 4772 CANADIAN_ABORIGINAL, 4773 4774 /** 4775 * Unicode script "Ogham". 4776 */ 4777 OGHAM, 4778 4779 /** 4780 * Unicode script "Runic". 4781 */ 4782 RUNIC, 4783 4784 /** 4785 * Unicode script "Khmer". 4786 */ 4787 KHMER, 4788 4789 /** 4790 * Unicode script "Mongolian". 4791 */ 4792 MONGOLIAN, 4793 4794 /** 4795 * Unicode script "Hiragana". 4796 */ 4797 HIRAGANA, 4798 4799 /** 4800 * Unicode script "Katakana". 4801 */ 4802 KATAKANA, 4803 4804 /** 4805 * Unicode script "Bopomofo". 4806 */ 4807 BOPOMOFO, 4808 4809 /** 4810 * Unicode script "Han". 4811 */ 4812 HAN, 4813 4814 /** 4815 * Unicode script "Yi". 4816 */ 4817 YI, 4818 4819 /** 4820 * Unicode script "Old_Italic". 
4821 */ 4822 OLD_ITALIC, 4823 4824 /** 4825 * Unicode script "Gothic". 4826 */ 4827 GOTHIC, 4828 4829 /** 4830 * Unicode script "Deseret". 4831 */ 4832 DESERET, 4833 4834 /** 4835 * Unicode script "Inherited". 4836 */ 4837 INHERITED, 4838 4839 /** 4840 * Unicode script "Tagalog". 4841 */ 4842 TAGALOG, 4843 4844 /** 4845 * Unicode script "Hanunoo". 4846 */ 4847 HANUNOO, 4848 4849 /** 4850 * Unicode script "Buhid". 4851 */ 4852 BUHID, 4853 4854 /** 4855 * Unicode script "Tagbanwa". 4856 */ 4857 TAGBANWA, 4858 4859 /** 4860 * Unicode script "Limbu". 4861 */ 4862 LIMBU, 4863 4864 /** 4865 * Unicode script "Tai_Le". 4866 */ 4867 TAI_LE, 4868 4869 /** 4870 * Unicode script "Linear_B". 4871 */ 4872 LINEAR_B, 4873 4874 /** 4875 * Unicode script "Ugaritic". 4876 */ 4877 UGARITIC, 4878 4879 /** 4880 * Unicode script "Shavian". 4881 */ 4882 SHAVIAN, 4883 4884 /** 4885 * Unicode script "Osmanya". 4886 */ 4887 OSMANYA, 4888 4889 /** 4890 * Unicode script "Cypriot". 4891 */ 4892 CYPRIOT, 4893 4894 /** 4895 * Unicode script "Braille". 4896 */ 4897 BRAILLE, 4898 4899 /** 4900 * Unicode script "Buginese". 4901 */ 4902 BUGINESE, 4903 4904 /** 4905 * Unicode script "Coptic". 4906 */ 4907 COPTIC, 4908 4909 /** 4910 * Unicode script "New_Tai_Lue". 4911 */ 4912 NEW_TAI_LUE, 4913 4914 /** 4915 * Unicode script "Glagolitic". 4916 */ 4917 GLAGOLITIC, 4918 4919 /** 4920 * Unicode script "Tifinagh". 4921 */ 4922 TIFINAGH, 4923 4924 /** 4925 * Unicode script "Syloti_Nagri". 4926 */ 4927 SYLOTI_NAGRI, 4928 4929 /** 4930 * Unicode script "Old_Persian". 4931 */ 4932 OLD_PERSIAN, 4933 4934 /** 4935 * Unicode script "Kharoshthi". 4936 */ 4937 KHAROSHTHI, 4938 4939 /** 4940 * Unicode script "Balinese". 4941 */ 4942 BALINESE, 4943 4944 /** 4945 * Unicode script "Cuneiform". 4946 */ 4947 CUNEIFORM, 4948 4949 /** 4950 * Unicode script "Phoenician". 4951 */ 4952 PHOENICIAN, 4953 4954 /** 4955 * Unicode script "Phags_Pa". 4956 */ 4957 PHAGS_PA, 4958 4959 /** 4960 * Unicode script "Nko". 
4961 */ 4962 NKO, 4963 4964 /** 4965 * Unicode script "Sundanese". 4966 */ 4967 SUNDANESE, 4968 4969 /** 4970 * Unicode script "Batak". 4971 */ 4972 BATAK, 4973 4974 /** 4975 * Unicode script "Lepcha". 4976 */ 4977 LEPCHA, 4978 4979 /** 4980 * Unicode script "Ol_Chiki". 4981 */ 4982 OL_CHIKI, 4983 4984 /** 4985 * Unicode script "Vai". 4986 */ 4987 VAI, 4988 4989 /** 4990 * Unicode script "Saurashtra". 4991 */ 4992 SAURASHTRA, 4993 4994 /** 4995 * Unicode script "Kayah_Li". 4996 */ 4997 KAYAH_LI, 4998 4999 /** 5000 * Unicode script "Rejang". 5001 */ 5002 REJANG, 5003 5004 /** 5005 * Unicode script "Lycian". 5006 */ 5007 LYCIAN, 5008 5009 /** 5010 * Unicode script "Carian". 5011 */ 5012 CARIAN, 5013 5014 /** 5015 * Unicode script "Lydian". 5016 */ 5017 LYDIAN, 5018 5019 /** 5020 * Unicode script "Cham". 5021 */ 5022 CHAM, 5023 5024 /** 5025 * Unicode script "Tai_Tham". 5026 */ 5027 TAI_THAM, 5028 5029 /** 5030 * Unicode script "Tai_Viet". 5031 */ 5032 TAI_VIET, 5033 5034 /** 5035 * Unicode script "Avestan". 5036 */ 5037 AVESTAN, 5038 5039 /** 5040 * Unicode script "Egyptian_Hieroglyphs". 5041 */ 5042 EGYPTIAN_HIEROGLYPHS, 5043 5044 /** 5045 * Unicode script "Samaritan". 5046 */ 5047 SAMARITAN, 5048 5049 /** 5050 * Unicode script "Mandaic". 5051 */ 5052 MANDAIC, 5053 5054 /** 5055 * Unicode script "Lisu". 5056 */ 5057 LISU, 5058 5059 /** 5060 * Unicode script "Bamum". 5061 */ 5062 BAMUM, 5063 5064 /** 5065 * Unicode script "Javanese". 5066 */ 5067 JAVANESE, 5068 5069 /** 5070 * Unicode script "Meetei_Mayek". 5071 */ 5072 MEETEI_MAYEK, 5073 5074 /** 5075 * Unicode script "Imperial_Aramaic". 5076 */ 5077 IMPERIAL_ARAMAIC, 5078 5079 /** 5080 * Unicode script "Old_South_Arabian". 5081 */ 5082 OLD_SOUTH_ARABIAN, 5083 5084 /** 5085 * Unicode script "Inscriptional_Parthian". 5086 */ 5087 INSCRIPTIONAL_PARTHIAN, 5088 5089 /** 5090 * Unicode script "Inscriptional_Pahlavi". 5091 */ 5092 INSCRIPTIONAL_PAHLAVI, 5093 5094 /** 5095 * Unicode script "Old_Turkic". 
5096 */ 5097 OLD_TURKIC, 5098 5099 /** 5100 * Unicode script "Brahmi". 5101 */ 5102 BRAHMI, 5103 5104 /** 5105 * Unicode script "Kaithi". 5106 */ 5107 KAITHI, 5108 5109 /** 5110 * Unicode script "Meroitic Hieroglyphs". 5111 * @since 1.8 5112 */ 5113 MEROITIC_HIEROGLYPHS, 5114 5115 /** 5116 * Unicode script "Meroitic Cursive". 5117 * @since 1.8 5118 */ 5119 MEROITIC_CURSIVE, 5120 5121 /** 5122 * Unicode script "Sora Sompeng". 5123 * @since 1.8 5124 */ 5125 SORA_SOMPENG, 5126 5127 /** 5128 * Unicode script "Chakma". 5129 * @since 1.8 5130 */ 5131 CHAKMA, 5132 5133 /** 5134 * Unicode script "Sharada". 5135 * @since 1.8 5136 */ 5137 SHARADA, 5138 5139 /** 5140 * Unicode script "Takri". 5141 * @since 1.8 5142 */ 5143 TAKRI, 5144 5145 /** 5146 * Unicode script "Miao". 5147 * @since 1.8 5148 */ 5149 MIAO, 5150 5151 /** 5152 * Unicode script "Caucasian Albanian". 5153 * @since 9 5154 */ 5155 CAUCASIAN_ALBANIAN, 5156 5157 /** 5158 * Unicode script "Bassa Vah". 5159 * @since 9 5160 */ 5161 BASSA_VAH, 5162 5163 /** 5164 * Unicode script "Duployan". 5165 * @since 9 5166 */ 5167 DUPLOYAN, 5168 5169 /** 5170 * Unicode script "Elbasan". 5171 * @since 9 5172 */ 5173 ELBASAN, 5174 5175 /** 5176 * Unicode script "Grantha". 5177 * @since 9 5178 */ 5179 GRANTHA, 5180 5181 /** 5182 * Unicode script "Pahawh Hmong". 5183 * @since 9 5184 */ 5185 PAHAWH_HMONG, 5186 5187 /** 5188 * Unicode script "Khojki". 5189 * @since 9 5190 */ 5191 KHOJKI, 5192 5193 /** 5194 * Unicode script "Linear A". 5195 * @since 9 5196 */ 5197 LINEAR_A, 5198 5199 /** 5200 * Unicode script "Mahajani". 5201 * @since 9 5202 */ 5203 MAHAJANI, 5204 5205 /** 5206 * Unicode script "Manichaean". 5207 * @since 9 5208 */ 5209 MANICHAEAN, 5210 5211 /** 5212 * Unicode script "Mende Kikakui". 5213 * @since 9 5214 */ 5215 MENDE_KIKAKUI, 5216 5217 /** 5218 * Unicode script "Modi". 5219 * @since 9 5220 */ 5221 MODI, 5222 5223 /** 5224 * Unicode script "Mro". 
5225 * @since 9 5226 */ 5227 MRO, 5228 5229 /** 5230 * Unicode script "Old North Arabian". 5231 * @since 9 5232 */ 5233 OLD_NORTH_ARABIAN, 5234 5235 /** 5236 * Unicode script "Nabataean". 5237 * @since 9 5238 */ 5239 NABATAEAN, 5240 5241 /** 5242 * Unicode script "Palmyrene". 5243 * @since 9 5244 */ 5245 PALMYRENE, 5246 5247 /** 5248 * Unicode script "Pau Cin Hau". 5249 * @since 9 5250 */ 5251 PAU_CIN_HAU, 5252 5253 /** 5254 * Unicode script "Old Permic". 5255 * @since 9 5256 */ 5257 OLD_PERMIC, 5258 5259 /** 5260 * Unicode script "Psalter Pahlavi". 5261 * @since 9 5262 */ 5263 PSALTER_PAHLAVI, 5264 5265 /** 5266 * Unicode script "Siddham". 5267 * @since 9 5268 */ 5269 SIDDHAM, 5270 5271 /** 5272 * Unicode script "Khudawadi". 5273 * @since 9 5274 */ 5275 KHUDAWADI, 5276 5277 /** 5278 * Unicode script "Tirhuta". 5279 * @since 9 5280 */ 5281 TIRHUTA, 5282 5283 /** 5284 * Unicode script "Warang Citi". 5285 * @since 9 5286 */ 5287 WARANG_CITI, 5288 5289 /** 5290 * Unicode script "Ahom". 5291 * @since 9 5292 */ 5293 AHOM, 5294 5295 /** 5296 * Unicode script "Anatolian Hieroglyphs". 5297 * @since 9 5298 */ 5299 ANATOLIAN_HIEROGLYPHS, 5300 5301 /** 5302 * Unicode script "Hatran". 5303 * @since 9 5304 */ 5305 HATRAN, 5306 5307 /** 5308 * Unicode script "Multani". 5309 * @since 9 5310 */ 5311 MULTANI, 5312 5313 /** 5314 * Unicode script "Old Hungarian". 5315 * @since 9 5316 */ 5317 OLD_HUNGARIAN, 5318 5319 /** 5320 * Unicode script "SignWriting". 5321 * @since 9 5322 */ 5323 SIGNWRITING, 5324 5325 /** 5326 * Unicode script "Adlam". 5327 * @since 11 5328 */ 5329 ADLAM, 5330 5331 /** 5332 * Unicode script "Bhaiksuki". 5333 * @since 11 5334 */ 5335 BHAIKSUKI, 5336 5337 /** 5338 * Unicode script "Marchen". 5339 * @since 11 5340 */ 5341 MARCHEN, 5342 5343 /** 5344 * Unicode script "Newa". 5345 * @since 11 5346 */ 5347 NEWA, 5348 5349 /** 5350 * Unicode script "Osage". 5351 * @since 11 5352 */ 5353 OSAGE, 5354 5355 /** 5356 * Unicode script "Tangut". 
5357 * @since 11 5358 */ 5359 TANGUT, 5360 5361 /** 5362 * Unicode script "Masaram Gondi". 5363 * @since 11 5364 */ 5365 MASARAM_GONDI, 5366 5367 /** 5368 * Unicode script "Nushu". 5369 * @since 11 5370 */ 5371 NUSHU, 5372 5373 /** 5374 * Unicode script "Soyombo". 5375 * @since 11 5376 */ 5377 SOYOMBO, 5378 5379 /** 5380 * Unicode script "Zanabazar Square". 5381 * @since 11 5382 */ 5383 ZANABAZAR_SQUARE, 5384 5385 /** 5386 * Unicode script "Hanifi Rohingya". 5387 * @since 12 5388 */ 5389 HANIFI_ROHINGYA, 5390 5391 /** 5392 * Unicode script "Old Sogdian". 5393 * @since 12 5394 */ 5395 OLD_SOGDIAN, 5396 5397 /** 5398 * Unicode script "Sogdian". 5399 * @since 12 5400 */ 5401 SOGDIAN, 5402 5403 /** 5404 * Unicode script "Dogra". 5405 * @since 12 5406 */ 5407 DOGRA, 5408 5409 /** 5410 * Unicode script "Gunjala Gondi". 5411 * @since 12 5412 */ 5413 GUNJALA_GONDI, 5414 5415 /** 5416 * Unicode script "Makasar". 5417 * @since 12 5418 */ 5419 MAKASAR, 5420 5421 /** 5422 * Unicode script "Medefaidrin". 5423 * @since 12 5424 */ 5425 MEDEFAIDRIN, 5426 5427 /** 5428 * Unicode script "Elymaic". 5429 * @since 13 5430 */ 5431 ELYMAIC, 5432 5433 /** 5434 * Unicode script "Nandinagari". 5435 * @since 13 5436 */ 5437 NANDINAGARI, 5438 5439 /** 5440 * Unicode script "Nyiakeng Puachue Hmong". 5441 * @since 13 5442 */ 5443 NYIAKENG_PUACHUE_HMONG, 5444 5445 /** 5446 * Unicode script "Wancho". 5447 * @since 13 5448 */ 5449 WANCHO, 5450 5451 /** 5452 * Unicode script "Yezidi". 5453 * @since 15 5454 */ 5455 YEZIDI, 5456 5457 /** 5458 * Unicode script "Chorasmian". 5459 * @since 15 5460 */ 5461 CHORASMIAN, 5462 5463 /** 5464 * Unicode script "Dives Akuru". 5465 * @since 15 5466 */ 5467 DIVES_AKURU, 5468 5469 /** 5470 * Unicode script "Khitan Small Script". 5471 * @since 15 5472 */ 5473 KHITAN_SMALL_SCRIPT, 5474 5475 /** 5476 * Unicode script "Vithkuqi". 5477 * @since 19 5478 */ 5479 VITHKUQI, 5480 5481 /** 5482 * Unicode script "Old Uyghur". 
5483 * @since 19 5484 */ 5485 OLD_UYGHUR, 5486 5487 /** 5488 * Unicode script "Cypro Minoan". 5489 * @since 19 5490 */ 5491 CYPRO_MINOAN, 5492 5493 /** 5494 * Unicode script "Tangsa". 5495 * @since 19 5496 */ 5497 TANGSA, 5498 5499 /** 5500 * Unicode script "Toto". 5501 * @since 19 5502 */ 5503 TOTO, 5504 5505 /** 5506 * Unicode script "Kawi". 5507 * @since 20 5508 */ 5509 KAWI, 5510 5511 /** 5512 * Unicode script "Nag Mundari". 5513 * @since 20 5514 */ 5515 NAG_MUNDARI, 5516 5517 /** 5518 * Unicode script "Todhri". 5519 * @since 24 5520 */ 5521 TODHRI, 5522 5523 /** 5524 * Unicode script "Garay". 5525 * @since 24 5526 */ 5527 GARAY, 5528 5529 /** 5530 * Unicode script "Tulu Tigalari". 5531 * @since 24 5532 */ 5533 TULU_TIGALARI, 5534 5535 /** 5536 * Unicode script "Sunuwar". 5537 * @since 24 5538 */ 5539 SUNUWAR, 5540 5541 /** 5542 * Unicode script "Gurung Khema". 5543 * @since 24 5544 */ 5545 GURUNG_KHEMA, 5546 5547 /** 5548 * Unicode script "Kirat Rai". 5549 * @since 24 5550 */ 5551 KIRAT_RAI, 5552 5553 /** 5554 * Unicode script "Ol Onal". 5555 * @since 24 5556 */ 5557 OL_ONAL, 5558 5559 /** 5560 * Unicode script "Unknown". 5561 */ 5562 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 
5563 5564 private static final int[] scriptStarts = { 5565 0x0000, // 0000..0040; COMMON 5566 0x0041, // 0041..005A; LATIN 5567 0x005B, // 005B..0060; COMMON 5568 0x0061, // 0061..007A; LATIN 5569 0x007B, // 007B..00A9; COMMON 5570 0x00AA, // 00AA ; LATIN 5571 0x00AB, // 00AB..00B9; COMMON 5572 0x00BA, // 00BA ; LATIN 5573 0x00BB, // 00BB..00BF; COMMON 5574 0x00C0, // 00C0..00D6; LATIN 5575 0x00D7, // 00D7 ; COMMON 5576 0x00D8, // 00D8..00F6; LATIN 5577 0x00F7, // 00F7 ; COMMON 5578 0x00F8, // 00F8..02B8; LATIN 5579 0x02B9, // 02B9..02DF; COMMON 5580 0x02E0, // 02E0..02E4; LATIN 5581 0x02E5, // 02E5..02E9; COMMON 5582 0x02EA, // 02EA..02EB; BOPOMOFO 5583 0x02EC, // 02EC..02FF; COMMON 5584 0x0300, // 0300..036F; INHERITED 5585 0x0370, // 0370..0373; GREEK 5586 0x0374, // 0374 ; COMMON 5587 0x0375, // 0375..0377; GREEK 5588 0x0378, // 0378..0379; UNKNOWN 5589 0x037A, // 037A..037D; GREEK 5590 0x037E, // 037E ; COMMON 5591 0x037F, // 037F ; GREEK 5592 0x0380, // 0380..0383; UNKNOWN 5593 0x0384, // 0384 ; GREEK 5594 0x0385, // 0385 ; COMMON 5595 0x0386, // 0386 ; GREEK 5596 0x0387, // 0387 ; COMMON 5597 0x0388, // 0388..038A; GREEK 5598 0x038B, // 038B ; UNKNOWN 5599 0x038C, // 038C ; GREEK 5600 0x038D, // 038D ; UNKNOWN 5601 0x038E, // 038E..03A1; GREEK 5602 0x03A2, // 03A2 ; UNKNOWN 5603 0x03A3, // 03A3..03E1; GREEK 5604 0x03E2, // 03E2..03EF; COPTIC 5605 0x03F0, // 03F0..03FF; GREEK 5606 0x0400, // 0400..0484; CYRILLIC 5607 0x0485, // 0485..0486; INHERITED 5608 0x0487, // 0487..052F; CYRILLIC 5609 0x0530, // 0530 ; UNKNOWN 5610 0x0531, // 0531..0556; ARMENIAN 5611 0x0557, // 0557..0558; UNKNOWN 5612 0x0559, // 0559..058A; ARMENIAN 5613 0x058B, // 058B..058C; UNKNOWN 5614 0x058D, // 058D..058F; ARMENIAN 5615 0x0590, // 0590 ; UNKNOWN 5616 0x0591, // 0591..05C7; HEBREW 5617 0x05C8, // 05C8..05CF; UNKNOWN 5618 0x05D0, // 05D0..05EA; HEBREW 5619 0x05EB, // 05EB..05EE; UNKNOWN 5620 0x05EF, // 05EF..05F4; HEBREW 5621 0x05F5, // 05F5..05FF; UNKNOWN 5622 0x0600, // 
0600..0604; ARABIC 5623 0x0605, // 0605 ; COMMON 5624 0x0606, // 0606..060B; ARABIC 5625 0x060C, // 060C ; COMMON 5626 0x060D, // 060D..061A; ARABIC 5627 0x061B, // 061B ; COMMON 5628 0x061C, // 061C..061E; ARABIC 5629 0x061F, // 061F ; COMMON 5630 0x0620, // 0620..063F; ARABIC 5631 0x0640, // 0640 ; COMMON 5632 0x0641, // 0641..064A; ARABIC 5633 0x064B, // 064B..0655; INHERITED 5634 0x0656, // 0656..066F; ARABIC 5635 0x0670, // 0670 ; INHERITED 5636 0x0671, // 0671..06DC; ARABIC 5637 0x06DD, // 06DD ; COMMON 5638 0x06DE, // 06DE..06FF; ARABIC 5639 0x0700, // 0700..070D; SYRIAC 5640 0x070E, // 070E ; UNKNOWN 5641 0x070F, // 070F..074A; SYRIAC 5642 0x074B, // 074B..074C; UNKNOWN 5643 0x074D, // 074D..074F; SYRIAC 5644 0x0750, // 0750..077F; ARABIC 5645 0x0780, // 0780..07B1; THAANA 5646 0x07B2, // 07B2..07BF; UNKNOWN 5647 0x07C0, // 07C0..07FA; NKO 5648 0x07FB, // 07FB..07FC; UNKNOWN 5649 0x07FD, // 07FD..07FF; NKO 5650 0x0800, // 0800..082D; SAMARITAN 5651 0x082E, // 082E..082F; UNKNOWN 5652 0x0830, // 0830..083E; SAMARITAN 5653 0x083F, // 083F ; UNKNOWN 5654 0x0840, // 0840..085B; MANDAIC 5655 0x085C, // 085C..085D; UNKNOWN 5656 0x085E, // 085E ; MANDAIC 5657 0x085F, // 085F ; UNKNOWN 5658 0x0860, // 0860..086A; SYRIAC 5659 0x086B, // 086B..086F; UNKNOWN 5660 0x0870, // 0870..088E; ARABIC 5661 0x088F, // 088F ; UNKNOWN 5662 0x0890, // 0890..0891; ARABIC 5663 0x0892, // 0892..0896; UNKNOWN 5664 0x0897, // 0897..08E1; ARABIC 5665 0x08E2, // 08E2 ; COMMON 5666 0x08E3, // 08E3..08FF; ARABIC 5667 0x0900, // 0900..0950; DEVANAGARI 5668 0x0951, // 0951..0954; INHERITED 5669 0x0955, // 0955..0963; DEVANAGARI 5670 0x0964, // 0964..0965; COMMON 5671 0x0966, // 0966..097F; DEVANAGARI 5672 0x0980, // 0980..0983; BENGALI 5673 0x0984, // 0984 ; UNKNOWN 5674 0x0985, // 0985..098C; BENGALI 5675 0x098D, // 098D..098E; UNKNOWN 5676 0x098F, // 098F..0990; BENGALI 5677 0x0991, // 0991..0992; UNKNOWN 5678 0x0993, // 0993..09A8; BENGALI 5679 0x09A9, // 09A9 ; UNKNOWN 5680 0x09AA, // 
09AA..09B0; BENGALI 5681 0x09B1, // 09B1 ; UNKNOWN 5682 0x09B2, // 09B2 ; BENGALI 5683 0x09B3, // 09B3..09B5; UNKNOWN 5684 0x09B6, // 09B6..09B9; BENGALI 5685 0x09BA, // 09BA..09BB; UNKNOWN 5686 0x09BC, // 09BC..09C4; BENGALI 5687 0x09C5, // 09C5..09C6; UNKNOWN 5688 0x09C7, // 09C7..09C8; BENGALI 5689 0x09C9, // 09C9..09CA; UNKNOWN 5690 0x09CB, // 09CB..09CE; BENGALI 5691 0x09CF, // 09CF..09D6; UNKNOWN 5692 0x09D7, // 09D7 ; BENGALI 5693 0x09D8, // 09D8..09DB; UNKNOWN 5694 0x09DC, // 09DC..09DD; BENGALI 5695 0x09DE, // 09DE ; UNKNOWN 5696 0x09DF, // 09DF..09E3; BENGALI 5697 0x09E4, // 09E4..09E5; UNKNOWN 5698 0x09E6, // 09E6..09FE; BENGALI 5699 0x09FF, // 09FF..0A00; UNKNOWN 5700 0x0A01, // 0A01..0A03; GURMUKHI 5701 0x0A04, // 0A04 ; UNKNOWN 5702 0x0A05, // 0A05..0A0A; GURMUKHI 5703 0x0A0B, // 0A0B..0A0E; UNKNOWN 5704 0x0A0F, // 0A0F..0A10; GURMUKHI 5705 0x0A11, // 0A11..0A12; UNKNOWN 5706 0x0A13, // 0A13..0A28; GURMUKHI 5707 0x0A29, // 0A29 ; UNKNOWN 5708 0x0A2A, // 0A2A..0A30; GURMUKHI 5709 0x0A31, // 0A31 ; UNKNOWN 5710 0x0A32, // 0A32..0A33; GURMUKHI 5711 0x0A34, // 0A34 ; UNKNOWN 5712 0x0A35, // 0A35..0A36; GURMUKHI 5713 0x0A37, // 0A37 ; UNKNOWN 5714 0x0A38, // 0A38..0A39; GURMUKHI 5715 0x0A3A, // 0A3A..0A3B; UNKNOWN 5716 0x0A3C, // 0A3C ; GURMUKHI 5717 0x0A3D, // 0A3D ; UNKNOWN 5718 0x0A3E, // 0A3E..0A42; GURMUKHI 5719 0x0A43, // 0A43..0A46; UNKNOWN 5720 0x0A47, // 0A47..0A48; GURMUKHI 5721 0x0A49, // 0A49..0A4A; UNKNOWN 5722 0x0A4B, // 0A4B..0A4D; GURMUKHI 5723 0x0A4E, // 0A4E..0A50; UNKNOWN 5724 0x0A51, // 0A51 ; GURMUKHI 5725 0x0A52, // 0A52..0A58; UNKNOWN 5726 0x0A59, // 0A59..0A5C; GURMUKHI 5727 0x0A5D, // 0A5D ; UNKNOWN 5728 0x0A5E, // 0A5E ; GURMUKHI 5729 0x0A5F, // 0A5F..0A65; UNKNOWN 5730 0x0A66, // 0A66..0A76; GURMUKHI 5731 0x0A77, // 0A77..0A80; UNKNOWN 5732 0x0A81, // 0A81..0A83; GUJARATI 5733 0x0A84, // 0A84 ; UNKNOWN 5734 0x0A85, // 0A85..0A8D; GUJARATI 5735 0x0A8E, // 0A8E ; UNKNOWN 5736 0x0A8F, // 0A8F..0A91; GUJARATI 5737 0x0A92, // 0A92 ; 
UNKNOWN 5738 0x0A93, // 0A93..0AA8; GUJARATI 5739 0x0AA9, // 0AA9 ; UNKNOWN 5740 0x0AAA, // 0AAA..0AB0; GUJARATI 5741 0x0AB1, // 0AB1 ; UNKNOWN 5742 0x0AB2, // 0AB2..0AB3; GUJARATI 5743 0x0AB4, // 0AB4 ; UNKNOWN 5744 0x0AB5, // 0AB5..0AB9; GUJARATI 5745 0x0ABA, // 0ABA..0ABB; UNKNOWN 5746 0x0ABC, // 0ABC..0AC5; GUJARATI 5747 0x0AC6, // 0AC6 ; UNKNOWN 5748 0x0AC7, // 0AC7..0AC9; GUJARATI 5749 0x0ACA, // 0ACA ; UNKNOWN 5750 0x0ACB, // 0ACB..0ACD; GUJARATI 5751 0x0ACE, // 0ACE..0ACF; UNKNOWN 5752 0x0AD0, // 0AD0 ; GUJARATI 5753 0x0AD1, // 0AD1..0ADF; UNKNOWN 5754 0x0AE0, // 0AE0..0AE3; GUJARATI 5755 0x0AE4, // 0AE4..0AE5; UNKNOWN 5756 0x0AE6, // 0AE6..0AF1; GUJARATI 5757 0x0AF2, // 0AF2..0AF8; UNKNOWN 5758 0x0AF9, // 0AF9..0AFF; GUJARATI 5759 0x0B00, // 0B00 ; UNKNOWN 5760 0x0B01, // 0B01..0B03; ORIYA 5761 0x0B04, // 0B04 ; UNKNOWN 5762 0x0B05, // 0B05..0B0C; ORIYA 5763 0x0B0D, // 0B0D..0B0E; UNKNOWN 5764 0x0B0F, // 0B0F..0B10; ORIYA 5765 0x0B11, // 0B11..0B12; UNKNOWN 5766 0x0B13, // 0B13..0B28; ORIYA 5767 0x0B29, // 0B29 ; UNKNOWN 5768 0x0B2A, // 0B2A..0B30; ORIYA 5769 0x0B31, // 0B31 ; UNKNOWN 5770 0x0B32, // 0B32..0B33; ORIYA 5771 0x0B34, // 0B34 ; UNKNOWN 5772 0x0B35, // 0B35..0B39; ORIYA 5773 0x0B3A, // 0B3A..0B3B; UNKNOWN 5774 0x0B3C, // 0B3C..0B44; ORIYA 5775 0x0B45, // 0B45..0B46; UNKNOWN 5776 0x0B47, // 0B47..0B48; ORIYA 5777 0x0B49, // 0B49..0B4A; UNKNOWN 5778 0x0B4B, // 0B4B..0B4D; ORIYA 5779 0x0B4E, // 0B4E..0B54; UNKNOWN 5780 0x0B55, // 0B55..0B57; ORIYA 5781 0x0B58, // 0B58..0B5B; UNKNOWN 5782 0x0B5C, // 0B5C..0B5D; ORIYA 5783 0x0B5E, // 0B5E ; UNKNOWN 5784 0x0B5F, // 0B5F..0B63; ORIYA 5785 0x0B64, // 0B64..0B65; UNKNOWN 5786 0x0B66, // 0B66..0B77; ORIYA 5787 0x0B78, // 0B78..0B81; UNKNOWN 5788 0x0B82, // 0B82..0B83; TAMIL 5789 0x0B84, // 0B84 ; UNKNOWN 5790 0x0B85, // 0B85..0B8A; TAMIL 5791 0x0B8B, // 0B8B..0B8D; UNKNOWN 5792 0x0B8E, // 0B8E..0B90; TAMIL 5793 0x0B91, // 0B91 ; UNKNOWN 5794 0x0B92, // 0B92..0B95; TAMIL 5795 0x0B96, // 0B96..0B98; 
UNKNOWN 5796 0x0B99, // 0B99..0B9A; TAMIL 5797 0x0B9B, // 0B9B ; UNKNOWN 5798 0x0B9C, // 0B9C ; TAMIL 5799 0x0B9D, // 0B9D ; UNKNOWN 5800 0x0B9E, // 0B9E..0B9F; TAMIL 5801 0x0BA0, // 0BA0..0BA2; UNKNOWN 5802 0x0BA3, // 0BA3..0BA4; TAMIL 5803 0x0BA5, // 0BA5..0BA7; UNKNOWN 5804 0x0BA8, // 0BA8..0BAA; TAMIL 5805 0x0BAB, // 0BAB..0BAD; UNKNOWN 5806 0x0BAE, // 0BAE..0BB9; TAMIL 5807 0x0BBA, // 0BBA..0BBD; UNKNOWN 5808 0x0BBE, // 0BBE..0BC2; TAMIL 5809 0x0BC3, // 0BC3..0BC5; UNKNOWN 5810 0x0BC6, // 0BC6..0BC8; TAMIL 5811 0x0BC9, // 0BC9 ; UNKNOWN 5812 0x0BCA, // 0BCA..0BCD; TAMIL 5813 0x0BCE, // 0BCE..0BCF; UNKNOWN 5814 0x0BD0, // 0BD0 ; TAMIL 5815 0x0BD1, // 0BD1..0BD6; UNKNOWN 5816 0x0BD7, // 0BD7 ; TAMIL 5817 0x0BD8, // 0BD8..0BE5; UNKNOWN 5818 0x0BE6, // 0BE6..0BFA; TAMIL 5819 0x0BFB, // 0BFB..0BFF; UNKNOWN 5820 0x0C00, // 0C00..0C0C; TELUGU 5821 0x0C0D, // 0C0D ; UNKNOWN 5822 0x0C0E, // 0C0E..0C10; TELUGU 5823 0x0C11, // 0C11 ; UNKNOWN 5824 0x0C12, // 0C12..0C28; TELUGU 5825 0x0C29, // 0C29 ; UNKNOWN 5826 0x0C2A, // 0C2A..0C39; TELUGU 5827 0x0C3A, // 0C3A..0C3B; UNKNOWN 5828 0x0C3C, // 0C3C..0C44; TELUGU 5829 0x0C45, // 0C45 ; UNKNOWN 5830 0x0C46, // 0C46..0C48; TELUGU 5831 0x0C49, // 0C49 ; UNKNOWN 5832 0x0C4A, // 0C4A..0C4D; TELUGU 5833 0x0C4E, // 0C4E..0C54; UNKNOWN 5834 0x0C55, // 0C55..0C56; TELUGU 5835 0x0C57, // 0C57 ; UNKNOWN 5836 0x0C58, // 0C58..0C5A; TELUGU 5837 0x0C5B, // 0C5B..0C5C; UNKNOWN 5838 0x0C5D, // 0C5D ; TELUGU 5839 0x0C5E, // 0C5E..0C5F; UNKNOWN 5840 0x0C60, // 0C60..0C63; TELUGU 5841 0x0C64, // 0C64..0C65; UNKNOWN 5842 0x0C66, // 0C66..0C6F; TELUGU 5843 0x0C70, // 0C70..0C76; UNKNOWN 5844 0x0C77, // 0C77..0C7F; TELUGU 5845 0x0C80, // 0C80..0C8C; KANNADA 5846 0x0C8D, // 0C8D ; UNKNOWN 5847 0x0C8E, // 0C8E..0C90; KANNADA 5848 0x0C91, // 0C91 ; UNKNOWN 5849 0x0C92, // 0C92..0CA8; KANNADA 5850 0x0CA9, // 0CA9 ; UNKNOWN 5851 0x0CAA, // 0CAA..0CB3; KANNADA 5852 0x0CB4, // 0CB4 ; UNKNOWN 5853 0x0CB5, // 0CB5..0CB9; KANNADA 5854 0x0CBA, // 
0CBA..0CBB; UNKNOWN 5855 0x0CBC, // 0CBC..0CC4; KANNADA 5856 0x0CC5, // 0CC5 ; UNKNOWN 5857 0x0CC6, // 0CC6..0CC8; KANNADA 5858 0x0CC9, // 0CC9 ; UNKNOWN 5859 0x0CCA, // 0CCA..0CCD; KANNADA 5860 0x0CCE, // 0CCE..0CD4; UNKNOWN 5861 0x0CD5, // 0CD5..0CD6; KANNADA 5862 0x0CD7, // 0CD7..0CDC; UNKNOWN 5863 0x0CDD, // 0CDD..0CDE; KANNADA 5864 0x0CDF, // 0CDF ; UNKNOWN 5865 0x0CE0, // 0CE0..0CE3; KANNADA 5866 0x0CE4, // 0CE4..0CE5; UNKNOWN 5867 0x0CE6, // 0CE6..0CEF; KANNADA 5868 0x0CF0, // 0CF0 ; UNKNOWN 5869 0x0CF1, // 0CF1..0CF3; KANNADA 5870 0x0CF4, // 0CF4..0CFF; UNKNOWN 5871 0x0D00, // 0D00..0D0C; MALAYALAM 5872 0x0D0D, // 0D0D ; UNKNOWN 5873 0x0D0E, // 0D0E..0D10; MALAYALAM 5874 0x0D11, // 0D11 ; UNKNOWN 5875 0x0D12, // 0D12..0D44; MALAYALAM 5876 0x0D45, // 0D45 ; UNKNOWN 5877 0x0D46, // 0D46..0D48; MALAYALAM 5878 0x0D49, // 0D49 ; UNKNOWN 5879 0x0D4A, // 0D4A..0D4F; MALAYALAM 5880 0x0D50, // 0D50..0D53; UNKNOWN 5881 0x0D54, // 0D54..0D63; MALAYALAM 5882 0x0D64, // 0D64..0D65; UNKNOWN 5883 0x0D66, // 0D66..0D7F; MALAYALAM 5884 0x0D80, // 0D80 ; UNKNOWN 5885 0x0D81, // 0D81..0D83; SINHALA 5886 0x0D84, // 0D84 ; UNKNOWN 5887 0x0D85, // 0D85..0D96; SINHALA 5888 0x0D97, // 0D97..0D99; UNKNOWN 5889 0x0D9A, // 0D9A..0DB1; SINHALA 5890 0x0DB2, // 0DB2 ; UNKNOWN 5891 0x0DB3, // 0DB3..0DBB; SINHALA 5892 0x0DBC, // 0DBC ; UNKNOWN 5893 0x0DBD, // 0DBD ; SINHALA 5894 0x0DBE, // 0DBE..0DBF; UNKNOWN 5895 0x0DC0, // 0DC0..0DC6; SINHALA 5896 0x0DC7, // 0DC7..0DC9; UNKNOWN 5897 0x0DCA, // 0DCA ; SINHALA 5898 0x0DCB, // 0DCB..0DCE; UNKNOWN 5899 0x0DCF, // 0DCF..0DD4; SINHALA 5900 0x0DD5, // 0DD5 ; UNKNOWN 5901 0x0DD6, // 0DD6 ; SINHALA 5902 0x0DD7, // 0DD7 ; UNKNOWN 5903 0x0DD8, // 0DD8..0DDF; SINHALA 5904 0x0DE0, // 0DE0..0DE5; UNKNOWN 5905 0x0DE6, // 0DE6..0DEF; SINHALA 5906 0x0DF0, // 0DF0..0DF1; UNKNOWN 5907 0x0DF2, // 0DF2..0DF4; SINHALA 5908 0x0DF5, // 0DF5..0E00; UNKNOWN 5909 0x0E01, // 0E01..0E3A; THAI 5910 0x0E3B, // 0E3B..0E3E; UNKNOWN 5911 0x0E3F, // 0E3F ; COMMON 5912 
0x0E40, // 0E40..0E5B; THAI 5913 0x0E5C, // 0E5C..0E80; UNKNOWN 5914 0x0E81, // 0E81..0E82; LAO 5915 0x0E83, // 0E83 ; UNKNOWN 5916 0x0E84, // 0E84 ; LAO 5917 0x0E85, // 0E85 ; UNKNOWN 5918 0x0E86, // 0E86..0E8A; LAO 5919 0x0E8B, // 0E8B ; UNKNOWN 5920 0x0E8C, // 0E8C..0EA3; LAO 5921 0x0EA4, // 0EA4 ; UNKNOWN 5922 0x0EA5, // 0EA5 ; LAO 5923 0x0EA6, // 0EA6 ; UNKNOWN 5924 0x0EA7, // 0EA7..0EBD; LAO 5925 0x0EBE, // 0EBE..0EBF; UNKNOWN 5926 0x0EC0, // 0EC0..0EC4; LAO 5927 0x0EC5, // 0EC5 ; UNKNOWN 5928 0x0EC6, // 0EC6 ; LAO 5929 0x0EC7, // 0EC7 ; UNKNOWN 5930 0x0EC8, // 0EC8..0ECE; LAO 5931 0x0ECF, // 0ECF ; UNKNOWN 5932 0x0ED0, // 0ED0..0ED9; LAO 5933 0x0EDA, // 0EDA..0EDB; UNKNOWN 5934 0x0EDC, // 0EDC..0EDF; LAO 5935 0x0EE0, // 0EE0..0EFF; UNKNOWN 5936 0x0F00, // 0F00..0F47; TIBETAN 5937 0x0F48, // 0F48 ; UNKNOWN 5938 0x0F49, // 0F49..0F6C; TIBETAN 5939 0x0F6D, // 0F6D..0F70; UNKNOWN 5940 0x0F71, // 0F71..0F97; TIBETAN 5941 0x0F98, // 0F98 ; UNKNOWN 5942 0x0F99, // 0F99..0FBC; TIBETAN 5943 0x0FBD, // 0FBD ; UNKNOWN 5944 0x0FBE, // 0FBE..0FCC; TIBETAN 5945 0x0FCD, // 0FCD ; UNKNOWN 5946 0x0FCE, // 0FCE..0FD4; TIBETAN 5947 0x0FD5, // 0FD5..0FD8; COMMON 5948 0x0FD9, // 0FD9..0FDA; TIBETAN 5949 0x0FDB, // 0FDB..0FFF; UNKNOWN 5950 0x1000, // 1000..109F; MYANMAR 5951 0x10A0, // 10A0..10C5; GEORGIAN 5952 0x10C6, // 10C6 ; UNKNOWN 5953 0x10C7, // 10C7 ; GEORGIAN 5954 0x10C8, // 10C8..10CC; UNKNOWN 5955 0x10CD, // 10CD ; GEORGIAN 5956 0x10CE, // 10CE..10CF; UNKNOWN 5957 0x10D0, // 10D0..10FA; GEORGIAN 5958 0x10FB, // 10FB ; COMMON 5959 0x10FC, // 10FC..10FF; GEORGIAN 5960 0x1100, // 1100..11FF; HANGUL 5961 0x1200, // 1200..1248; ETHIOPIC 5962 0x1249, // 1249 ; UNKNOWN 5963 0x124A, // 124A..124D; ETHIOPIC 5964 0x124E, // 124E..124F; UNKNOWN 5965 0x1250, // 1250..1256; ETHIOPIC 5966 0x1257, // 1257 ; UNKNOWN 5967 0x1258, // 1258 ; ETHIOPIC 5968 0x1259, // 1259 ; UNKNOWN 5969 0x125A, // 125A..125D; ETHIOPIC 5970 0x125E, // 125E..125F; UNKNOWN 5971 0x1260, // 1260..1288; 
ETHIOPIC 5972 0x1289, // 1289 ; UNKNOWN 5973 0x128A, // 128A..128D; ETHIOPIC 5974 0x128E, // 128E..128F; UNKNOWN 5975 0x1290, // 1290..12B0; ETHIOPIC 5976 0x12B1, // 12B1 ; UNKNOWN 5977 0x12B2, // 12B2..12B5; ETHIOPIC 5978 0x12B6, // 12B6..12B7; UNKNOWN 5979 0x12B8, // 12B8..12BE; ETHIOPIC 5980 0x12BF, // 12BF ; UNKNOWN 5981 0x12C0, // 12C0 ; ETHIOPIC 5982 0x12C1, // 12C1 ; UNKNOWN 5983 0x12C2, // 12C2..12C5; ETHIOPIC 5984 0x12C6, // 12C6..12C7; UNKNOWN 5985 0x12C8, // 12C8..12D6; ETHIOPIC 5986 0x12D7, // 12D7 ; UNKNOWN 5987 0x12D8, // 12D8..1310; ETHIOPIC 5988 0x1311, // 1311 ; UNKNOWN 5989 0x1312, // 1312..1315; ETHIOPIC 5990 0x1316, // 1316..1317; UNKNOWN 5991 0x1318, // 1318..135A; ETHIOPIC 5992 0x135B, // 135B..135C; UNKNOWN 5993 0x135D, // 135D..137C; ETHIOPIC 5994 0x137D, // 137D..137F; UNKNOWN 5995 0x1380, // 1380..1399; ETHIOPIC 5996 0x139A, // 139A..139F; UNKNOWN 5997 0x13A0, // 13A0..13F5; CHEROKEE 5998 0x13F6, // 13F6..13F7; UNKNOWN 5999 0x13F8, // 13F8..13FD; CHEROKEE 6000 0x13FE, // 13FE..13FF; UNKNOWN 6001 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 6002 0x1680, // 1680..169C; OGHAM 6003 0x169D, // 169D..169F; UNKNOWN 6004 0x16A0, // 16A0..16EA; RUNIC 6005 0x16EB, // 16EB..16ED; COMMON 6006 0x16EE, // 16EE..16F8; RUNIC 6007 0x16F9, // 16F9..16FF; UNKNOWN 6008 0x1700, // 1700..1715; TAGALOG 6009 0x1716, // 1716..171E; UNKNOWN 6010 0x171F, // 171F ; TAGALOG 6011 0x1720, // 1720..1734; HANUNOO 6012 0x1735, // 1735..1736; COMMON 6013 0x1737, // 1737..173F; UNKNOWN 6014 0x1740, // 1740..1753; BUHID 6015 0x1754, // 1754..175F; UNKNOWN 6016 0x1760, // 1760..176C; TAGBANWA 6017 0x176D, // 176D ; UNKNOWN 6018 0x176E, // 176E..1770; TAGBANWA 6019 0x1771, // 1771 ; UNKNOWN 6020 0x1772, // 1772..1773; TAGBANWA 6021 0x1774, // 1774..177F; UNKNOWN 6022 0x1780, // 1780..17DD; KHMER 6023 0x17DE, // 17DE..17DF; UNKNOWN 6024 0x17E0, // 17E0..17E9; KHMER 6025 0x17EA, // 17EA..17EF; UNKNOWN 6026 0x17F0, // 17F0..17F9; KHMER 6027 0x17FA, // 17FA..17FF; UNKNOWN 6028 
0x1800, // 1800..1801; MONGOLIAN 6029 0x1802, // 1802..1803; COMMON 6030 0x1804, // 1804 ; MONGOLIAN 6031 0x1805, // 1805 ; COMMON 6032 0x1806, // 1806..1819; MONGOLIAN 6033 0x181A, // 181A..181F; UNKNOWN 6034 0x1820, // 1820..1878; MONGOLIAN 6035 0x1879, // 1879..187F; UNKNOWN 6036 0x1880, // 1880..18AA; MONGOLIAN 6037 0x18AB, // 18AB..18AF; UNKNOWN 6038 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 6039 0x18F6, // 18F6..18FF; UNKNOWN 6040 0x1900, // 1900..191E; LIMBU 6041 0x191F, // 191F ; UNKNOWN 6042 0x1920, // 1920..192B; LIMBU 6043 0x192C, // 192C..192F; UNKNOWN 6044 0x1930, // 1930..193B; LIMBU 6045 0x193C, // 193C..193F; UNKNOWN 6046 0x1940, // 1940 ; LIMBU 6047 0x1941, // 1941..1943; UNKNOWN 6048 0x1944, // 1944..194F; LIMBU 6049 0x1950, // 1950..196D; TAI_LE 6050 0x196E, // 196E..196F; UNKNOWN 6051 0x1970, // 1970..1974; TAI_LE 6052 0x1975, // 1975..197F; UNKNOWN 6053 0x1980, // 1980..19AB; NEW_TAI_LUE 6054 0x19AC, // 19AC..19AF; UNKNOWN 6055 0x19B0, // 19B0..19C9; NEW_TAI_LUE 6056 0x19CA, // 19CA..19CF; UNKNOWN 6057 0x19D0, // 19D0..19DA; NEW_TAI_LUE 6058 0x19DB, // 19DB..19DD; UNKNOWN 6059 0x19DE, // 19DE..19DF; NEW_TAI_LUE 6060 0x19E0, // 19E0..19FF; KHMER 6061 0x1A00, // 1A00..1A1B; BUGINESE 6062 0x1A1C, // 1A1C..1A1D; UNKNOWN 6063 0x1A1E, // 1A1E..1A1F; BUGINESE 6064 0x1A20, // 1A20..1A5E; TAI_THAM 6065 0x1A5F, // 1A5F ; UNKNOWN 6066 0x1A60, // 1A60..1A7C; TAI_THAM 6067 0x1A7D, // 1A7D..1A7E; UNKNOWN 6068 0x1A7F, // 1A7F..1A89; TAI_THAM 6069 0x1A8A, // 1A8A..1A8F; UNKNOWN 6070 0x1A90, // 1A90..1A99; TAI_THAM 6071 0x1A9A, // 1A9A..1A9F; UNKNOWN 6072 0x1AA0, // 1AA0..1AAD; TAI_THAM 6073 0x1AAE, // 1AAE..1AAF; UNKNOWN 6074 0x1AB0, // 1AB0..1ACE; INHERITED 6075 0x1ACF, // 1ACF..1AFF; UNKNOWN 6076 0x1B00, // 1B00..1B4C; BALINESE 6077 0x1B4D, // 1B4D ; UNKNOWN 6078 0x1B4E, // 1B4E..1B7F; BALINESE 6079 0x1B80, // 1B80..1BBF; SUNDANESE 6080 0x1BC0, // 1BC0..1BF3; BATAK 6081 0x1BF4, // 1BF4..1BFB; UNKNOWN 6082 0x1BFC, // 1BFC..1BFF; BATAK 6083 0x1C00, // 
1C00..1C37; LEPCHA 6084 0x1C38, // 1C38..1C3A; UNKNOWN 6085 0x1C3B, // 1C3B..1C49; LEPCHA 6086 0x1C4A, // 1C4A..1C4C; UNKNOWN 6087 0x1C4D, // 1C4D..1C4F; LEPCHA 6088 0x1C50, // 1C50..1C7F; OL_CHIKI 6089 0x1C80, // 1C80..1C8A; CYRILLIC 6090 0x1C8B, // 1C8B..1C8F; UNKNOWN 6091 0x1C90, // 1C90..1CBA; GEORGIAN 6092 0x1CBB, // 1CBB..1CBC; UNKNOWN 6093 0x1CBD, // 1CBD..1CBF; GEORGIAN 6094 0x1CC0, // 1CC0..1CC7; SUNDANESE 6095 0x1CC8, // 1CC8..1CCF; UNKNOWN 6096 0x1CD0, // 1CD0..1CD2; INHERITED 6097 0x1CD3, // 1CD3 ; COMMON 6098 0x1CD4, // 1CD4..1CE0; INHERITED 6099 0x1CE1, // 1CE1 ; COMMON 6100 0x1CE2, // 1CE2..1CE8; INHERITED 6101 0x1CE9, // 1CE9..1CEC; COMMON 6102 0x1CED, // 1CED ; INHERITED 6103 0x1CEE, // 1CEE..1CF3; COMMON 6104 0x1CF4, // 1CF4 ; INHERITED 6105 0x1CF5, // 1CF5..1CF7; COMMON 6106 0x1CF8, // 1CF8..1CF9; INHERITED 6107 0x1CFA, // 1CFA ; COMMON 6108 0x1CFB, // 1CFB..1CFF; UNKNOWN 6109 0x1D00, // 1D00..1D25; LATIN 6110 0x1D26, // 1D26..1D2A; GREEK 6111 0x1D2B, // 1D2B ; CYRILLIC 6112 0x1D2C, // 1D2C..1D5C; LATIN 6113 0x1D5D, // 1D5D..1D61; GREEK 6114 0x1D62, // 1D62..1D65; LATIN 6115 0x1D66, // 1D66..1D6A; GREEK 6116 0x1D6B, // 1D6B..1D77; LATIN 6117 0x1D78, // 1D78 ; CYRILLIC 6118 0x1D79, // 1D79..1DBE; LATIN 6119 0x1DBF, // 1DBF ; GREEK 6120 0x1DC0, // 1DC0..1DFF; INHERITED 6121 0x1E00, // 1E00..1EFF; LATIN 6122 0x1F00, // 1F00..1F15; GREEK 6123 0x1F16, // 1F16..1F17; UNKNOWN 6124 0x1F18, // 1F18..1F1D; GREEK 6125 0x1F1E, // 1F1E..1F1F; UNKNOWN 6126 0x1F20, // 1F20..1F45; GREEK 6127 0x1F46, // 1F46..1F47; UNKNOWN 6128 0x1F48, // 1F48..1F4D; GREEK 6129 0x1F4E, // 1F4E..1F4F; UNKNOWN 6130 0x1F50, // 1F50..1F57; GREEK 6131 0x1F58, // 1F58 ; UNKNOWN 6132 0x1F59, // 1F59 ; GREEK 6133 0x1F5A, // 1F5A ; UNKNOWN 6134 0x1F5B, // 1F5B ; GREEK 6135 0x1F5C, // 1F5C ; UNKNOWN 6136 0x1F5D, // 1F5D ; GREEK 6137 0x1F5E, // 1F5E ; UNKNOWN 6138 0x1F5F, // 1F5F..1F7D; GREEK 6139 0x1F7E, // 1F7E..1F7F; UNKNOWN 6140 0x1F80, // 1F80..1FB4; GREEK 6141 0x1FB5, // 1FB5 ; 
UNKNOWN 6142 0x1FB6, // 1FB6..1FC4; GREEK 6143 0x1FC5, // 1FC5 ; UNKNOWN 6144 0x1FC6, // 1FC6..1FD3; GREEK 6145 0x1FD4, // 1FD4..1FD5; UNKNOWN 6146 0x1FD6, // 1FD6..1FDB; GREEK 6147 0x1FDC, // 1FDC ; UNKNOWN 6148 0x1FDD, // 1FDD..1FEF; GREEK 6149 0x1FF0, // 1FF0..1FF1; UNKNOWN 6150 0x1FF2, // 1FF2..1FF4; GREEK 6151 0x1FF5, // 1FF5 ; UNKNOWN 6152 0x1FF6, // 1FF6..1FFE; GREEK 6153 0x1FFF, // 1FFF ; UNKNOWN 6154 0x2000, // 2000..200B; COMMON 6155 0x200C, // 200C..200D; INHERITED 6156 0x200E, // 200E..2064; COMMON 6157 0x2065, // 2065 ; UNKNOWN 6158 0x2066, // 2066..2070; COMMON 6159 0x2071, // 2071 ; LATIN 6160 0x2072, // 2072..2073; UNKNOWN 6161 0x2074, // 2074..207E; COMMON 6162 0x207F, // 207F ; LATIN 6163 0x2080, // 2080..208E; COMMON 6164 0x208F, // 208F ; UNKNOWN 6165 0x2090, // 2090..209C; LATIN 6166 0x209D, // 209D..209F; UNKNOWN 6167 0x20A0, // 20A0..20C0; COMMON 6168 0x20C1, // 20C1..20CF; UNKNOWN 6169 0x20D0, // 20D0..20F0; INHERITED 6170 0x20F1, // 20F1..20FF; UNKNOWN 6171 0x2100, // 2100..2125; COMMON 6172 0x2126, // 2126 ; GREEK 6173 0x2127, // 2127..2129; COMMON 6174 0x212A, // 212A..212B; LATIN 6175 0x212C, // 212C..2131; COMMON 6176 0x2132, // 2132 ; LATIN 6177 0x2133, // 2133..214D; COMMON 6178 0x214E, // 214E ; LATIN 6179 0x214F, // 214F..215F; COMMON 6180 0x2160, // 2160..2188; LATIN 6181 0x2189, // 2189..218B; COMMON 6182 0x218C, // 218C..218F; UNKNOWN 6183 0x2190, // 2190..2429; COMMON 6184 0x242A, // 242A..243F; UNKNOWN 6185 0x2440, // 2440..244A; COMMON 6186 0x244B, // 244B..245F; UNKNOWN 6187 0x2460, // 2460..27FF; COMMON 6188 0x2800, // 2800..28FF; BRAILLE 6189 0x2900, // 2900..2B73; COMMON 6190 0x2B74, // 2B74..2B75; UNKNOWN 6191 0x2B76, // 2B76..2B95; COMMON 6192 0x2B96, // 2B96 ; UNKNOWN 6193 0x2B97, // 2B97..2BFF; COMMON 6194 0x2C00, // 2C00..2C5F; GLAGOLITIC 6195 0x2C60, // 2C60..2C7F; LATIN 6196 0x2C80, // 2C80..2CF3; COPTIC 6197 0x2CF4, // 2CF4..2CF8; UNKNOWN 6198 0x2CF9, // 2CF9..2CFF; COPTIC 6199 0x2D00, // 2D00..2D25; GEORGIAN 6200 
0x2D26, // 2D26 ; UNKNOWN 6201 0x2D27, // 2D27 ; GEORGIAN 6202 0x2D28, // 2D28..2D2C; UNKNOWN 6203 0x2D2D, // 2D2D ; GEORGIAN 6204 0x2D2E, // 2D2E..2D2F; UNKNOWN 6205 0x2D30, // 2D30..2D67; TIFINAGH 6206 0x2D68, // 2D68..2D6E; UNKNOWN 6207 0x2D6F, // 2D6F..2D70; TIFINAGH 6208 0x2D71, // 2D71..2D7E; UNKNOWN 6209 0x2D7F, // 2D7F ; TIFINAGH 6210 0x2D80, // 2D80..2D96; ETHIOPIC 6211 0x2D97, // 2D97..2D9F; UNKNOWN 6212 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6213 0x2DA7, // 2DA7 ; UNKNOWN 6214 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6215 0x2DAF, // 2DAF ; UNKNOWN 6216 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6217 0x2DB7, // 2DB7 ; UNKNOWN 6218 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6219 0x2DBF, // 2DBF ; UNKNOWN 6220 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6221 0x2DC7, // 2DC7 ; UNKNOWN 6222 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6223 0x2DCF, // 2DCF ; UNKNOWN 6224 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6225 0x2DD7, // 2DD7 ; UNKNOWN 6226 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6227 0x2DDF, // 2DDF ; UNKNOWN 6228 0x2DE0, // 2DE0..2DFF; CYRILLIC 6229 0x2E00, // 2E00..2E5D; COMMON 6230 0x2E5E, // 2E5E..2E7F; UNKNOWN 6231 0x2E80, // 2E80..2E99; HAN 6232 0x2E9A, // 2E9A ; UNKNOWN 6233 0x2E9B, // 2E9B..2EF3; HAN 6234 0x2EF4, // 2EF4..2EFF; UNKNOWN 6235 0x2F00, // 2F00..2FD5; HAN 6236 0x2FD6, // 2FD6..2FEF; UNKNOWN 6237 0x2FF0, // 2FF0..3004; COMMON 6238 0x3005, // 3005 ; HAN 6239 0x3006, // 3006 ; COMMON 6240 0x3007, // 3007 ; HAN 6241 0x3008, // 3008..3020; COMMON 6242 0x3021, // 3021..3029; HAN 6243 0x302A, // 302A..302D; INHERITED 6244 0x302E, // 302E..302F; HANGUL 6245 0x3030, // 3030..3037; COMMON 6246 0x3038, // 3038..303B; HAN 6247 0x303C, // 303C..303F; COMMON 6248 0x3040, // 3040 ; UNKNOWN 6249 0x3041, // 3041..3096; HIRAGANA 6250 0x3097, // 3097..3098; UNKNOWN 6251 0x3099, // 3099..309A; INHERITED 6252 0x309B, // 309B..309C; COMMON 6253 0x309D, // 309D..309F; HIRAGANA 6254 0x30A0, // 30A0 ; COMMON 6255 0x30A1, // 30A1..30FA; KATAKANA 6256 0x30FB, // 30FB..30FC; COMMON 6257 0x30FD, // 30FD..30FF; KATAKANA 6258 0x3100, // 
3100..3104; UNKNOWN 6259 0x3105, // 3105..312F; BOPOMOFO 6260 0x3130, // 3130 ; UNKNOWN 6261 0x3131, // 3131..318E; HANGUL 6262 0x318F, // 318F ; UNKNOWN 6263 0x3190, // 3190..319F; COMMON 6264 0x31A0, // 31A0..31BF; BOPOMOFO 6265 0x31C0, // 31C0..31E5; COMMON 6266 0x31E6, // 31E6..31EE; UNKNOWN 6267 0x31EF, // 31EF ; COMMON 6268 0x31F0, // 31F0..31FF; KATAKANA 6269 0x3200, // 3200..321E; HANGUL 6270 0x321F, // 321F ; UNKNOWN 6271 0x3220, // 3220..325F; COMMON 6272 0x3260, // 3260..327E; HANGUL 6273 0x327F, // 327F..32CF; COMMON 6274 0x32D0, // 32D0..32FE; KATAKANA 6275 0x32FF, // 32FF ; COMMON 6276 0x3300, // 3300..3357; KATAKANA 6277 0x3358, // 3358..33FF; COMMON 6278 0x3400, // 3400..4DBF; HAN 6279 0x4DC0, // 4DC0..4DFF; COMMON 6280 0x4E00, // 4E00..9FFF; HAN 6281 0xA000, // A000..A48C; YI 6282 0xA48D, // A48D..A48F; UNKNOWN 6283 0xA490, // A490..A4C6; YI 6284 0xA4C7, // A4C7..A4CF; UNKNOWN 6285 0xA4D0, // A4D0..A4FF; LISU 6286 0xA500, // A500..A62B; VAI 6287 0xA62C, // A62C..A63F; UNKNOWN 6288 0xA640, // A640..A69F; CYRILLIC 6289 0xA6A0, // A6A0..A6F7; BAMUM 6290 0xA6F8, // A6F8..A6FF; UNKNOWN 6291 0xA700, // A700..A721; COMMON 6292 0xA722, // A722..A787; LATIN 6293 0xA788, // A788..A78A; COMMON 6294 0xA78B, // A78B..A7CD; LATIN 6295 0xA7CE, // A7CE..A7CF; UNKNOWN 6296 0xA7D0, // A7D0..A7D1; LATIN 6297 0xA7D2, // A7D2 ; UNKNOWN 6298 0xA7D3, // A7D3 ; LATIN 6299 0xA7D4, // A7D4 ; UNKNOWN 6300 0xA7D5, // A7D5..A7DC; LATIN 6301 0xA7DD, // A7DD..A7F1; UNKNOWN 6302 0xA7F2, // A7F2..A7FF; LATIN 6303 0xA800, // A800..A82C; SYLOTI_NAGRI 6304 0xA82D, // A82D..A82F; UNKNOWN 6305 0xA830, // A830..A839; COMMON 6306 0xA83A, // A83A..A83F; UNKNOWN 6307 0xA840, // A840..A877; PHAGS_PA 6308 0xA878, // A878..A87F; UNKNOWN 6309 0xA880, // A880..A8C5; SAURASHTRA 6310 0xA8C6, // A8C6..A8CD; UNKNOWN 6311 0xA8CE, // A8CE..A8D9; SAURASHTRA 6312 0xA8DA, // A8DA..A8DF; UNKNOWN 6313 0xA8E0, // A8E0..A8FF; DEVANAGARI 6314 0xA900, // A900..A92D; KAYAH_LI 6315 0xA92E, // A92E ; COMMON 6316 
0xA92F, // A92F ; KAYAH_LI 6317 0xA930, // A930..A953; REJANG 6318 0xA954, // A954..A95E; UNKNOWN 6319 0xA95F, // A95F ; REJANG 6320 0xA960, // A960..A97C; HANGUL 6321 0xA97D, // A97D..A97F; UNKNOWN 6322 0xA980, // A980..A9CD; JAVANESE 6323 0xA9CE, // A9CE ; UNKNOWN 6324 0xA9CF, // A9CF ; COMMON 6325 0xA9D0, // A9D0..A9D9; JAVANESE 6326 0xA9DA, // A9DA..A9DD; UNKNOWN 6327 0xA9DE, // A9DE..A9DF; JAVANESE 6328 0xA9E0, // A9E0..A9FE; MYANMAR 6329 0xA9FF, // A9FF ; UNKNOWN 6330 0xAA00, // AA00..AA36; CHAM 6331 0xAA37, // AA37..AA3F; UNKNOWN 6332 0xAA40, // AA40..AA4D; CHAM 6333 0xAA4E, // AA4E..AA4F; UNKNOWN 6334 0xAA50, // AA50..AA59; CHAM 6335 0xAA5A, // AA5A..AA5B; UNKNOWN 6336 0xAA5C, // AA5C..AA5F; CHAM 6337 0xAA60, // AA60..AA7F; MYANMAR 6338 0xAA80, // AA80..AAC2; TAI_VIET 6339 0xAAC3, // AAC3..AADA; UNKNOWN 6340 0xAADB, // AADB..AADF; TAI_VIET 6341 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6342 0xAAF7, // AAF7..AB00; UNKNOWN 6343 0xAB01, // AB01..AB06; ETHIOPIC 6344 0xAB07, // AB07..AB08; UNKNOWN 6345 0xAB09, // AB09..AB0E; ETHIOPIC 6346 0xAB0F, // AB0F..AB10; UNKNOWN 6347 0xAB11, // AB11..AB16; ETHIOPIC 6348 0xAB17, // AB17..AB1F; UNKNOWN 6349 0xAB20, // AB20..AB26; ETHIOPIC 6350 0xAB27, // AB27 ; UNKNOWN 6351 0xAB28, // AB28..AB2E; ETHIOPIC 6352 0xAB2F, // AB2F ; UNKNOWN 6353 0xAB30, // AB30..AB5A; LATIN 6354 0xAB5B, // AB5B ; COMMON 6355 0xAB5C, // AB5C..AB64; LATIN 6356 0xAB65, // AB65 ; GREEK 6357 0xAB66, // AB66..AB69; LATIN 6358 0xAB6A, // AB6A..AB6B; COMMON 6359 0xAB6C, // AB6C..AB6F; UNKNOWN 6360 0xAB70, // AB70..ABBF; CHEROKEE 6361 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6362 0xABEE, // ABEE..ABEF; UNKNOWN 6363 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6364 0xABFA, // ABFA..ABFF; UNKNOWN 6365 0xAC00, // AC00..D7A3; HANGUL 6366 0xD7A4, // D7A4..D7AF; UNKNOWN 6367 0xD7B0, // D7B0..D7C6; HANGUL 6368 0xD7C7, // D7C7..D7CA; UNKNOWN 6369 0xD7CB, // D7CB..D7FB; HANGUL 6370 0xD7FC, // D7FC..F8FF; UNKNOWN 6371 0xF900, // F900..FA6D; HAN 6372 0xFA6E, // FA6E..FA6F; UNKNOWN 
6373 0xFA70, // FA70..FAD9; HAN 6374 0xFADA, // FADA..FAFF; UNKNOWN 6375 0xFB00, // FB00..FB06; LATIN 6376 0xFB07, // FB07..FB12; UNKNOWN 6377 0xFB13, // FB13..FB17; ARMENIAN 6378 0xFB18, // FB18..FB1C; UNKNOWN 6379 0xFB1D, // FB1D..FB36; HEBREW 6380 0xFB37, // FB37 ; UNKNOWN 6381 0xFB38, // FB38..FB3C; HEBREW 6382 0xFB3D, // FB3D ; UNKNOWN 6383 0xFB3E, // FB3E ; HEBREW 6384 0xFB3F, // FB3F ; UNKNOWN 6385 0xFB40, // FB40..FB41; HEBREW 6386 0xFB42, // FB42 ; UNKNOWN 6387 0xFB43, // FB43..FB44; HEBREW 6388 0xFB45, // FB45 ; UNKNOWN 6389 0xFB46, // FB46..FB4F; HEBREW 6390 0xFB50, // FB50..FBC2; ARABIC 6391 0xFBC3, // FBC3..FBD2; UNKNOWN 6392 0xFBD3, // FBD3..FD3D; ARABIC 6393 0xFD3E, // FD3E..FD3F; COMMON 6394 0xFD40, // FD40..FD8F; ARABIC 6395 0xFD90, // FD90..FD91; UNKNOWN 6396 0xFD92, // FD92..FDC7; ARABIC 6397 0xFDC8, // FDC8..FDCE; UNKNOWN 6398 0xFDCF, // FDCF ; ARABIC 6399 0xFDD0, // FDD0..FDEF; UNKNOWN 6400 0xFDF0, // FDF0..FDFF; ARABIC 6401 0xFE00, // FE00..FE0F; INHERITED 6402 0xFE10, // FE10..FE19; COMMON 6403 0xFE1A, // FE1A..FE1F; UNKNOWN 6404 0xFE20, // FE20..FE2D; INHERITED 6405 0xFE2E, // FE2E..FE2F; CYRILLIC 6406 0xFE30, // FE30..FE52; COMMON 6407 0xFE53, // FE53 ; UNKNOWN 6408 0xFE54, // FE54..FE66; COMMON 6409 0xFE67, // FE67 ; UNKNOWN 6410 0xFE68, // FE68..FE6B; COMMON 6411 0xFE6C, // FE6C..FE6F; UNKNOWN 6412 0xFE70, // FE70..FE74; ARABIC 6413 0xFE75, // FE75 ; UNKNOWN 6414 0xFE76, // FE76..FEFC; ARABIC 6415 0xFEFD, // FEFD..FEFE; UNKNOWN 6416 0xFEFF, // FEFF ; COMMON 6417 0xFF00, // FF00 ; UNKNOWN 6418 0xFF01, // FF01..FF20; COMMON 6419 0xFF21, // FF21..FF3A; LATIN 6420 0xFF3B, // FF3B..FF40; COMMON 6421 0xFF41, // FF41..FF5A; LATIN 6422 0xFF5B, // FF5B..FF65; COMMON 6423 0xFF66, // FF66..FF6F; KATAKANA 6424 0xFF70, // FF70 ; COMMON 6425 0xFF71, // FF71..FF9D; KATAKANA 6426 0xFF9E, // FF9E..FF9F; COMMON 6427 0xFFA0, // FFA0..FFBE; HANGUL 6428 0xFFBF, // FFBF..FFC1; UNKNOWN 6429 0xFFC2, // FFC2..FFC7; HANGUL 6430 0xFFC8, // FFC8..FFC9; UNKNOWN 6431 
0xFFCA, // FFCA..FFCF; HANGUL 6432 0xFFD0, // FFD0..FFD1; UNKNOWN 6433 0xFFD2, // FFD2..FFD7; HANGUL 6434 0xFFD8, // FFD8..FFD9; UNKNOWN 6435 0xFFDA, // FFDA..FFDC; HANGUL 6436 0xFFDD, // FFDD..FFDF; UNKNOWN 6437 0xFFE0, // FFE0..FFE6; COMMON 6438 0xFFE7, // FFE7 ; UNKNOWN 6439 0xFFE8, // FFE8..FFEE; COMMON 6440 0xFFEF, // FFEF..FFF8; UNKNOWN 6441 0xFFF9, // FFF9..FFFD; COMMON 6442 0xFFFE, // FFFE..FFFF; UNKNOWN 6443 0x10000, // 10000..1000B; LINEAR_B 6444 0x1000C, // 1000C ; UNKNOWN 6445 0x1000D, // 1000D..10026; LINEAR_B 6446 0x10027, // 10027 ; UNKNOWN 6447 0x10028, // 10028..1003A; LINEAR_B 6448 0x1003B, // 1003B ; UNKNOWN 6449 0x1003C, // 1003C..1003D; LINEAR_B 6450 0x1003E, // 1003E ; UNKNOWN 6451 0x1003F, // 1003F..1004D; LINEAR_B 6452 0x1004E, // 1004E..1004F; UNKNOWN 6453 0x10050, // 10050..1005D; LINEAR_B 6454 0x1005E, // 1005E..1007F; UNKNOWN 6455 0x10080, // 10080..100FA; LINEAR_B 6456 0x100FB, // 100FB..100FF; UNKNOWN 6457 0x10100, // 10100..10102; COMMON 6458 0x10103, // 10103..10106; UNKNOWN 6459 0x10107, // 10107..10133; COMMON 6460 0x10134, // 10134..10136; UNKNOWN 6461 0x10137, // 10137..1013F; COMMON 6462 0x10140, // 10140..1018E; GREEK 6463 0x1018F, // 1018F ; UNKNOWN 6464 0x10190, // 10190..1019C; COMMON 6465 0x1019D, // 1019D..1019F; UNKNOWN 6466 0x101A0, // 101A0 ; GREEK 6467 0x101A1, // 101A1..101CF; UNKNOWN 6468 0x101D0, // 101D0..101FC; COMMON 6469 0x101FD, // 101FD ; INHERITED 6470 0x101FE, // 101FE..1027F; UNKNOWN 6471 0x10280, // 10280..1029C; LYCIAN 6472 0x1029D, // 1029D..1029F; UNKNOWN 6473 0x102A0, // 102A0..102D0; CARIAN 6474 0x102D1, // 102D1..102DF; UNKNOWN 6475 0x102E0, // 102E0 ; INHERITED 6476 0x102E1, // 102E1..102FB; COMMON 6477 0x102FC, // 102FC..102FF; UNKNOWN 6478 0x10300, // 10300..10323; OLD_ITALIC 6479 0x10324, // 10324..1032C; UNKNOWN 6480 0x1032D, // 1032D..1032F; OLD_ITALIC 6481 0x10330, // 10330..1034A; GOTHIC 6482 0x1034B, // 1034B..1034F; UNKNOWN 6483 0x10350, // 10350..1037A; OLD_PERMIC 6484 0x1037B, // 
1037B..1037F; UNKNOWN 6485 0x10380, // 10380..1039D; UGARITIC 6486 0x1039E, // 1039E ; UNKNOWN 6487 0x1039F, // 1039F ; UGARITIC 6488 0x103A0, // 103A0..103C3; OLD_PERSIAN 6489 0x103C4, // 103C4..103C7; UNKNOWN 6490 0x103C8, // 103C8..103D5; OLD_PERSIAN 6491 0x103D6, // 103D6..103FF; UNKNOWN 6492 0x10400, // 10400..1044F; DESERET 6493 0x10450, // 10450..1047F; SHAVIAN 6494 0x10480, // 10480..1049D; OSMANYA 6495 0x1049E, // 1049E..1049F; UNKNOWN 6496 0x104A0, // 104A0..104A9; OSMANYA 6497 0x104AA, // 104AA..104AF; UNKNOWN 6498 0x104B0, // 104B0..104D3; OSAGE 6499 0x104D4, // 104D4..104D7; UNKNOWN 6500 0x104D8, // 104D8..104FB; OSAGE 6501 0x104FC, // 104FC..104FF; UNKNOWN 6502 0x10500, // 10500..10527; ELBASAN 6503 0x10528, // 10528..1052F; UNKNOWN 6504 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6505 0x10564, // 10564..1056E; UNKNOWN 6506 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6507 0x10570, // 10570..1057A; VITHKUQI 6508 0x1057B, // 1057B ; UNKNOWN 6509 0x1057C, // 1057C..1058A; VITHKUQI 6510 0x1058B, // 1058B ; UNKNOWN 6511 0x1058C, // 1058C..10592; VITHKUQI 6512 0x10593, // 10593 ; UNKNOWN 6513 0x10594, // 10594..10595; VITHKUQI 6514 0x10596, // 10596 ; UNKNOWN 6515 0x10597, // 10597..105A1; VITHKUQI 6516 0x105A2, // 105A2 ; UNKNOWN 6517 0x105A3, // 105A3..105B1; VITHKUQI 6518 0x105B2, // 105B2 ; UNKNOWN 6519 0x105B3, // 105B3..105B9; VITHKUQI 6520 0x105BA, // 105BA ; UNKNOWN 6521 0x105BB, // 105BB..105BC; VITHKUQI 6522 0x105BD, // 105BD..105BF; UNKNOWN 6523 0x105C0, // 105C0..105F3; TODHRI 6524 0x105F4, // 105F4..105FF; UNKNOWN 6525 0x10600, // 10600..10736; LINEAR_A 6526 0x10737, // 10737..1073F; UNKNOWN 6527 0x10740, // 10740..10755; LINEAR_A 6528 0x10756, // 10756..1075F; UNKNOWN 6529 0x10760, // 10760..10767; LINEAR_A 6530 0x10768, // 10768..1077F; UNKNOWN 6531 0x10780, // 10780..10785; LATIN 6532 0x10786, // 10786 ; UNKNOWN 6533 0x10787, // 10787..107B0; LATIN 6534 0x107B1, // 107B1 ; UNKNOWN 6535 0x107B2, // 107B2..107BA; LATIN 6536 0x107BB, // 
107BB..107FF; UNKNOWN 6537 0x10800, // 10800..10805; CYPRIOT 6538 0x10806, // 10806..10807; UNKNOWN 6539 0x10808, // 10808 ; CYPRIOT 6540 0x10809, // 10809 ; UNKNOWN 6541 0x1080A, // 1080A..10835; CYPRIOT 6542 0x10836, // 10836 ; UNKNOWN 6543 0x10837, // 10837..10838; CYPRIOT 6544 0x10839, // 10839..1083B; UNKNOWN 6545 0x1083C, // 1083C ; CYPRIOT 6546 0x1083D, // 1083D..1083E; UNKNOWN 6547 0x1083F, // 1083F ; CYPRIOT 6548 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6549 0x10856, // 10856 ; UNKNOWN 6550 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6551 0x10860, // 10860..1087F; PALMYRENE 6552 0x10880, // 10880..1089E; NABATAEAN 6553 0x1089F, // 1089F..108A6; UNKNOWN 6554 0x108A7, // 108A7..108AF; NABATAEAN 6555 0x108B0, // 108B0..108DF; UNKNOWN 6556 0x108E0, // 108E0..108F2; HATRAN 6557 0x108F3, // 108F3 ; UNKNOWN 6558 0x108F4, // 108F4..108F5; HATRAN 6559 0x108F6, // 108F6..108FA; UNKNOWN 6560 0x108FB, // 108FB..108FF; HATRAN 6561 0x10900, // 10900..1091B; PHOENICIAN 6562 0x1091C, // 1091C..1091E; UNKNOWN 6563 0x1091F, // 1091F ; PHOENICIAN 6564 0x10920, // 10920..10939; LYDIAN 6565 0x1093A, // 1093A..1093E; UNKNOWN 6566 0x1093F, // 1093F ; LYDIAN 6567 0x10940, // 10940..1097F; UNKNOWN 6568 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6569 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6570 0x109B8, // 109B8..109BB; UNKNOWN 6571 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6572 0x109D0, // 109D0..109D1; UNKNOWN 6573 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6574 0x10A00, // 10A00..10A03; KHAROSHTHI 6575 0x10A04, // 10A04 ; UNKNOWN 6576 0x10A05, // 10A05..10A06; KHAROSHTHI 6577 0x10A07, // 10A07..10A0B; UNKNOWN 6578 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6579 0x10A14, // 10A14 ; UNKNOWN 6580 0x10A15, // 10A15..10A17; KHAROSHTHI 6581 0x10A18, // 10A18 ; UNKNOWN 6582 0x10A19, // 10A19..10A35; KHAROSHTHI 6583 0x10A36, // 10A36..10A37; UNKNOWN 6584 0x10A38, // 10A38..10A3A; KHAROSHTHI 6585 0x10A3B, // 10A3B..10A3E; UNKNOWN 6586 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6587 0x10A49, 
// 10A49..10A4F; UNKNOWN 6588 0x10A50, // 10A50..10A58; KHAROSHTHI 6589 0x10A59, // 10A59..10A5F; UNKNOWN 6590 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6591 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6592 0x10AA0, // 10AA0..10ABF; UNKNOWN 6593 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6594 0x10AE7, // 10AE7..10AEA; UNKNOWN 6595 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6596 0x10AF7, // 10AF7..10AFF; UNKNOWN 6597 0x10B00, // 10B00..10B35; AVESTAN 6598 0x10B36, // 10B36..10B38; UNKNOWN 6599 0x10B39, // 10B39..10B3F; AVESTAN 6600 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6601 0x10B56, // 10B56..10B57; UNKNOWN 6602 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6603 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6604 0x10B73, // 10B73..10B77; UNKNOWN 6605 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6606 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6607 0x10B92, // 10B92..10B98; UNKNOWN 6608 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6609 0x10B9D, // 10B9D..10BA8; UNKNOWN 6610 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6611 0x10BB0, // 10BB0..10BFF; UNKNOWN 6612 0x10C00, // 10C00..10C48; OLD_TURKIC 6613 0x10C49, // 10C49..10C7F; UNKNOWN 6614 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6615 0x10CB3, // 10CB3..10CBF; UNKNOWN 6616 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6617 0x10CF3, // 10CF3..10CF9; UNKNOWN 6618 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6619 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6620 0x10D28, // 10D28..10D2F; UNKNOWN 6621 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6622 0x10D3A, // 10D3A..10D3F; UNKNOWN 6623 0x10D40, // 10D40..10D65; GARAY 6624 0x10D66, // 10D66..10D68; UNKNOWN 6625 0x10D69, // 10D69..10D85; GARAY 6626 0x10D86, // 10D86..10D8D; UNKNOWN 6627 0x10D8E, // 10D8E..10D8F; GARAY 6628 0x10D90, // 10D90..10E5F; UNKNOWN 6629 0x10E60, // 10E60..10E7E; ARABIC 6630 0x10E7F, // 10E7F ; UNKNOWN 6631 0x10E80, // 10E80..10EA9; YEZIDI 6632 0x10EAA, // 10EAA ; UNKNOWN 6633 0x10EAB, // 10EAB..10EAD; YEZIDI 6634 0x10EAE, // 10EAE..10EAF; UNKNOWN 6635 0x10EB0, 
// 10EB0..10EB1; YEZIDI 6636 0x10EB2, // 10EB2..10EC1; UNKNOWN 6637 0x10EC2, // 10EC2..10EC4; ARABIC 6638 0x10EC5, // 10EC5..10EFB; UNKNOWN 6639 0x10EFC, // 10EFC..10EFF; ARABIC 6640 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6641 0x10F28, // 10F28..10F2F; UNKNOWN 6642 0x10F30, // 10F30..10F59; SOGDIAN 6643 0x10F5A, // 10F5A..10F6F; UNKNOWN 6644 0x10F70, // 10F70..10F89; OLD_UYGHUR 6645 0x10F8A, // 10F8A..10FAF; UNKNOWN 6646 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6647 0x10FCC, // 10FCC..10FDF; UNKNOWN 6648 0x10FE0, // 10FE0..10FF6; ELYMAIC 6649 0x10FF7, // 10FF7..10FFF; UNKNOWN 6650 0x11000, // 11000..1104D; BRAHMI 6651 0x1104E, // 1104E..11051; UNKNOWN 6652 0x11052, // 11052..11075; BRAHMI 6653 0x11076, // 11076..1107E; UNKNOWN 6654 0x1107F, // 1107F ; BRAHMI 6655 0x11080, // 11080..110C2; KAITHI 6656 0x110C3, // 110C3..110CC; UNKNOWN 6657 0x110CD, // 110CD ; KAITHI 6658 0x110CE, // 110CE..110CF; UNKNOWN 6659 0x110D0, // 110D0..110E8; SORA_SOMPENG 6660 0x110E9, // 110E9..110EF; UNKNOWN 6661 0x110F0, // 110F0..110F9; SORA_SOMPENG 6662 0x110FA, // 110FA..110FF; UNKNOWN 6663 0x11100, // 11100..11134; CHAKMA 6664 0x11135, // 11135 ; UNKNOWN 6665 0x11136, // 11136..11147; CHAKMA 6666 0x11148, // 11148..1114F; UNKNOWN 6667 0x11150, // 11150..11176; MAHAJANI 6668 0x11177, // 11177..1117F; UNKNOWN 6669 0x11180, // 11180..111DF; SHARADA 6670 0x111E0, // 111E0 ; UNKNOWN 6671 0x111E1, // 111E1..111F4; SINHALA 6672 0x111F5, // 111F5..111FF; UNKNOWN 6673 0x11200, // 11200..11211; KHOJKI 6674 0x11212, // 11212 ; UNKNOWN 6675 0x11213, // 11213..11241; KHOJKI 6676 0x11242, // 11242..1127F; UNKNOWN 6677 0x11280, // 11280..11286; MULTANI 6678 0x11287, // 11287 ; UNKNOWN 6679 0x11288, // 11288 ; MULTANI 6680 0x11289, // 11289 ; UNKNOWN 6681 0x1128A, // 1128A..1128D; MULTANI 6682 0x1128E, // 1128E ; UNKNOWN 6683 0x1128F, // 1128F..1129D; MULTANI 6684 0x1129E, // 1129E ; UNKNOWN 6685 0x1129F, // 1129F..112A9; MULTANI 6686 0x112AA, // 112AA..112AF; UNKNOWN 6687 0x112B0, // 112B0..112EA; 
KHUDAWADI 6688 0x112EB, // 112EB..112EF; UNKNOWN 6689 0x112F0, // 112F0..112F9; KHUDAWADI 6690 0x112FA, // 112FA..112FF; UNKNOWN 6691 0x11300, // 11300..11303; GRANTHA 6692 0x11304, // 11304 ; UNKNOWN 6693 0x11305, // 11305..1130C; GRANTHA 6694 0x1130D, // 1130D..1130E; UNKNOWN 6695 0x1130F, // 1130F..11310; GRANTHA 6696 0x11311, // 11311..11312; UNKNOWN 6697 0x11313, // 11313..11328; GRANTHA 6698 0x11329, // 11329 ; UNKNOWN 6699 0x1132A, // 1132A..11330; GRANTHA 6700 0x11331, // 11331 ; UNKNOWN 6701 0x11332, // 11332..11333; GRANTHA 6702 0x11334, // 11334 ; UNKNOWN 6703 0x11335, // 11335..11339; GRANTHA 6704 0x1133A, // 1133A ; UNKNOWN 6705 0x1133B, // 1133B ; INHERITED 6706 0x1133C, // 1133C..11344; GRANTHA 6707 0x11345, // 11345..11346; UNKNOWN 6708 0x11347, // 11347..11348; GRANTHA 6709 0x11349, // 11349..1134A; UNKNOWN 6710 0x1134B, // 1134B..1134D; GRANTHA 6711 0x1134E, // 1134E..1134F; UNKNOWN 6712 0x11350, // 11350 ; GRANTHA 6713 0x11351, // 11351..11356; UNKNOWN 6714 0x11357, // 11357 ; GRANTHA 6715 0x11358, // 11358..1135C; UNKNOWN 6716 0x1135D, // 1135D..11363; GRANTHA 6717 0x11364, // 11364..11365; UNKNOWN 6718 0x11366, // 11366..1136C; GRANTHA 6719 0x1136D, // 1136D..1136F; UNKNOWN 6720 0x11370, // 11370..11374; GRANTHA 6721 0x11375, // 11375..1137F; UNKNOWN 6722 0x11380, // 11380..11389; TULU_TIGALARI 6723 0x1138A, // 1138A ; UNKNOWN 6724 0x1138B, // 1138B ; TULU_TIGALARI 6725 0x1138C, // 1138C..1138D; UNKNOWN 6726 0x1138E, // 1138E ; TULU_TIGALARI 6727 0x1138F, // 1138F ; UNKNOWN 6728 0x11390, // 11390..113B5; TULU_TIGALARI 6729 0x113B6, // 113B6 ; UNKNOWN 6730 0x113B7, // 113B7..113C0; TULU_TIGALARI 6731 0x113C1, // 113C1 ; UNKNOWN 6732 0x113C2, // 113C2 ; TULU_TIGALARI 6733 0x113C3, // 113C3..113C4; UNKNOWN 6734 0x113C5, // 113C5 ; TULU_TIGALARI 6735 0x113C6, // 113C6 ; UNKNOWN 6736 0x113C7, // 113C7..113CA; TULU_TIGALARI 6737 0x113CB, // 113CB ; UNKNOWN 6738 0x113CC, // 113CC..113D5; TULU_TIGALARI 6739 0x113D6, // 113D6 ; UNKNOWN 6740 0x113D7, // 
113D7..113D8; TULU_TIGALARI 6741 0x113D9, // 113D9..113E0; UNKNOWN 6742 0x113E1, // 113E1..113E2; TULU_TIGALARI 6743 0x113E3, // 113E3..113FF; UNKNOWN 6744 0x11400, // 11400..1145B; NEWA 6745 0x1145C, // 1145C ; UNKNOWN 6746 0x1145D, // 1145D..11461; NEWA 6747 0x11462, // 11462..1147F; UNKNOWN 6748 0x11480, // 11480..114C7; TIRHUTA 6749 0x114C8, // 114C8..114CF; UNKNOWN 6750 0x114D0, // 114D0..114D9; TIRHUTA 6751 0x114DA, // 114DA..1157F; UNKNOWN 6752 0x11580, // 11580..115B5; SIDDHAM 6753 0x115B6, // 115B6..115B7; UNKNOWN 6754 0x115B8, // 115B8..115DD; SIDDHAM 6755 0x115DE, // 115DE..115FF; UNKNOWN 6756 0x11600, // 11600..11644; MODI 6757 0x11645, // 11645..1164F; UNKNOWN 6758 0x11650, // 11650..11659; MODI 6759 0x1165A, // 1165A..1165F; UNKNOWN 6760 0x11660, // 11660..1166C; MONGOLIAN 6761 0x1166D, // 1166D..1167F; UNKNOWN 6762 0x11680, // 11680..116B9; TAKRI 6763 0x116BA, // 116BA..116BF; UNKNOWN 6764 0x116C0, // 116C0..116C9; TAKRI 6765 0x116CA, // 116CA..116CF; UNKNOWN 6766 0x116D0, // 116D0..116E3; MYANMAR 6767 0x116E4, // 116E4..116FF; UNKNOWN 6768 0x11700, // 11700..1171A; AHOM 6769 0x1171B, // 1171B..1171C; UNKNOWN 6770 0x1171D, // 1171D..1172B; AHOM 6771 0x1172C, // 1172C..1172F; UNKNOWN 6772 0x11730, // 11730..11746; AHOM 6773 0x11747, // 11747..117FF; UNKNOWN 6774 0x11800, // 11800..1183B; DOGRA 6775 0x1183C, // 1183C..1189F; UNKNOWN 6776 0x118A0, // 118A0..118F2; WARANG_CITI 6777 0x118F3, // 118F3..118FE; UNKNOWN 6778 0x118FF, // 118FF ; WARANG_CITI 6779 0x11900, // 11900..11906; DIVES_AKURU 6780 0x11907, // 11907..11908; UNKNOWN 6781 0x11909, // 11909 ; DIVES_AKURU 6782 0x1190A, // 1190A..1190B; UNKNOWN 6783 0x1190C, // 1190C..11913; DIVES_AKURU 6784 0x11914, // 11914 ; UNKNOWN 6785 0x11915, // 11915..11916; DIVES_AKURU 6786 0x11917, // 11917 ; UNKNOWN 6787 0x11918, // 11918..11935; DIVES_AKURU 6788 0x11936, // 11936 ; UNKNOWN 6789 0x11937, // 11937..11938; DIVES_AKURU 6790 0x11939, // 11939..1193A; UNKNOWN 6791 0x1193B, // 1193B..11946; DIVES_AKURU 
6792 0x11947, // 11947..1194F; UNKNOWN 6793 0x11950, // 11950..11959; DIVES_AKURU 6794 0x1195A, // 1195A..1199F; UNKNOWN 6795 0x119A0, // 119A0..119A7; NANDINAGARI 6796 0x119A8, // 119A8..119A9; UNKNOWN 6797 0x119AA, // 119AA..119D7; NANDINAGARI 6798 0x119D8, // 119D8..119D9; UNKNOWN 6799 0x119DA, // 119DA..119E4; NANDINAGARI 6800 0x119E5, // 119E5..119FF; UNKNOWN 6801 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6802 0x11A48, // 11A48..11A4F; UNKNOWN 6803 0x11A50, // 11A50..11AA2; SOYOMBO 6804 0x11AA3, // 11AA3..11AAF; UNKNOWN 6805 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6806 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6807 0x11AF9, // 11AF9..11AFF; UNKNOWN 6808 0x11B00, // 11B00..11B09; DEVANAGARI 6809 0x11B0A, // 11B0A..11BBF; UNKNOWN 6810 0x11BC0, // 11BC0..11BE1; SUNUWAR 6811 0x11BE2, // 11BE2..11BEF; UNKNOWN 6812 0x11BF0, // 11BF0..11BF9; SUNUWAR 6813 0x11BFA, // 11BFA..11BFF; UNKNOWN 6814 0x11C00, // 11C00..11C08; BHAIKSUKI 6815 0x11C09, // 11C09 ; UNKNOWN 6816 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6817 0x11C37, // 11C37 ; UNKNOWN 6818 0x11C38, // 11C38..11C45; BHAIKSUKI 6819 0x11C46, // 11C46..11C4F; UNKNOWN 6820 0x11C50, // 11C50..11C6C; BHAIKSUKI 6821 0x11C6D, // 11C6D..11C6F; UNKNOWN 6822 0x11C70, // 11C70..11C8F; MARCHEN 6823 0x11C90, // 11C90..11C91; UNKNOWN 6824 0x11C92, // 11C92..11CA7; MARCHEN 6825 0x11CA8, // 11CA8 ; UNKNOWN 6826 0x11CA9, // 11CA9..11CB6; MARCHEN 6827 0x11CB7, // 11CB7..11CFF; UNKNOWN 6828 0x11D00, // 11D00..11D06; MASARAM_GONDI 6829 0x11D07, // 11D07 ; UNKNOWN 6830 0x11D08, // 11D08..11D09; MASARAM_GONDI 6831 0x11D0A, // 11D0A ; UNKNOWN 6832 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6833 0x11D37, // 11D37..11D39; UNKNOWN 6834 0x11D3A, // 11D3A ; MASARAM_GONDI 6835 0x11D3B, // 11D3B ; UNKNOWN 6836 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6837 0x11D3E, // 11D3E ; UNKNOWN 6838 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6839 0x11D48, // 11D48..11D4F; UNKNOWN 6840 0x11D50, // 11D50..11D59; MASARAM_GONDI 6841 0x11D5A, // 11D5A..11D5F; UNKNOWN 
6842 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6843 0x11D66, // 11D66 ; UNKNOWN 6844 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6845 0x11D69, // 11D69 ; UNKNOWN 6846 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6847 0x11D8F, // 11D8F ; UNKNOWN 6848 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6849 0x11D92, // 11D92 ; UNKNOWN 6850 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6851 0x11D99, // 11D99..11D9F; UNKNOWN 6852 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6853 0x11DAA, // 11DAA..11EDF; UNKNOWN 6854 0x11EE0, // 11EE0..11EF8; MAKASAR 6855 0x11EF9, // 11EF9..11EFF; UNKNOWN 6856 0x11F00, // 11F00..11F10; KAWI 6857 0x11F11, // 11F11 ; UNKNOWN 6858 0x11F12, // 11F12..11F3A; KAWI 6859 0x11F3B, // 11F3B..11F3D; UNKNOWN 6860 0x11F3E, // 11F3E..11F5A; KAWI 6861 0x11F5B, // 11F5B..11FAF; UNKNOWN 6862 0x11FB0, // 11FB0 ; LISU 6863 0x11FB1, // 11FB1..11FBF; UNKNOWN 6864 0x11FC0, // 11FC0..11FF1; TAMIL 6865 0x11FF2, // 11FF2..11FFE; UNKNOWN 6866 0x11FFF, // 11FFF ; TAMIL 6867 0x12000, // 12000..12399; CUNEIFORM 6868 0x1239A, // 1239A..123FF; UNKNOWN 6869 0x12400, // 12400..1246E; CUNEIFORM 6870 0x1246F, // 1246F ; UNKNOWN 6871 0x12470, // 12470..12474; CUNEIFORM 6872 0x12475, // 12475..1247F; UNKNOWN 6873 0x12480, // 12480..12543; CUNEIFORM 6874 0x12544, // 12544..12F8F; UNKNOWN 6875 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6876 0x12FF3, // 12FF3..12FFF; UNKNOWN 6877 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6878 0x13456, // 13456..1345F; UNKNOWN 6879 0x13460, // 13460..143FA; EGYPTIAN_HIEROGLYPHS 6880 0x143FB, // 143FB..143FF; UNKNOWN 6881 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6882 0x14647, // 14647..160FF; UNKNOWN 6883 0x16100, // 16100..16139; GURUNG_KHEMA 6884 0x1613A, // 1613A..167FF; UNKNOWN 6885 0x16800, // 16800..16A38; BAMUM 6886 0x16A39, // 16A39..16A3F; UNKNOWN 6887 0x16A40, // 16A40..16A5E; MRO 6888 0x16A5F, // 16A5F ; UNKNOWN 6889 0x16A60, // 16A60..16A69; MRO 6890 0x16A6A, // 16A6A..16A6D; UNKNOWN 6891 0x16A6E, // 16A6E..16A6F; MRO 6892 0x16A70, // 16A70..16ABE; TANGSA 
6893 0x16ABF, // 16ABF ; UNKNOWN 6894 0x16AC0, // 16AC0..16AC9; TANGSA 6895 0x16ACA, // 16ACA..16ACF; UNKNOWN 6896 0x16AD0, // 16AD0..16AED; BASSA_VAH 6897 0x16AEE, // 16AEE..16AEF; UNKNOWN 6898 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6899 0x16AF6, // 16AF6..16AFF; UNKNOWN 6900 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6901 0x16B46, // 16B46..16B4F; UNKNOWN 6902 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6903 0x16B5A, // 16B5A ; UNKNOWN 6904 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6905 0x16B62, // 16B62 ; UNKNOWN 6906 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6907 0x16B78, // 16B78..16B7C; UNKNOWN 6908 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6909 0x16B90, // 16B90..16D3F; UNKNOWN 6910 0x16D40, // 16D40..16D79; KIRAT_RAI 6911 0x16D7A, // 16D7A..16E3F; UNKNOWN 6912 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6913 0x16E9B, // 16E9B..16EFF; UNKNOWN 6914 0x16F00, // 16F00..16F4A; MIAO 6915 0x16F4B, // 16F4B..16F4E; UNKNOWN 6916 0x16F4F, // 16F4F..16F87; MIAO 6917 0x16F88, // 16F88..16F8E; UNKNOWN 6918 0x16F8F, // 16F8F..16F9F; MIAO 6919 0x16FA0, // 16FA0..16FDF; UNKNOWN 6920 0x16FE0, // 16FE0 ; TANGUT 6921 0x16FE1, // 16FE1 ; NUSHU 6922 0x16FE2, // 16FE2..16FE3; HAN 6923 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6924 0x16FE5, // 16FE5..16FEF; UNKNOWN 6925 0x16FF0, // 16FF0..16FF1; HAN 6926 0x16FF2, // 16FF2..16FFF; UNKNOWN 6927 0x17000, // 17000..187F7; TANGUT 6928 0x187F8, // 187F8..187FF; UNKNOWN 6929 0x18800, // 18800..18AFF; TANGUT 6930 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6931 0x18CD6, // 18CD6..18CFE; UNKNOWN 6932 0x18CFF, // 18CFF ; KHITAN_SMALL_SCRIPT 6933 0x18D00, // 18D00..18D08; TANGUT 6934 0x18D09, // 18D09..1AFEF; UNKNOWN 6935 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6936 0x1AFF4, // 1AFF4 ; UNKNOWN 6937 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6938 0x1AFFC, // 1AFFC ; UNKNOWN 6939 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6940 0x1AFFF, // 1AFFF ; UNKNOWN 6941 0x1B000, // 1B000 ; KATAKANA 6942 0x1B001, // 1B001..1B11F; HIRAGANA 6943 0x1B120, // 1B120..1B122; KATAKANA 6944 0x1B123, // 
1B123..1B131; UNKNOWN 6945 0x1B132, // 1B132 ; HIRAGANA 6946 0x1B133, // 1B133..1B14F; UNKNOWN 6947 0x1B150, // 1B150..1B152; HIRAGANA 6948 0x1B153, // 1B153..1B154; UNKNOWN 6949 0x1B155, // 1B155 ; KATAKANA 6950 0x1B156, // 1B156..1B163; UNKNOWN 6951 0x1B164, // 1B164..1B167; KATAKANA 6952 0x1B168, // 1B168..1B16F; UNKNOWN 6953 0x1B170, // 1B170..1B2FB; NUSHU 6954 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6955 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6956 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6957 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6958 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6959 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6960 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6961 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6962 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6963 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6964 0x1BCA0, // 1BCA0..1BCA3; COMMON 6965 0x1BCA4, // 1BCA4..1CBFF; UNKNOWN 6966 0x1CC00, // 1CC00..1CCF9; COMMON 6967 0x1CCFA, // 1CCFA..1CCFF; UNKNOWN 6968 0x1CD00, // 1CD00..1CEB3; COMMON 6969 0x1CEB4, // 1CEB4..1CEFF; UNKNOWN 6970 0x1CF00, // 1CF00..1CF2D; INHERITED 6971 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6972 0x1CF30, // 1CF30..1CF46; INHERITED 6973 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6974 0x1CF50, // 1CF50..1CFC3; COMMON 6975 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6976 0x1D000, // 1D000..1D0F5; COMMON 6977 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6978 0x1D100, // 1D100..1D126; COMMON 6979 0x1D127, // 1D127..1D128; UNKNOWN 6980 0x1D129, // 1D129..1D166; COMMON 6981 0x1D167, // 1D167..1D169; INHERITED 6982 0x1D16A, // 1D16A..1D17A; COMMON 6983 0x1D17B, // 1D17B..1D182; INHERITED 6984 0x1D183, // 1D183..1D184; COMMON 6985 0x1D185, // 1D185..1D18B; INHERITED 6986 0x1D18C, // 1D18C..1D1A9; COMMON 6987 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6988 0x1D1AE, // 1D1AE..1D1EA; COMMON 6989 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6990 0x1D200, // 1D200..1D245; GREEK 6991 0x1D246, // 1D246..1D2BF; UNKNOWN 6992 0x1D2C0, // 1D2C0..1D2D3; COMMON 6993 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6994 0x1D2E0, // 1D2E0..1D2F3; COMMON 6995 0x1D2F4, // 1D2F4..1D2FF; 
UNKNOWN 6996 0x1D300, // 1D300..1D356; COMMON 6997 0x1D357, // 1D357..1D35F; UNKNOWN 6998 0x1D360, // 1D360..1D378; COMMON 6999 0x1D379, // 1D379..1D3FF; UNKNOWN 7000 0x1D400, // 1D400..1D454; COMMON 7001 0x1D455, // 1D455 ; UNKNOWN 7002 0x1D456, // 1D456..1D49C; COMMON 7003 0x1D49D, // 1D49D ; UNKNOWN 7004 0x1D49E, // 1D49E..1D49F; COMMON 7005 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 7006 0x1D4A2, // 1D4A2 ; COMMON 7007 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 7008 0x1D4A5, // 1D4A5..1D4A6; COMMON 7009 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 7010 0x1D4A9, // 1D4A9..1D4AC; COMMON 7011 0x1D4AD, // 1D4AD ; UNKNOWN 7012 0x1D4AE, // 1D4AE..1D4B9; COMMON 7013 0x1D4BA, // 1D4BA ; UNKNOWN 7014 0x1D4BB, // 1D4BB ; COMMON 7015 0x1D4BC, // 1D4BC ; UNKNOWN 7016 0x1D4BD, // 1D4BD..1D4C3; COMMON 7017 0x1D4C4, // 1D4C4 ; UNKNOWN 7018 0x1D4C5, // 1D4C5..1D505; COMMON 7019 0x1D506, // 1D506 ; UNKNOWN 7020 0x1D507, // 1D507..1D50A; COMMON 7021 0x1D50B, // 1D50B..1D50C; UNKNOWN 7022 0x1D50D, // 1D50D..1D514; COMMON 7023 0x1D515, // 1D515 ; UNKNOWN 7024 0x1D516, // 1D516..1D51C; COMMON 7025 0x1D51D, // 1D51D ; UNKNOWN 7026 0x1D51E, // 1D51E..1D539; COMMON 7027 0x1D53A, // 1D53A ; UNKNOWN 7028 0x1D53B, // 1D53B..1D53E; COMMON 7029 0x1D53F, // 1D53F ; UNKNOWN 7030 0x1D540, // 1D540..1D544; COMMON 7031 0x1D545, // 1D545 ; UNKNOWN 7032 0x1D546, // 1D546 ; COMMON 7033 0x1D547, // 1D547..1D549; UNKNOWN 7034 0x1D54A, // 1D54A..1D550; COMMON 7035 0x1D551, // 1D551 ; UNKNOWN 7036 0x1D552, // 1D552..1D6A5; COMMON 7037 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 7038 0x1D6A8, // 1D6A8..1D7CB; COMMON 7039 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 7040 0x1D7CE, // 1D7CE..1D7FF; COMMON 7041 0x1D800, // 1D800..1DA8B; SIGNWRITING 7042 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 7043 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 7044 0x1DAA0, // 1DAA0 ; UNKNOWN 7045 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 7046 0x1DAB0, // 1DAB0..1DEFF; UNKNOWN 7047 0x1DF00, // 1DF00..1DF1E; LATIN 7048 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 7049 0x1DF25, // 1DF25..1DF2A; LATIN 
7050 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 7051 0x1E000, // 1E000..1E006; GLAGOLITIC 7052 0x1E007, // 1E007 ; UNKNOWN 7053 0x1E008, // 1E008..1E018; GLAGOLITIC 7054 0x1E019, // 1E019..1E01A; UNKNOWN 7055 0x1E01B, // 1E01B..1E021; GLAGOLITIC 7056 0x1E022, // 1E022 ; UNKNOWN 7057 0x1E023, // 1E023..1E024; GLAGOLITIC 7058 0x1E025, // 1E025 ; UNKNOWN 7059 0x1E026, // 1E026..1E02A; GLAGOLITIC 7060 0x1E02B, // 1E02B..1E02F; UNKNOWN 7061 0x1E030, // 1E030..1E06D; CYRILLIC 7062 0x1E06E, // 1E06E..1E08E; UNKNOWN 7063 0x1E08F, // 1E08F ; CYRILLIC 7064 0x1E090, // 1E090..1E0FF; UNKNOWN 7065 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 7066 0x1E12D, // 1E12D..1E12F; UNKNOWN 7067 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 7068 0x1E13E, // 1E13E..1E13F; UNKNOWN 7069 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 7070 0x1E14A, // 1E14A..1E14D; UNKNOWN 7071 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 7072 0x1E150, // 1E150..1E28F; UNKNOWN 7073 0x1E290, // 1E290..1E2AE; TOTO 7074 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 7075 0x1E2C0, // 1E2C0..1E2F9; WANCHO 7076 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 7077 0x1E2FF, // 1E2FF ; WANCHO 7078 0x1E300, // 1E300..1E4CF; UNKNOWN 7079 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 7080 0x1E4FA, // 1E4FA..1E5CF; UNKNOWN 7081 0x1E5D0, // 1E5D0..1E5FA; OL_ONAL 7082 0x1E5FB, // 1E5FB..1E5FE; UNKNOWN 7083 0x1E5FF, // 1E5FF ; OL_ONAL 7084 0x1E600, // 1E600..1E7DF; UNKNOWN 7085 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 7086 0x1E7E7, // 1E7E7 ; UNKNOWN 7087 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 7088 0x1E7EC, // 1E7EC ; UNKNOWN 7089 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 7090 0x1E7EF, // 1E7EF ; UNKNOWN 7091 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 7092 0x1E7FF, // 1E7FF ; UNKNOWN 7093 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 7094 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 7095 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 7096 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 7097 0x1E900, // 1E900..1E94B; ADLAM 7098 0x1E94C, // 1E94C..1E94F; UNKNOWN 7099 0x1E950, // 1E950..1E959; ADLAM 7100 0x1E95A, // 
1E95A..1E95D; UNKNOWN 7101 0x1E95E, // 1E95E..1E95F; ADLAM 7102 0x1E960, // 1E960..1EC70; UNKNOWN 7103 0x1EC71, // 1EC71..1ECB4; COMMON 7104 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 7105 0x1ED01, // 1ED01..1ED3D; COMMON 7106 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 7107 0x1EE00, // 1EE00..1EE03; ARABIC 7108 0x1EE04, // 1EE04 ; UNKNOWN 7109 0x1EE05, // 1EE05..1EE1F; ARABIC 7110 0x1EE20, // 1EE20 ; UNKNOWN 7111 0x1EE21, // 1EE21..1EE22; ARABIC 7112 0x1EE23, // 1EE23 ; UNKNOWN 7113 0x1EE24, // 1EE24 ; ARABIC 7114 0x1EE25, // 1EE25..1EE26; UNKNOWN 7115 0x1EE27, // 1EE27 ; ARABIC 7116 0x1EE28, // 1EE28 ; UNKNOWN 7117 0x1EE29, // 1EE29..1EE32; ARABIC 7118 0x1EE33, // 1EE33 ; UNKNOWN 7119 0x1EE34, // 1EE34..1EE37; ARABIC 7120 0x1EE38, // 1EE38 ; UNKNOWN 7121 0x1EE39, // 1EE39 ; ARABIC 7122 0x1EE3A, // 1EE3A ; UNKNOWN 7123 0x1EE3B, // 1EE3B ; ARABIC 7124 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 7125 0x1EE42, // 1EE42 ; ARABIC 7126 0x1EE43, // 1EE43..1EE46; UNKNOWN 7127 0x1EE47, // 1EE47 ; ARABIC 7128 0x1EE48, // 1EE48 ; UNKNOWN 7129 0x1EE49, // 1EE49 ; ARABIC 7130 0x1EE4A, // 1EE4A ; UNKNOWN 7131 0x1EE4B, // 1EE4B ; ARABIC 7132 0x1EE4C, // 1EE4C ; UNKNOWN 7133 0x1EE4D, // 1EE4D..1EE4F; ARABIC 7134 0x1EE50, // 1EE50 ; UNKNOWN 7135 0x1EE51, // 1EE51..1EE52; ARABIC 7136 0x1EE53, // 1EE53 ; UNKNOWN 7137 0x1EE54, // 1EE54 ; ARABIC 7138 0x1EE55, // 1EE55..1EE56; UNKNOWN 7139 0x1EE57, // 1EE57 ; ARABIC 7140 0x1EE58, // 1EE58 ; UNKNOWN 7141 0x1EE59, // 1EE59 ; ARABIC 7142 0x1EE5A, // 1EE5A ; UNKNOWN 7143 0x1EE5B, // 1EE5B ; ARABIC 7144 0x1EE5C, // 1EE5C ; UNKNOWN 7145 0x1EE5D, // 1EE5D ; ARABIC 7146 0x1EE5E, // 1EE5E ; UNKNOWN 7147 0x1EE5F, // 1EE5F ; ARABIC 7148 0x1EE60, // 1EE60 ; UNKNOWN 7149 0x1EE61, // 1EE61..1EE62; ARABIC 7150 0x1EE63, // 1EE63 ; UNKNOWN 7151 0x1EE64, // 1EE64 ; ARABIC 7152 0x1EE65, // 1EE65..1EE66; UNKNOWN 7153 0x1EE67, // 1EE67..1EE6A; ARABIC 7154 0x1EE6B, // 1EE6B ; UNKNOWN 7155 0x1EE6C, // 1EE6C..1EE72; ARABIC 7156 0x1EE73, // 1EE73 ; UNKNOWN 7157 0x1EE74, // 1EE74..1EE77; 
ARABIC 7158 0x1EE78, // 1EE78 ; UNKNOWN 7159 0x1EE79, // 1EE79..1EE7C; ARABIC 7160 0x1EE7D, // 1EE7D ; UNKNOWN 7161 0x1EE7E, // 1EE7E ; ARABIC 7162 0x1EE7F, // 1EE7F ; UNKNOWN 7163 0x1EE80, // 1EE80..1EE89; ARABIC 7164 0x1EE8A, // 1EE8A ; UNKNOWN 7165 0x1EE8B, // 1EE8B..1EE9B; ARABIC 7166 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 7167 0x1EEA1, // 1EEA1..1EEA3; ARABIC 7168 0x1EEA4, // 1EEA4 ; UNKNOWN 7169 0x1EEA5, // 1EEA5..1EEA9; ARABIC 7170 0x1EEAA, // 1EEAA ; UNKNOWN 7171 0x1EEAB, // 1EEAB..1EEBB; ARABIC 7172 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 7173 0x1EEF0, // 1EEF0..1EEF1; ARABIC 7174 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 7175 0x1F000, // 1F000..1F02B; COMMON 7176 0x1F02C, // 1F02C..1F02F; UNKNOWN 7177 0x1F030, // 1F030..1F093; COMMON 7178 0x1F094, // 1F094..1F09F; UNKNOWN 7179 0x1F0A0, // 1F0A0..1F0AE; COMMON 7180 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 7181 0x1F0B1, // 1F0B1..1F0BF; COMMON 7182 0x1F0C0, // 1F0C0 ; UNKNOWN 7183 0x1F0C1, // 1F0C1..1F0CF; COMMON 7184 0x1F0D0, // 1F0D0 ; UNKNOWN 7185 0x1F0D1, // 1F0D1..1F0F5; COMMON 7186 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 7187 0x1F100, // 1F100..1F1AD; COMMON 7188 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 7189 0x1F1E6, // 1F1E6..1F1FF; COMMON 7190 0x1F200, // 1F200 ; HIRAGANA 7191 0x1F201, // 1F201..1F202; COMMON 7192 0x1F203, // 1F203..1F20F; UNKNOWN 7193 0x1F210, // 1F210..1F23B; COMMON 7194 0x1F23C, // 1F23C..1F23F; UNKNOWN 7195 0x1F240, // 1F240..1F248; COMMON 7196 0x1F249, // 1F249..1F24F; UNKNOWN 7197 0x1F250, // 1F250..1F251; COMMON 7198 0x1F252, // 1F252..1F25F; UNKNOWN 7199 0x1F260, // 1F260..1F265; COMMON 7200 0x1F266, // 1F266..1F2FF; UNKNOWN 7201 0x1F300, // 1F300..1F6D7; COMMON 7202 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 7203 0x1F6DC, // 1F6DC..1F6EC; COMMON 7204 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 7205 0x1F6F0, // 1F6F0..1F6FC; COMMON 7206 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 7207 0x1F700, // 1F700..1F776; COMMON 7208 0x1F777, // 1F777..1F77A; UNKNOWN 7209 0x1F77B, // 1F77B..1F7D9; COMMON 7210 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7211 
0x1F7E0, // 1F7E0..1F7EB; COMMON 7212 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7213 0x1F7F0, // 1F7F0 ; COMMON 7214 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7215 0x1F800, // 1F800..1F80B; COMMON 7216 0x1F80C, // 1F80C..1F80F; UNKNOWN 7217 0x1F810, // 1F810..1F847; COMMON 7218 0x1F848, // 1F848..1F84F; UNKNOWN 7219 0x1F850, // 1F850..1F859; COMMON 7220 0x1F85A, // 1F85A..1F85F; UNKNOWN 7221 0x1F860, // 1F860..1F887; COMMON 7222 0x1F888, // 1F888..1F88F; UNKNOWN 7223 0x1F890, // 1F890..1F8AD; COMMON 7224 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7225 0x1F8B0, // 1F8B0..1F8BB; COMMON 7226 0x1F8BC, // 1F8BC..1F8BF; UNKNOWN 7227 0x1F8C0, // 1F8C0..1F8C1; COMMON 7228 0x1F8C2, // 1F8C2..1F8FF; UNKNOWN 7229 0x1F900, // 1F900..1FA53; COMMON 7230 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7231 0x1FA60, // 1FA60..1FA6D; COMMON 7232 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7233 0x1FA70, // 1FA70..1FA7C; COMMON 7234 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7235 0x1FA80, // 1FA80..1FA89; COMMON 7236 0x1FA8A, // 1FA8A..1FA8E; UNKNOWN 7237 0x1FA8F, // 1FA8F..1FAC6; COMMON 7238 0x1FAC7, // 1FAC7..1FACD; UNKNOWN 7239 0x1FACE, // 1FACE..1FADC; COMMON 7240 0x1FADD, // 1FADD..1FADE; UNKNOWN 7241 0x1FADF, // 1FADF..1FAE9; COMMON 7242 0x1FAEA, // 1FAEA..1FAEF; UNKNOWN 7243 0x1FAF0, // 1FAF0..1FAF8; COMMON 7244 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7245 0x1FB00, // 1FB00..1FB92; COMMON 7246 0x1FB93, // 1FB93 ; UNKNOWN 7247 0x1FB94, // 1FB94..1FBF9; COMMON 7248 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7249 0x20000, // 20000..2A6DF; HAN 7250 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7251 0x2A700, // 2A700..2B739; HAN 7252 0x2B73A, // 2B73A..2B73F; UNKNOWN 7253 0x2B740, // 2B740..2B81D; HAN 7254 0x2B81E, // 2B81E..2B81F; UNKNOWN 7255 0x2B820, // 2B820..2CEA1; HAN 7256 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7257 0x2CEB0, // 2CEB0..2EBE0; HAN 7258 0x2EBE1, // 2EBE1..2EBEF; UNKNOWN 7259 0x2EBF0, // 2EBF0..2EE5D; HAN 7260 0x2EE5E, // 2EE5E..2F7FF; UNKNOWN 7261 0x2F800, // 2F800..2FA1D; HAN 7262 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7263 0x30000, // 30000..3134A; HAN 
7264 0x3134B, // 3134B..3134F; UNKNOWN 7265 0x31350, // 31350..323AF; HAN 7266 0x323B0, // 323B0..E0000; UNKNOWN 7267 0xE0001, // E0001 ; COMMON 7268 0xE0002, // E0002..E001F; UNKNOWN 7269 0xE0020, // E0020..E007F; COMMON 7270 0xE0080, // E0080..E00FF; UNKNOWN 7271 0xE0100, // E0100..E01EF; INHERITED 7272 0xE01F0, // E01F0..10FFFF; UNKNOWN 7273 }; 7274 7275 private static final UnicodeScript[] scripts = { 7276 COMMON, // 0000..0040 7277 LATIN, // 0041..005A 7278 COMMON, // 005B..0060 7279 LATIN, // 0061..007A 7280 COMMON, // 007B..00A9 7281 LATIN, // 00AA 7282 COMMON, // 00AB..00B9 7283 LATIN, // 00BA 7284 COMMON, // 00BB..00BF 7285 LATIN, // 00C0..00D6 7286 COMMON, // 00D7 7287 LATIN, // 00D8..00F6 7288 COMMON, // 00F7 7289 LATIN, // 00F8..02B8 7290 COMMON, // 02B9..02DF 7291 LATIN, // 02E0..02E4 7292 COMMON, // 02E5..02E9 7293 BOPOMOFO, // 02EA..02EB 7294 COMMON, // 02EC..02FF 7295 INHERITED, // 0300..036F 7296 GREEK, // 0370..0373 7297 COMMON, // 0374 7298 GREEK, // 0375..0377 7299 UNKNOWN, // 0378..0379 7300 GREEK, // 037A..037D 7301 COMMON, // 037E 7302 GREEK, // 037F 7303 UNKNOWN, // 0380..0383 7304 GREEK, // 0384 7305 COMMON, // 0385 7306 GREEK, // 0386 7307 COMMON, // 0387 7308 GREEK, // 0388..038A 7309 UNKNOWN, // 038B 7310 GREEK, // 038C 7311 UNKNOWN, // 038D 7312 GREEK, // 038E..03A1 7313 UNKNOWN, // 03A2 7314 GREEK, // 03A3..03E1 7315 COPTIC, // 03E2..03EF 7316 GREEK, // 03F0..03FF 7317 CYRILLIC, // 0400..0484 7318 INHERITED, // 0485..0486 7319 CYRILLIC, // 0487..052F 7320 UNKNOWN, // 0530 7321 ARMENIAN, // 0531..0556 7322 UNKNOWN, // 0557..0558 7323 ARMENIAN, // 0559..058A 7324 UNKNOWN, // 058B..058C 7325 ARMENIAN, // 058D..058F 7326 UNKNOWN, // 0590 7327 HEBREW, // 0591..05C7 7328 UNKNOWN, // 05C8..05CF 7329 HEBREW, // 05D0..05EA 7330 UNKNOWN, // 05EB..05EE 7331 HEBREW, // 05EF..05F4 7332 UNKNOWN, // 05F5..05FF 7333 ARABIC, // 0600..0604 7334 COMMON, // 0605 7335 ARABIC, // 0606..060B 7336 COMMON, // 060C 7337 ARABIC, // 060D..061A 7338 COMMON, // 
061B 7339 ARABIC, // 061C..061E 7340 COMMON, // 061F 7341 ARABIC, // 0620..063F 7342 COMMON, // 0640 7343 ARABIC, // 0641..064A 7344 INHERITED, // 064B..0655 7345 ARABIC, // 0656..066F 7346 INHERITED, // 0670 7347 ARABIC, // 0671..06DC 7348 COMMON, // 06DD 7349 ARABIC, // 06DE..06FF 7350 SYRIAC, // 0700..070D 7351 UNKNOWN, // 070E 7352 SYRIAC, // 070F..074A 7353 UNKNOWN, // 074B..074C 7354 SYRIAC, // 074D..074F 7355 ARABIC, // 0750..077F 7356 THAANA, // 0780..07B1 7357 UNKNOWN, // 07B2..07BF 7358 NKO, // 07C0..07FA 7359 UNKNOWN, // 07FB..07FC 7360 NKO, // 07FD..07FF 7361 SAMARITAN, // 0800..082D 7362 UNKNOWN, // 082E..082F 7363 SAMARITAN, // 0830..083E 7364 UNKNOWN, // 083F 7365 MANDAIC, // 0840..085B 7366 UNKNOWN, // 085C..085D 7367 MANDAIC, // 085E 7368 UNKNOWN, // 085F 7369 SYRIAC, // 0860..086A 7370 UNKNOWN, // 086B..086F 7371 ARABIC, // 0870..088E 7372 UNKNOWN, // 088F 7373 ARABIC, // 0890..0891 7374 UNKNOWN, // 0892..0896 7375 ARABIC, // 0897..08E1 7376 COMMON, // 08E2 7377 ARABIC, // 08E3..08FF 7378 DEVANAGARI, // 0900..0950 7379 INHERITED, // 0951..0954 7380 DEVANAGARI, // 0955..0963 7381 COMMON, // 0964..0965 7382 DEVANAGARI, // 0966..097F 7383 BENGALI, // 0980..0983 7384 UNKNOWN, // 0984 7385 BENGALI, // 0985..098C 7386 UNKNOWN, // 098D..098E 7387 BENGALI, // 098F..0990 7388 UNKNOWN, // 0991..0992 7389 BENGALI, // 0993..09A8 7390 UNKNOWN, // 09A9 7391 BENGALI, // 09AA..09B0 7392 UNKNOWN, // 09B1 7393 BENGALI, // 09B2 7394 UNKNOWN, // 09B3..09B5 7395 BENGALI, // 09B6..09B9 7396 UNKNOWN, // 09BA..09BB 7397 BENGALI, // 09BC..09C4 7398 UNKNOWN, // 09C5..09C6 7399 BENGALI, // 09C7..09C8 7400 UNKNOWN, // 09C9..09CA 7401 BENGALI, // 09CB..09CE 7402 UNKNOWN, // 09CF..09D6 7403 BENGALI, // 09D7 7404 UNKNOWN, // 09D8..09DB 7405 BENGALI, // 09DC..09DD 7406 UNKNOWN, // 09DE 7407 BENGALI, // 09DF..09E3 7408 UNKNOWN, // 09E4..09E5 7409 BENGALI, // 09E6..09FE 7410 UNKNOWN, // 09FF..0A00 7411 GURMUKHI, // 0A01..0A03 7412 UNKNOWN, // 0A04 7413 GURMUKHI, // 0A05..0A0A 7414 
UNKNOWN, // 0A0B..0A0E 7415 GURMUKHI, // 0A0F..0A10 7416 UNKNOWN, // 0A11..0A12 7417 GURMUKHI, // 0A13..0A28 7418 UNKNOWN, // 0A29 7419 GURMUKHI, // 0A2A..0A30 7420 UNKNOWN, // 0A31 7421 GURMUKHI, // 0A32..0A33 7422 UNKNOWN, // 0A34 7423 GURMUKHI, // 0A35..0A36 7424 UNKNOWN, // 0A37 7425 GURMUKHI, // 0A38..0A39 7426 UNKNOWN, // 0A3A..0A3B 7427 GURMUKHI, // 0A3C 7428 UNKNOWN, // 0A3D 7429 GURMUKHI, // 0A3E..0A42 7430 UNKNOWN, // 0A43..0A46 7431 GURMUKHI, // 0A47..0A48 7432 UNKNOWN, // 0A49..0A4A 7433 GURMUKHI, // 0A4B..0A4D 7434 UNKNOWN, // 0A4E..0A50 7435 GURMUKHI, // 0A51 7436 UNKNOWN, // 0A52..0A58 7437 GURMUKHI, // 0A59..0A5C 7438 UNKNOWN, // 0A5D 7439 GURMUKHI, // 0A5E 7440 UNKNOWN, // 0A5F..0A65 7441 GURMUKHI, // 0A66..0A76 7442 UNKNOWN, // 0A77..0A80 7443 GUJARATI, // 0A81..0A83 7444 UNKNOWN, // 0A84 7445 GUJARATI, // 0A85..0A8D 7446 UNKNOWN, // 0A8E 7447 GUJARATI, // 0A8F..0A91 7448 UNKNOWN, // 0A92 7449 GUJARATI, // 0A93..0AA8 7450 UNKNOWN, // 0AA9 7451 GUJARATI, // 0AAA..0AB0 7452 UNKNOWN, // 0AB1 7453 GUJARATI, // 0AB2..0AB3 7454 UNKNOWN, // 0AB4 7455 GUJARATI, // 0AB5..0AB9 7456 UNKNOWN, // 0ABA..0ABB 7457 GUJARATI, // 0ABC..0AC5 7458 UNKNOWN, // 0AC6 7459 GUJARATI, // 0AC7..0AC9 7460 UNKNOWN, // 0ACA 7461 GUJARATI, // 0ACB..0ACD 7462 UNKNOWN, // 0ACE..0ACF 7463 GUJARATI, // 0AD0 7464 UNKNOWN, // 0AD1..0ADF 7465 GUJARATI, // 0AE0..0AE3 7466 UNKNOWN, // 0AE4..0AE5 7467 GUJARATI, // 0AE6..0AF1 7468 UNKNOWN, // 0AF2..0AF8 7469 GUJARATI, // 0AF9..0AFF 7470 UNKNOWN, // 0B00 7471 ORIYA, // 0B01..0B03 7472 UNKNOWN, // 0B04 7473 ORIYA, // 0B05..0B0C 7474 UNKNOWN, // 0B0D..0B0E 7475 ORIYA, // 0B0F..0B10 7476 UNKNOWN, // 0B11..0B12 7477 ORIYA, // 0B13..0B28 7478 UNKNOWN, // 0B29 7479 ORIYA, // 0B2A..0B30 7480 UNKNOWN, // 0B31 7481 ORIYA, // 0B32..0B33 7482 UNKNOWN, // 0B34 7483 ORIYA, // 0B35..0B39 7484 UNKNOWN, // 0B3A..0B3B 7485 ORIYA, // 0B3C..0B44 7486 UNKNOWN, // 0B45..0B46 7487 ORIYA, // 0B47..0B48 7488 UNKNOWN, // 0B49..0B4A 7489 ORIYA, // 0B4B..0B4D 7490 
UNKNOWN, // 0B4E..0B54 7491 ORIYA, // 0B55..0B57 7492 UNKNOWN, // 0B58..0B5B 7493 ORIYA, // 0B5C..0B5D 7494 UNKNOWN, // 0B5E 7495 ORIYA, // 0B5F..0B63 7496 UNKNOWN, // 0B64..0B65 7497 ORIYA, // 0B66..0B77 7498 UNKNOWN, // 0B78..0B81 7499 TAMIL, // 0B82..0B83 7500 UNKNOWN, // 0B84 7501 TAMIL, // 0B85..0B8A 7502 UNKNOWN, // 0B8B..0B8D 7503 TAMIL, // 0B8E..0B90 7504 UNKNOWN, // 0B91 7505 TAMIL, // 0B92..0B95 7506 UNKNOWN, // 0B96..0B98 7507 TAMIL, // 0B99..0B9A 7508 UNKNOWN, // 0B9B 7509 TAMIL, // 0B9C 7510 UNKNOWN, // 0B9D 7511 TAMIL, // 0B9E..0B9F 7512 UNKNOWN, // 0BA0..0BA2 7513 TAMIL, // 0BA3..0BA4 7514 UNKNOWN, // 0BA5..0BA7 7515 TAMIL, // 0BA8..0BAA 7516 UNKNOWN, // 0BAB..0BAD 7517 TAMIL, // 0BAE..0BB9 7518 UNKNOWN, // 0BBA..0BBD 7519 TAMIL, // 0BBE..0BC2 7520 UNKNOWN, // 0BC3..0BC5 7521 TAMIL, // 0BC6..0BC8 7522 UNKNOWN, // 0BC9 7523 TAMIL, // 0BCA..0BCD 7524 UNKNOWN, // 0BCE..0BCF 7525 TAMIL, // 0BD0 7526 UNKNOWN, // 0BD1..0BD6 7527 TAMIL, // 0BD7 7528 UNKNOWN, // 0BD8..0BE5 7529 TAMIL, // 0BE6..0BFA 7530 UNKNOWN, // 0BFB..0BFF 7531 TELUGU, // 0C00..0C0C 7532 UNKNOWN, // 0C0D 7533 TELUGU, // 0C0E..0C10 7534 UNKNOWN, // 0C11 7535 TELUGU, // 0C12..0C28 7536 UNKNOWN, // 0C29 7537 TELUGU, // 0C2A..0C39 7538 UNKNOWN, // 0C3A..0C3B 7539 TELUGU, // 0C3C..0C44 7540 UNKNOWN, // 0C45 7541 TELUGU, // 0C46..0C48 7542 UNKNOWN, // 0C49 7543 TELUGU, // 0C4A..0C4D 7544 UNKNOWN, // 0C4E..0C54 7545 TELUGU, // 0C55..0C56 7546 UNKNOWN, // 0C57 7547 TELUGU, // 0C58..0C5A 7548 UNKNOWN, // 0C5B..0C5C 7549 TELUGU, // 0C5D 7550 UNKNOWN, // 0C5E..0C5F 7551 TELUGU, // 0C60..0C63 7552 UNKNOWN, // 0C64..0C65 7553 TELUGU, // 0C66..0C6F 7554 UNKNOWN, // 0C70..0C76 7555 TELUGU, // 0C77..0C7F 7556 KANNADA, // 0C80..0C8C 7557 UNKNOWN, // 0C8D 7558 KANNADA, // 0C8E..0C90 7559 UNKNOWN, // 0C91 7560 KANNADA, // 0C92..0CA8 7561 UNKNOWN, // 0CA9 7562 KANNADA, // 0CAA..0CB3 7563 UNKNOWN, // 0CB4 7564 KANNADA, // 0CB5..0CB9 7565 UNKNOWN, // 0CBA..0CBB 7566 KANNADA, // 0CBC..0CC4 7567 UNKNOWN, // 0CC5 
7568 KANNADA, // 0CC6..0CC8 7569 UNKNOWN, // 0CC9 7570 KANNADA, // 0CCA..0CCD 7571 UNKNOWN, // 0CCE..0CD4 7572 KANNADA, // 0CD5..0CD6 7573 UNKNOWN, // 0CD7..0CDC 7574 KANNADA, // 0CDD..0CDE 7575 UNKNOWN, // 0CDF 7576 KANNADA, // 0CE0..0CE3 7577 UNKNOWN, // 0CE4..0CE5 7578 KANNADA, // 0CE6..0CEF 7579 UNKNOWN, // 0CF0 7580 KANNADA, // 0CF1..0CF3 7581 UNKNOWN, // 0CF4..0CFF 7582 MALAYALAM, // 0D00..0D0C 7583 UNKNOWN, // 0D0D 7584 MALAYALAM, // 0D0E..0D10 7585 UNKNOWN, // 0D11 7586 MALAYALAM, // 0D12..0D44 7587 UNKNOWN, // 0D45 7588 MALAYALAM, // 0D46..0D48 7589 UNKNOWN, // 0D49 7590 MALAYALAM, // 0D4A..0D4F 7591 UNKNOWN, // 0D50..0D53 7592 MALAYALAM, // 0D54..0D63 7593 UNKNOWN, // 0D64..0D65 7594 MALAYALAM, // 0D66..0D7F 7595 UNKNOWN, // 0D80 7596 SINHALA, // 0D81..0D83 7597 UNKNOWN, // 0D84 7598 SINHALA, // 0D85..0D96 7599 UNKNOWN, // 0D97..0D99 7600 SINHALA, // 0D9A..0DB1 7601 UNKNOWN, // 0DB2 7602 SINHALA, // 0DB3..0DBB 7603 UNKNOWN, // 0DBC 7604 SINHALA, // 0DBD 7605 UNKNOWN, // 0DBE..0DBF 7606 SINHALA, // 0DC0..0DC6 7607 UNKNOWN, // 0DC7..0DC9 7608 SINHALA, // 0DCA 7609 UNKNOWN, // 0DCB..0DCE 7610 SINHALA, // 0DCF..0DD4 7611 UNKNOWN, // 0DD5 7612 SINHALA, // 0DD6 7613 UNKNOWN, // 0DD7 7614 SINHALA, // 0DD8..0DDF 7615 UNKNOWN, // 0DE0..0DE5 7616 SINHALA, // 0DE6..0DEF 7617 UNKNOWN, // 0DF0..0DF1 7618 SINHALA, // 0DF2..0DF4 7619 UNKNOWN, // 0DF5..0E00 7620 THAI, // 0E01..0E3A 7621 UNKNOWN, // 0E3B..0E3E 7622 COMMON, // 0E3F 7623 THAI, // 0E40..0E5B 7624 UNKNOWN, // 0E5C..0E80 7625 LAO, // 0E81..0E82 7626 UNKNOWN, // 0E83 7627 LAO, // 0E84 7628 UNKNOWN, // 0E85 7629 LAO, // 0E86..0E8A 7630 UNKNOWN, // 0E8B 7631 LAO, // 0E8C..0EA3 7632 UNKNOWN, // 0EA4 7633 LAO, // 0EA5 7634 UNKNOWN, // 0EA6 7635 LAO, // 0EA7..0EBD 7636 UNKNOWN, // 0EBE..0EBF 7637 LAO, // 0EC0..0EC4 7638 UNKNOWN, // 0EC5 7639 LAO, // 0EC6 7640 UNKNOWN, // 0EC7 7641 LAO, // 0EC8..0ECE 7642 UNKNOWN, // 0ECF 7643 LAO, // 0ED0..0ED9 7644 UNKNOWN, // 0EDA..0EDB 7645 LAO, // 0EDC..0EDF 7646 UNKNOWN, // 
0EE0..0EFF 7647 TIBETAN, // 0F00..0F47 7648 UNKNOWN, // 0F48 7649 TIBETAN, // 0F49..0F6C 7650 UNKNOWN, // 0F6D..0F70 7651 TIBETAN, // 0F71..0F97 7652 UNKNOWN, // 0F98 7653 TIBETAN, // 0F99..0FBC 7654 UNKNOWN, // 0FBD 7655 TIBETAN, // 0FBE..0FCC 7656 UNKNOWN, // 0FCD 7657 TIBETAN, // 0FCE..0FD4 7658 COMMON, // 0FD5..0FD8 7659 TIBETAN, // 0FD9..0FDA 7660 UNKNOWN, // 0FDB..0FFF 7661 MYANMAR, // 1000..109F 7662 GEORGIAN, // 10A0..10C5 7663 UNKNOWN, // 10C6 7664 GEORGIAN, // 10C7 7665 UNKNOWN, // 10C8..10CC 7666 GEORGIAN, // 10CD 7667 UNKNOWN, // 10CE..10CF 7668 GEORGIAN, // 10D0..10FA 7669 COMMON, // 10FB 7670 GEORGIAN, // 10FC..10FF 7671 HANGUL, // 1100..11FF 7672 ETHIOPIC, // 1200..1248 7673 UNKNOWN, // 1249 7674 ETHIOPIC, // 124A..124D 7675 UNKNOWN, // 124E..124F 7676 ETHIOPIC, // 1250..1256 7677 UNKNOWN, // 1257 7678 ETHIOPIC, // 1258 7679 UNKNOWN, // 1259 7680 ETHIOPIC, // 125A..125D 7681 UNKNOWN, // 125E..125F 7682 ETHIOPIC, // 1260..1288 7683 UNKNOWN, // 1289 7684 ETHIOPIC, // 128A..128D 7685 UNKNOWN, // 128E..128F 7686 ETHIOPIC, // 1290..12B0 7687 UNKNOWN, // 12B1 7688 ETHIOPIC, // 12B2..12B5 7689 UNKNOWN, // 12B6..12B7 7690 ETHIOPIC, // 12B8..12BE 7691 UNKNOWN, // 12BF 7692 ETHIOPIC, // 12C0 7693 UNKNOWN, // 12C1 7694 ETHIOPIC, // 12C2..12C5 7695 UNKNOWN, // 12C6..12C7 7696 ETHIOPIC, // 12C8..12D6 7697 UNKNOWN, // 12D7 7698 ETHIOPIC, // 12D8..1310 7699 UNKNOWN, // 1311 7700 ETHIOPIC, // 1312..1315 7701 UNKNOWN, // 1316..1317 7702 ETHIOPIC, // 1318..135A 7703 UNKNOWN, // 135B..135C 7704 ETHIOPIC, // 135D..137C 7705 UNKNOWN, // 137D..137F 7706 ETHIOPIC, // 1380..1399 7707 UNKNOWN, // 139A..139F 7708 CHEROKEE, // 13A0..13F5 7709 UNKNOWN, // 13F6..13F7 7710 CHEROKEE, // 13F8..13FD 7711 UNKNOWN, // 13FE..13FF 7712 CANADIAN_ABORIGINAL, // 1400..167F 7713 OGHAM, // 1680..169C 7714 UNKNOWN, // 169D..169F 7715 RUNIC, // 16A0..16EA 7716 COMMON, // 16EB..16ED 7717 RUNIC, // 16EE..16F8 7718 UNKNOWN, // 16F9..16FF 7719 TAGALOG, // 1700..1715 7720 UNKNOWN, // 1716..171E 
7721 TAGALOG, // 171F 7722 HANUNOO, // 1720..1734 7723 COMMON, // 1735..1736 7724 UNKNOWN, // 1737..173F 7725 BUHID, // 1740..1753 7726 UNKNOWN, // 1754..175F 7727 TAGBANWA, // 1760..176C 7728 UNKNOWN, // 176D 7729 TAGBANWA, // 176E..1770 7730 UNKNOWN, // 1771 7731 TAGBANWA, // 1772..1773 7732 UNKNOWN, // 1774..177F 7733 KHMER, // 1780..17DD 7734 UNKNOWN, // 17DE..17DF 7735 KHMER, // 17E0..17E9 7736 UNKNOWN, // 17EA..17EF 7737 KHMER, // 17F0..17F9 7738 UNKNOWN, // 17FA..17FF 7739 MONGOLIAN, // 1800..1801 7740 COMMON, // 1802..1803 7741 MONGOLIAN, // 1804 7742 COMMON, // 1805 7743 MONGOLIAN, // 1806..1819 7744 UNKNOWN, // 181A..181F 7745 MONGOLIAN, // 1820..1878 7746 UNKNOWN, // 1879..187F 7747 MONGOLIAN, // 1880..18AA 7748 UNKNOWN, // 18AB..18AF 7749 CANADIAN_ABORIGINAL, // 18B0..18F5 7750 UNKNOWN, // 18F6..18FF 7751 LIMBU, // 1900..191E 7752 UNKNOWN, // 191F 7753 LIMBU, // 1920..192B 7754 UNKNOWN, // 192C..192F 7755 LIMBU, // 1930..193B 7756 UNKNOWN, // 193C..193F 7757 LIMBU, // 1940 7758 UNKNOWN, // 1941..1943 7759 LIMBU, // 1944..194F 7760 TAI_LE, // 1950..196D 7761 UNKNOWN, // 196E..196F 7762 TAI_LE, // 1970..1974 7763 UNKNOWN, // 1975..197F 7764 NEW_TAI_LUE, // 1980..19AB 7765 UNKNOWN, // 19AC..19AF 7766 NEW_TAI_LUE, // 19B0..19C9 7767 UNKNOWN, // 19CA..19CF 7768 NEW_TAI_LUE, // 19D0..19DA 7769 UNKNOWN, // 19DB..19DD 7770 NEW_TAI_LUE, // 19DE..19DF 7771 KHMER, // 19E0..19FF 7772 BUGINESE, // 1A00..1A1B 7773 UNKNOWN, // 1A1C..1A1D 7774 BUGINESE, // 1A1E..1A1F 7775 TAI_THAM, // 1A20..1A5E 7776 UNKNOWN, // 1A5F 7777 TAI_THAM, // 1A60..1A7C 7778 UNKNOWN, // 1A7D..1A7E 7779 TAI_THAM, // 1A7F..1A89 7780 UNKNOWN, // 1A8A..1A8F 7781 TAI_THAM, // 1A90..1A99 7782 UNKNOWN, // 1A9A..1A9F 7783 TAI_THAM, // 1AA0..1AAD 7784 UNKNOWN, // 1AAE..1AAF 7785 INHERITED, // 1AB0..1ACE 7786 UNKNOWN, // 1ACF..1AFF 7787 BALINESE, // 1B00..1B4C 7788 UNKNOWN, // 1B4D 7789 BALINESE, // 1B4E..1B7F 7790 SUNDANESE, // 1B80..1BBF 7791 BATAK, // 1BC0..1BF3 7792 UNKNOWN, // 1BF4..1BFB 7793 
BATAK, // 1BFC..1BFF 7794 LEPCHA, // 1C00..1C37 7795 UNKNOWN, // 1C38..1C3A 7796 LEPCHA, // 1C3B..1C49 7797 UNKNOWN, // 1C4A..1C4C 7798 LEPCHA, // 1C4D..1C4F 7799 OL_CHIKI, // 1C50..1C7F 7800 CYRILLIC, // 1C80..1C8A 7801 UNKNOWN, // 1C8B..1C8F 7802 GEORGIAN, // 1C90..1CBA 7803 UNKNOWN, // 1CBB..1CBC 7804 GEORGIAN, // 1CBD..1CBF 7805 SUNDANESE, // 1CC0..1CC7 7806 UNKNOWN, // 1CC8..1CCF 7807 INHERITED, // 1CD0..1CD2 7808 COMMON, // 1CD3 7809 INHERITED, // 1CD4..1CE0 7810 COMMON, // 1CE1 7811 INHERITED, // 1CE2..1CE8 7812 COMMON, // 1CE9..1CEC 7813 INHERITED, // 1CED 7814 COMMON, // 1CEE..1CF3 7815 INHERITED, // 1CF4 7816 COMMON, // 1CF5..1CF7 7817 INHERITED, // 1CF8..1CF9 7818 COMMON, // 1CFA 7819 UNKNOWN, // 1CFB..1CFF 7820 LATIN, // 1D00..1D25 7821 GREEK, // 1D26..1D2A 7822 CYRILLIC, // 1D2B 7823 LATIN, // 1D2C..1D5C 7824 GREEK, // 1D5D..1D61 7825 LATIN, // 1D62..1D65 7826 GREEK, // 1D66..1D6A 7827 LATIN, // 1D6B..1D77 7828 CYRILLIC, // 1D78 7829 LATIN, // 1D79..1DBE 7830 GREEK, // 1DBF 7831 INHERITED, // 1DC0..1DFF 7832 LATIN, // 1E00..1EFF 7833 GREEK, // 1F00..1F15 7834 UNKNOWN, // 1F16..1F17 7835 GREEK, // 1F18..1F1D 7836 UNKNOWN, // 1F1E..1F1F 7837 GREEK, // 1F20..1F45 7838 UNKNOWN, // 1F46..1F47 7839 GREEK, // 1F48..1F4D 7840 UNKNOWN, // 1F4E..1F4F 7841 GREEK, // 1F50..1F57 7842 UNKNOWN, // 1F58 7843 GREEK, // 1F59 7844 UNKNOWN, // 1F5A 7845 GREEK, // 1F5B 7846 UNKNOWN, // 1F5C 7847 GREEK, // 1F5D 7848 UNKNOWN, // 1F5E 7849 GREEK, // 1F5F..1F7D 7850 UNKNOWN, // 1F7E..1F7F 7851 GREEK, // 1F80..1FB4 7852 UNKNOWN, // 1FB5 7853 GREEK, // 1FB6..1FC4 7854 UNKNOWN, // 1FC5 7855 GREEK, // 1FC6..1FD3 7856 UNKNOWN, // 1FD4..1FD5 7857 GREEK, // 1FD6..1FDB 7858 UNKNOWN, // 1FDC 7859 GREEK, // 1FDD..1FEF 7860 UNKNOWN, // 1FF0..1FF1 7861 GREEK, // 1FF2..1FF4 7862 UNKNOWN, // 1FF5 7863 GREEK, // 1FF6..1FFE 7864 UNKNOWN, // 1FFF 7865 COMMON, // 2000..200B 7866 INHERITED, // 200C..200D 7867 COMMON, // 200E..2064 7868 UNKNOWN, // 2065 7869 COMMON, // 2066..2070 7870 LATIN, // 
2071 7871 UNKNOWN, // 2072..2073 7872 COMMON, // 2074..207E 7873 LATIN, // 207F 7874 COMMON, // 2080..208E 7875 UNKNOWN, // 208F 7876 LATIN, // 2090..209C 7877 UNKNOWN, // 209D..209F 7878 COMMON, // 20A0..20C0 7879 UNKNOWN, // 20C1..20CF 7880 INHERITED, // 20D0..20F0 7881 UNKNOWN, // 20F1..20FF 7882 COMMON, // 2100..2125 7883 GREEK, // 2126 7884 COMMON, // 2127..2129 7885 LATIN, // 212A..212B 7886 COMMON, // 212C..2131 7887 LATIN, // 2132 7888 COMMON, // 2133..214D 7889 LATIN, // 214E 7890 COMMON, // 214F..215F 7891 LATIN, // 2160..2188 7892 COMMON, // 2189..218B 7893 UNKNOWN, // 218C..218F 7894 COMMON, // 2190..2429 7895 UNKNOWN, // 242A..243F 7896 COMMON, // 2440..244A 7897 UNKNOWN, // 244B..245F 7898 COMMON, // 2460..27FF 7899 BRAILLE, // 2800..28FF 7900 COMMON, // 2900..2B73 7901 UNKNOWN, // 2B74..2B75 7902 COMMON, // 2B76..2B95 7903 UNKNOWN, // 2B96 7904 COMMON, // 2B97..2BFF 7905 GLAGOLITIC, // 2C00..2C5F 7906 LATIN, // 2C60..2C7F 7907 COPTIC, // 2C80..2CF3 7908 UNKNOWN, // 2CF4..2CF8 7909 COPTIC, // 2CF9..2CFF 7910 GEORGIAN, // 2D00..2D25 7911 UNKNOWN, // 2D26 7912 GEORGIAN, // 2D27 7913 UNKNOWN, // 2D28..2D2C 7914 GEORGIAN, // 2D2D 7915 UNKNOWN, // 2D2E..2D2F 7916 TIFINAGH, // 2D30..2D67 7917 UNKNOWN, // 2D68..2D6E 7918 TIFINAGH, // 2D6F..2D70 7919 UNKNOWN, // 2D71..2D7E 7920 TIFINAGH, // 2D7F 7921 ETHIOPIC, // 2D80..2D96 7922 UNKNOWN, // 2D97..2D9F 7923 ETHIOPIC, // 2DA0..2DA6 7924 UNKNOWN, // 2DA7 7925 ETHIOPIC, // 2DA8..2DAE 7926 UNKNOWN, // 2DAF 7927 ETHIOPIC, // 2DB0..2DB6 7928 UNKNOWN, // 2DB7 7929 ETHIOPIC, // 2DB8..2DBE 7930 UNKNOWN, // 2DBF 7931 ETHIOPIC, // 2DC0..2DC6 7932 UNKNOWN, // 2DC7 7933 ETHIOPIC, // 2DC8..2DCE 7934 UNKNOWN, // 2DCF 7935 ETHIOPIC, // 2DD0..2DD6 7936 UNKNOWN, // 2DD7 7937 ETHIOPIC, // 2DD8..2DDE 7938 UNKNOWN, // 2DDF 7939 CYRILLIC, // 2DE0..2DFF 7940 COMMON, // 2E00..2E5D 7941 UNKNOWN, // 2E5E..2E7F 7942 HAN, // 2E80..2E99 7943 UNKNOWN, // 2E9A 7944 HAN, // 2E9B..2EF3 7945 UNKNOWN, // 2EF4..2EFF 7946 HAN, // 2F00..2FD5 7947 
UNKNOWN, // 2FD6..2FEF 7948 COMMON, // 2FF0..3004 7949 HAN, // 3005 7950 COMMON, // 3006 7951 HAN, // 3007 7952 COMMON, // 3008..3020 7953 HAN, // 3021..3029 7954 INHERITED, // 302A..302D 7955 HANGUL, // 302E..302F 7956 COMMON, // 3030..3037 7957 HAN, // 3038..303B 7958 COMMON, // 303C..303F 7959 UNKNOWN, // 3040 7960 HIRAGANA, // 3041..3096 7961 UNKNOWN, // 3097..3098 7962 INHERITED, // 3099..309A 7963 COMMON, // 309B..309C 7964 HIRAGANA, // 309D..309F 7965 COMMON, // 30A0 7966 KATAKANA, // 30A1..30FA 7967 COMMON, // 30FB..30FC 7968 KATAKANA, // 30FD..30FF 7969 UNKNOWN, // 3100..3104 7970 BOPOMOFO, // 3105..312F 7971 UNKNOWN, // 3130 7972 HANGUL, // 3131..318E 7973 UNKNOWN, // 318F 7974 COMMON, // 3190..319F 7975 BOPOMOFO, // 31A0..31BF 7976 COMMON, // 31C0..31E5 7977 UNKNOWN, // 31E6..31EE 7978 COMMON, // 31EF 7979 KATAKANA, // 31F0..31FF 7980 HANGUL, // 3200..321E 7981 UNKNOWN, // 321F 7982 COMMON, // 3220..325F 7983 HANGUL, // 3260..327E 7984 COMMON, // 327F..32CF 7985 KATAKANA, // 32D0..32FE 7986 COMMON, // 32FF 7987 KATAKANA, // 3300..3357 7988 COMMON, // 3358..33FF 7989 HAN, // 3400..4DBF 7990 COMMON, // 4DC0..4DFF 7991 HAN, // 4E00..9FFF 7992 YI, // A000..A48C 7993 UNKNOWN, // A48D..A48F 7994 YI, // A490..A4C6 7995 UNKNOWN, // A4C7..A4CF 7996 LISU, // A4D0..A4FF 7997 VAI, // A500..A62B 7998 UNKNOWN, // A62C..A63F 7999 CYRILLIC, // A640..A69F 8000 BAMUM, // A6A0..A6F7 8001 UNKNOWN, // A6F8..A6FF 8002 COMMON, // A700..A721 8003 LATIN, // A722..A787 8004 COMMON, // A788..A78A 8005 LATIN, // A78B..A7CD 8006 UNKNOWN, // A7CE..A7CF 8007 LATIN, // A7D0..A7D1 8008 UNKNOWN, // A7D2 8009 LATIN, // A7D3 8010 UNKNOWN, // A7D4 8011 LATIN, // A7D5..A7DC 8012 UNKNOWN, // A7DD..A7F1 8013 LATIN, // A7F2..A7FF 8014 SYLOTI_NAGRI, // A800..A82C 8015 UNKNOWN, // A82D..A82F 8016 COMMON, // A830..A839 8017 UNKNOWN, // A83A..A83F 8018 PHAGS_PA, // A840..A877 8019 UNKNOWN, // A878..A87F 8020 SAURASHTRA, // A880..A8C5 8021 UNKNOWN, // A8C6..A8CD 8022 SAURASHTRA, // A8CE..A8D9 8023 
UNKNOWN, // A8DA..A8DF 8024 DEVANAGARI, // A8E0..A8FF 8025 KAYAH_LI, // A900..A92D 8026 COMMON, // A92E 8027 KAYAH_LI, // A92F 8028 REJANG, // A930..A953 8029 UNKNOWN, // A954..A95E 8030 REJANG, // A95F 8031 HANGUL, // A960..A97C 8032 UNKNOWN, // A97D..A97F 8033 JAVANESE, // A980..A9CD 8034 UNKNOWN, // A9CE 8035 COMMON, // A9CF 8036 JAVANESE, // A9D0..A9D9 8037 UNKNOWN, // A9DA..A9DD 8038 JAVANESE, // A9DE..A9DF 8039 MYANMAR, // A9E0..A9FE 8040 UNKNOWN, // A9FF 8041 CHAM, // AA00..AA36 8042 UNKNOWN, // AA37..AA3F 8043 CHAM, // AA40..AA4D 8044 UNKNOWN, // AA4E..AA4F 8045 CHAM, // AA50..AA59 8046 UNKNOWN, // AA5A..AA5B 8047 CHAM, // AA5C..AA5F 8048 MYANMAR, // AA60..AA7F 8049 TAI_VIET, // AA80..AAC2 8050 UNKNOWN, // AAC3..AADA 8051 TAI_VIET, // AADB..AADF 8052 MEETEI_MAYEK, // AAE0..AAF6 8053 UNKNOWN, // AAF7..AB00 8054 ETHIOPIC, // AB01..AB06 8055 UNKNOWN, // AB07..AB08 8056 ETHIOPIC, // AB09..AB0E 8057 UNKNOWN, // AB0F..AB10 8058 ETHIOPIC, // AB11..AB16 8059 UNKNOWN, // AB17..AB1F 8060 ETHIOPIC, // AB20..AB26 8061 UNKNOWN, // AB27 8062 ETHIOPIC, // AB28..AB2E 8063 UNKNOWN, // AB2F 8064 LATIN, // AB30..AB5A 8065 COMMON, // AB5B 8066 LATIN, // AB5C..AB64 8067 GREEK, // AB65 8068 LATIN, // AB66..AB69 8069 COMMON, // AB6A..AB6B 8070 UNKNOWN, // AB6C..AB6F 8071 CHEROKEE, // AB70..ABBF 8072 MEETEI_MAYEK, // ABC0..ABED 8073 UNKNOWN, // ABEE..ABEF 8074 MEETEI_MAYEK, // ABF0..ABF9 8075 UNKNOWN, // ABFA..ABFF 8076 HANGUL, // AC00..D7A3 8077 UNKNOWN, // D7A4..D7AF 8078 HANGUL, // D7B0..D7C6 8079 UNKNOWN, // D7C7..D7CA 8080 HANGUL, // D7CB..D7FB 8081 UNKNOWN, // D7FC..F8FF 8082 HAN, // F900..FA6D 8083 UNKNOWN, // FA6E..FA6F 8084 HAN, // FA70..FAD9 8085 UNKNOWN, // FADA..FAFF 8086 LATIN, // FB00..FB06 8087 UNKNOWN, // FB07..FB12 8088 ARMENIAN, // FB13..FB17 8089 UNKNOWN, // FB18..FB1C 8090 HEBREW, // FB1D..FB36 8091 UNKNOWN, // FB37 8092 HEBREW, // FB38..FB3C 8093 UNKNOWN, // FB3D 8094 HEBREW, // FB3E 8095 UNKNOWN, // FB3F 8096 HEBREW, // FB40..FB41 8097 UNKNOWN, // FB42 8098 
HEBREW, // FB43..FB44 8099 UNKNOWN, // FB45 8100 HEBREW, // FB46..FB4F 8101 ARABIC, // FB50..FBC2 8102 UNKNOWN, // FBC3..FBD2 8103 ARABIC, // FBD3..FD3D 8104 COMMON, // FD3E..FD3F 8105 ARABIC, // FD40..FD8F 8106 UNKNOWN, // FD90..FD91 8107 ARABIC, // FD92..FDC7 8108 UNKNOWN, // FDC8..FDCE 8109 ARABIC, // FDCF 8110 UNKNOWN, // FDD0..FDEF 8111 ARABIC, // FDF0..FDFF 8112 INHERITED, // FE00..FE0F 8113 COMMON, // FE10..FE19 8114 UNKNOWN, // FE1A..FE1F 8115 INHERITED, // FE20..FE2D 8116 CYRILLIC, // FE2E..FE2F 8117 COMMON, // FE30..FE52 8118 UNKNOWN, // FE53 8119 COMMON, // FE54..FE66 8120 UNKNOWN, // FE67 8121 COMMON, // FE68..FE6B 8122 UNKNOWN, // FE6C..FE6F 8123 ARABIC, // FE70..FE74 8124 UNKNOWN, // FE75 8125 ARABIC, // FE76..FEFC 8126 UNKNOWN, // FEFD..FEFE 8127 COMMON, // FEFF 8128 UNKNOWN, // FF00 8129 COMMON, // FF01..FF20 8130 LATIN, // FF21..FF3A 8131 COMMON, // FF3B..FF40 8132 LATIN, // FF41..FF5A 8133 COMMON, // FF5B..FF65 8134 KATAKANA, // FF66..FF6F 8135 COMMON, // FF70 8136 KATAKANA, // FF71..FF9D 8137 COMMON, // FF9E..FF9F 8138 HANGUL, // FFA0..FFBE 8139 UNKNOWN, // FFBF..FFC1 8140 HANGUL, // FFC2..FFC7 8141 UNKNOWN, // FFC8..FFC9 8142 HANGUL, // FFCA..FFCF 8143 UNKNOWN, // FFD0..FFD1 8144 HANGUL, // FFD2..FFD7 8145 UNKNOWN, // FFD8..FFD9 8146 HANGUL, // FFDA..FFDC 8147 UNKNOWN, // FFDD..FFDF 8148 COMMON, // FFE0..FFE6 8149 UNKNOWN, // FFE7 8150 COMMON, // FFE8..FFEE 8151 UNKNOWN, // FFEF..FFF8 8152 COMMON, // FFF9..FFFD 8153 UNKNOWN, // FFFE..FFFF 8154 LINEAR_B, // 10000..1000B 8155 UNKNOWN, // 1000C 8156 LINEAR_B, // 1000D..10026 8157 UNKNOWN, // 10027 8158 LINEAR_B, // 10028..1003A 8159 UNKNOWN, // 1003B 8160 LINEAR_B, // 1003C..1003D 8161 UNKNOWN, // 1003E 8162 LINEAR_B, // 1003F..1004D 8163 UNKNOWN, // 1004E..1004F 8164 LINEAR_B, // 10050..1005D 8165 UNKNOWN, // 1005E..1007F 8166 LINEAR_B, // 10080..100FA 8167 UNKNOWN, // 100FB..100FF 8168 COMMON, // 10100..10102 8169 UNKNOWN, // 10103..10106 8170 COMMON, // 10107..10133 8171 UNKNOWN, // 10134..10136 
8172 COMMON, // 10137..1013F 8173 GREEK, // 10140..1018E 8174 UNKNOWN, // 1018F 8175 COMMON, // 10190..1019C 8176 UNKNOWN, // 1019D..1019F 8177 GREEK, // 101A0 8178 UNKNOWN, // 101A1..101CF 8179 COMMON, // 101D0..101FC 8180 INHERITED, // 101FD 8181 UNKNOWN, // 101FE..1027F 8182 LYCIAN, // 10280..1029C 8183 UNKNOWN, // 1029D..1029F 8184 CARIAN, // 102A0..102D0 8185 UNKNOWN, // 102D1..102DF 8186 INHERITED, // 102E0 8187 COMMON, // 102E1..102FB 8188 UNKNOWN, // 102FC..102FF 8189 OLD_ITALIC, // 10300..10323 8190 UNKNOWN, // 10324..1032C 8191 OLD_ITALIC, // 1032D..1032F 8192 GOTHIC, // 10330..1034A 8193 UNKNOWN, // 1034B..1034F 8194 OLD_PERMIC, // 10350..1037A 8195 UNKNOWN, // 1037B..1037F 8196 UGARITIC, // 10380..1039D 8197 UNKNOWN, // 1039E 8198 UGARITIC, // 1039F 8199 OLD_PERSIAN, // 103A0..103C3 8200 UNKNOWN, // 103C4..103C7 8201 OLD_PERSIAN, // 103C8..103D5 8202 UNKNOWN, // 103D6..103FF 8203 DESERET, // 10400..1044F 8204 SHAVIAN, // 10450..1047F 8205 OSMANYA, // 10480..1049D 8206 UNKNOWN, // 1049E..1049F 8207 OSMANYA, // 104A0..104A9 8208 UNKNOWN, // 104AA..104AF 8209 OSAGE, // 104B0..104D3 8210 UNKNOWN, // 104D4..104D7 8211 OSAGE, // 104D8..104FB 8212 UNKNOWN, // 104FC..104FF 8213 ELBASAN, // 10500..10527 8214 UNKNOWN, // 10528..1052F 8215 CAUCASIAN_ALBANIAN, // 10530..10563 8216 UNKNOWN, // 10564..1056E 8217 CAUCASIAN_ALBANIAN, // 1056F 8218 VITHKUQI, // 10570..1057A 8219 UNKNOWN, // 1057B 8220 VITHKUQI, // 1057C..1058A 8221 UNKNOWN, // 1058B 8222 VITHKUQI, // 1058C..10592 8223 UNKNOWN, // 10593 8224 VITHKUQI, // 10594..10595 8225 UNKNOWN, // 10596 8226 VITHKUQI, // 10597..105A1 8227 UNKNOWN, // 105A2 8228 VITHKUQI, // 105A3..105B1 8229 UNKNOWN, // 105B2 8230 VITHKUQI, // 105B3..105B9 8231 UNKNOWN, // 105BA 8232 VITHKUQI, // 105BB..105BC 8233 UNKNOWN, // 105BD..105BF 8234 TODHRI, // 105C0..105F3 8235 UNKNOWN, // 105F4..105FF 8236 LINEAR_A, // 10600..10736 8237 UNKNOWN, // 10737..1073F 8238 LINEAR_A, // 10740..10755 8239 UNKNOWN, // 10756..1075F 8240 LINEAR_A, // 
10760..10767 8241 UNKNOWN, // 10768..1077F 8242 LATIN, // 10780..10785 8243 UNKNOWN, // 10786 8244 LATIN, // 10787..107B0 8245 UNKNOWN, // 107B1 8246 LATIN, // 107B2..107BA 8247 UNKNOWN, // 107BB..107FF 8248 CYPRIOT, // 10800..10805 8249 UNKNOWN, // 10806..10807 8250 CYPRIOT, // 10808 8251 UNKNOWN, // 10809 8252 CYPRIOT, // 1080A..10835 8253 UNKNOWN, // 10836 8254 CYPRIOT, // 10837..10838 8255 UNKNOWN, // 10839..1083B 8256 CYPRIOT, // 1083C 8257 UNKNOWN, // 1083D..1083E 8258 CYPRIOT, // 1083F 8259 IMPERIAL_ARAMAIC, // 10840..10855 8260 UNKNOWN, // 10856 8261 IMPERIAL_ARAMAIC, // 10857..1085F 8262 PALMYRENE, // 10860..1087F 8263 NABATAEAN, // 10880..1089E 8264 UNKNOWN, // 1089F..108A6 8265 NABATAEAN, // 108A7..108AF 8266 UNKNOWN, // 108B0..108DF 8267 HATRAN, // 108E0..108F2 8268 UNKNOWN, // 108F3 8269 HATRAN, // 108F4..108F5 8270 UNKNOWN, // 108F6..108FA 8271 HATRAN, // 108FB..108FF 8272 PHOENICIAN, // 10900..1091B 8273 UNKNOWN, // 1091C..1091E 8274 PHOENICIAN, // 1091F 8275 LYDIAN, // 10920..10939 8276 UNKNOWN, // 1093A..1093E 8277 LYDIAN, // 1093F 8278 UNKNOWN, // 10940..1097F 8279 MEROITIC_HIEROGLYPHS, // 10980..1099F 8280 MEROITIC_CURSIVE, // 109A0..109B7 8281 UNKNOWN, // 109B8..109BB 8282 MEROITIC_CURSIVE, // 109BC..109CF 8283 UNKNOWN, // 109D0..109D1 8284 MEROITIC_CURSIVE, // 109D2..109FF 8285 KHAROSHTHI, // 10A00..10A03 8286 UNKNOWN, // 10A04 8287 KHAROSHTHI, // 10A05..10A06 8288 UNKNOWN, // 10A07..10A0B 8289 KHAROSHTHI, // 10A0C..10A13 8290 UNKNOWN, // 10A14 8291 KHAROSHTHI, // 10A15..10A17 8292 UNKNOWN, // 10A18 8293 KHAROSHTHI, // 10A19..10A35 8294 UNKNOWN, // 10A36..10A37 8295 KHAROSHTHI, // 10A38..10A3A 8296 UNKNOWN, // 10A3B..10A3E 8297 KHAROSHTHI, // 10A3F..10A48 8298 UNKNOWN, // 10A49..10A4F 8299 KHAROSHTHI, // 10A50..10A58 8300 UNKNOWN, // 10A59..10A5F 8301 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8302 OLD_NORTH_ARABIAN, // 10A80..10A9F 8303 UNKNOWN, // 10AA0..10ABF 8304 MANICHAEAN, // 10AC0..10AE6 8305 UNKNOWN, // 10AE7..10AEA 8306 MANICHAEAN, // 
10AEB..10AF6 8307 UNKNOWN, // 10AF7..10AFF 8308 AVESTAN, // 10B00..10B35 8309 UNKNOWN, // 10B36..10B38 8310 AVESTAN, // 10B39..10B3F 8311 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8312 UNKNOWN, // 10B56..10B57 8313 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8314 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8315 UNKNOWN, // 10B73..10B77 8316 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8317 PSALTER_PAHLAVI, // 10B80..10B91 8318 UNKNOWN, // 10B92..10B98 8319 PSALTER_PAHLAVI, // 10B99..10B9C 8320 UNKNOWN, // 10B9D..10BA8 8321 PSALTER_PAHLAVI, // 10BA9..10BAF 8322 UNKNOWN, // 10BB0..10BFF 8323 OLD_TURKIC, // 10C00..10C48 8324 UNKNOWN, // 10C49..10C7F 8325 OLD_HUNGARIAN, // 10C80..10CB2 8326 UNKNOWN, // 10CB3..10CBF 8327 OLD_HUNGARIAN, // 10CC0..10CF2 8328 UNKNOWN, // 10CF3..10CF9 8329 OLD_HUNGARIAN, // 10CFA..10CFF 8330 HANIFI_ROHINGYA, // 10D00..10D27 8331 UNKNOWN, // 10D28..10D2F 8332 HANIFI_ROHINGYA, // 10D30..10D39 8333 UNKNOWN, // 10D3A..10D3F 8334 GARAY, // 10D40..10D65 8335 UNKNOWN, // 10D66..10D68 8336 GARAY, // 10D69..10D85 8337 UNKNOWN, // 10D86..10D8D 8338 GARAY, // 10D8E..10D8F 8339 UNKNOWN, // 10D90..10E5F 8340 ARABIC, // 10E60..10E7E 8341 UNKNOWN, // 10E7F 8342 YEZIDI, // 10E80..10EA9 8343 UNKNOWN, // 10EAA 8344 YEZIDI, // 10EAB..10EAD 8345 UNKNOWN, // 10EAE..10EAF 8346 YEZIDI, // 10EB0..10EB1 8347 UNKNOWN, // 10EB2..10EC1 8348 ARABIC, // 10EC2..10EC4 8349 UNKNOWN, // 10EC5..10EFB 8350 ARABIC, // 10EFC..10EFF 8351 OLD_SOGDIAN, // 10F00..10F27 8352 UNKNOWN, // 10F28..10F2F 8353 SOGDIAN, // 10F30..10F59 8354 UNKNOWN, // 10F5A..10F6F 8355 OLD_UYGHUR, // 10F70..10F89 8356 UNKNOWN, // 10F8A..10FAF 8357 CHORASMIAN, // 10FB0..10FCB 8358 UNKNOWN, // 10FCC..10FDF 8359 ELYMAIC, // 10FE0..10FF6 8360 UNKNOWN, // 10FF7..10FFF 8361 BRAHMI, // 11000..1104D 8362 UNKNOWN, // 1104E..11051 8363 BRAHMI, // 11052..11075 8364 UNKNOWN, // 11076..1107E 8365 BRAHMI, // 1107F 8366 KAITHI, // 11080..110C2 8367 UNKNOWN, // 110C3..110CC 8368 KAITHI, // 110CD 8369 UNKNOWN, // 110CE..110CF 8370 
SORA_SOMPENG, // 110D0..110E8 8371 UNKNOWN, // 110E9..110EF 8372 SORA_SOMPENG, // 110F0..110F9 8373 UNKNOWN, // 110FA..110FF 8374 CHAKMA, // 11100..11134 8375 UNKNOWN, // 11135 8376 CHAKMA, // 11136..11147 8377 UNKNOWN, // 11148..1114F 8378 MAHAJANI, // 11150..11176 8379 UNKNOWN, // 11177..1117F 8380 SHARADA, // 11180..111DF 8381 UNKNOWN, // 111E0 8382 SINHALA, // 111E1..111F4 8383 UNKNOWN, // 111F5..111FF 8384 KHOJKI, // 11200..11211 8385 UNKNOWN, // 11212 8386 KHOJKI, // 11213..11241 8387 UNKNOWN, // 11242..1127F 8388 MULTANI, // 11280..11286 8389 UNKNOWN, // 11287 8390 MULTANI, // 11288 8391 UNKNOWN, // 11289 8392 MULTANI, // 1128A..1128D 8393 UNKNOWN, // 1128E 8394 MULTANI, // 1128F..1129D 8395 UNKNOWN, // 1129E 8396 MULTANI, // 1129F..112A9 8397 UNKNOWN, // 112AA..112AF 8398 KHUDAWADI, // 112B0..112EA 8399 UNKNOWN, // 112EB..112EF 8400 KHUDAWADI, // 112F0..112F9 8401 UNKNOWN, // 112FA..112FF 8402 GRANTHA, // 11300..11303 8403 UNKNOWN, // 11304 8404 GRANTHA, // 11305..1130C 8405 UNKNOWN, // 1130D..1130E 8406 GRANTHA, // 1130F..11310 8407 UNKNOWN, // 11311..11312 8408 GRANTHA, // 11313..11328 8409 UNKNOWN, // 11329 8410 GRANTHA, // 1132A..11330 8411 UNKNOWN, // 11331 8412 GRANTHA, // 11332..11333 8413 UNKNOWN, // 11334 8414 GRANTHA, // 11335..11339 8415 UNKNOWN, // 1133A 8416 INHERITED, // 1133B 8417 GRANTHA, // 1133C..11344 8418 UNKNOWN, // 11345..11346 8419 GRANTHA, // 11347..11348 8420 UNKNOWN, // 11349..1134A 8421 GRANTHA, // 1134B..1134D 8422 UNKNOWN, // 1134E..1134F 8423 GRANTHA, // 11350 8424 UNKNOWN, // 11351..11356 8425 GRANTHA, // 11357 8426 UNKNOWN, // 11358..1135C 8427 GRANTHA, // 1135D..11363 8428 UNKNOWN, // 11364..11365 8429 GRANTHA, // 11366..1136C 8430 UNKNOWN, // 1136D..1136F 8431 GRANTHA, // 11370..11374 8432 UNKNOWN, // 11375..1137F 8433 TULU_TIGALARI, // 11380..11389 8434 UNKNOWN, // 1138A 8435 TULU_TIGALARI, // 1138B 8436 UNKNOWN, // 1138C..1138D 8437 TULU_TIGALARI, // 1138E 8438 UNKNOWN, // 1138F 8439 TULU_TIGALARI, // 11390..113B5 8440 
UNKNOWN, // 113B6 8441 TULU_TIGALARI, // 113B7..113C0 8442 UNKNOWN, // 113C1 8443 TULU_TIGALARI, // 113C2 8444 UNKNOWN, // 113C3..113C4 8445 TULU_TIGALARI, // 113C5 8446 UNKNOWN, // 113C6 8447 TULU_TIGALARI, // 113C7..113CA 8448 UNKNOWN, // 113CB 8449 TULU_TIGALARI, // 113CC..113D5 8450 UNKNOWN, // 113D6 8451 TULU_TIGALARI, // 113D7..113D8 8452 UNKNOWN, // 113D9..113E0 8453 TULU_TIGALARI, // 113E1..113E2 8454 UNKNOWN, // 113E3..113FF 8455 NEWA, // 11400..1145B 8456 UNKNOWN, // 1145C 8457 NEWA, // 1145D..11461 8458 UNKNOWN, // 11462..1147F 8459 TIRHUTA, // 11480..114C7 8460 UNKNOWN, // 114C8..114CF 8461 TIRHUTA, // 114D0..114D9 8462 UNKNOWN, // 114DA..1157F 8463 SIDDHAM, // 11580..115B5 8464 UNKNOWN, // 115B6..115B7 8465 SIDDHAM, // 115B8..115DD 8466 UNKNOWN, // 115DE..115FF 8467 MODI, // 11600..11644 8468 UNKNOWN, // 11645..1164F 8469 MODI, // 11650..11659 8470 UNKNOWN, // 1165A..1165F 8471 MONGOLIAN, // 11660..1166C 8472 UNKNOWN, // 1166D..1167F 8473 TAKRI, // 11680..116B9 8474 UNKNOWN, // 116BA..116BF 8475 TAKRI, // 116C0..116C9 8476 UNKNOWN, // 116CA..116CF 8477 MYANMAR, // 116D0..116E3 8478 UNKNOWN, // 116E4..116FF 8479 AHOM, // 11700..1171A 8480 UNKNOWN, // 1171B..1171C 8481 AHOM, // 1171D..1172B 8482 UNKNOWN, // 1172C..1172F 8483 AHOM, // 11730..11746 8484 UNKNOWN, // 11747..117FF 8485 DOGRA, // 11800..1183B 8486 UNKNOWN, // 1183C..1189F 8487 WARANG_CITI, // 118A0..118F2 8488 UNKNOWN, // 118F3..118FE 8489 WARANG_CITI, // 118FF 8490 DIVES_AKURU, // 11900..11906 8491 UNKNOWN, // 11907..11908 8492 DIVES_AKURU, // 11909 8493 UNKNOWN, // 1190A..1190B 8494 DIVES_AKURU, // 1190C..11913 8495 UNKNOWN, // 11914 8496 DIVES_AKURU, // 11915..11916 8497 UNKNOWN, // 11917 8498 DIVES_AKURU, // 11918..11935 8499 UNKNOWN, // 11936 8500 DIVES_AKURU, // 11937..11938 8501 UNKNOWN, // 11939..1193A 8502 DIVES_AKURU, // 1193B..11946 8503 UNKNOWN, // 11947..1194F 8504 DIVES_AKURU, // 11950..11959 8505 UNKNOWN, // 1195A..1199F 8506 NANDINAGARI, // 119A0..119A7 8507 UNKNOWN, // 
119A8..119A9 8508 NANDINAGARI, // 119AA..119D7 8509 UNKNOWN, // 119D8..119D9 8510 NANDINAGARI, // 119DA..119E4 8511 UNKNOWN, // 119E5..119FF 8512 ZANABAZAR_SQUARE, // 11A00..11A47 8513 UNKNOWN, // 11A48..11A4F 8514 SOYOMBO, // 11A50..11AA2 8515 UNKNOWN, // 11AA3..11AAF 8516 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8517 PAU_CIN_HAU, // 11AC0..11AF8 8518 UNKNOWN, // 11AF9..11AFF 8519 DEVANAGARI, // 11B00..11B09 8520 UNKNOWN, // 11B0A..11BBF 8521 SUNUWAR, // 11BC0..11BE1 8522 UNKNOWN, // 11BE2..11BEF 8523 SUNUWAR, // 11BF0..11BF9 8524 UNKNOWN, // 11BFA..11BFF 8525 BHAIKSUKI, // 11C00..11C08 8526 UNKNOWN, // 11C09 8527 BHAIKSUKI, // 11C0A..11C36 8528 UNKNOWN, // 11C37 8529 BHAIKSUKI, // 11C38..11C45 8530 UNKNOWN, // 11C46..11C4F 8531 BHAIKSUKI, // 11C50..11C6C 8532 UNKNOWN, // 11C6D..11C6F 8533 MARCHEN, // 11C70..11C8F 8534 UNKNOWN, // 11C90..11C91 8535 MARCHEN, // 11C92..11CA7 8536 UNKNOWN, // 11CA8 8537 MARCHEN, // 11CA9..11CB6 8538 UNKNOWN, // 11CB7..11CFF 8539 MASARAM_GONDI, // 11D00..11D06 8540 UNKNOWN, // 11D07 8541 MASARAM_GONDI, // 11D08..11D09 8542 UNKNOWN, // 11D0A 8543 MASARAM_GONDI, // 11D0B..11D36 8544 UNKNOWN, // 11D37..11D39 8545 MASARAM_GONDI, // 11D3A 8546 UNKNOWN, // 11D3B 8547 MASARAM_GONDI, // 11D3C..11D3D 8548 UNKNOWN, // 11D3E 8549 MASARAM_GONDI, // 11D3F..11D47 8550 UNKNOWN, // 11D48..11D4F 8551 MASARAM_GONDI, // 11D50..11D59 8552 UNKNOWN, // 11D5A..11D5F 8553 GUNJALA_GONDI, // 11D60..11D65 8554 UNKNOWN, // 11D66 8555 GUNJALA_GONDI, // 11D67..11D68 8556 UNKNOWN, // 11D69 8557 GUNJALA_GONDI, // 11D6A..11D8E 8558 UNKNOWN, // 11D8F 8559 GUNJALA_GONDI, // 11D90..11D91 8560 UNKNOWN, // 11D92 8561 GUNJALA_GONDI, // 11D93..11D98 8562 UNKNOWN, // 11D99..11D9F 8563 GUNJALA_GONDI, // 11DA0..11DA9 8564 UNKNOWN, // 11DAA..11EDF 8565 MAKASAR, // 11EE0..11EF8 8566 UNKNOWN, // 11EF9..11EFF 8567 KAWI, // 11F00..11F10 8568 UNKNOWN, // 11F11 8569 KAWI, // 11F12..11F3A 8570 UNKNOWN, // 11F3B..11F3D 8571 KAWI, // 11F3E..11F5A 8572 UNKNOWN, // 11F5B..11FAF 8573 LISU, // 
11FB0 8574 UNKNOWN, // 11FB1..11FBF 8575 TAMIL, // 11FC0..11FF1 8576 UNKNOWN, // 11FF2..11FFE 8577 TAMIL, // 11FFF 8578 CUNEIFORM, // 12000..12399 8579 UNKNOWN, // 1239A..123FF 8580 CUNEIFORM, // 12400..1246E 8581 UNKNOWN, // 1246F 8582 CUNEIFORM, // 12470..12474 8583 UNKNOWN, // 12475..1247F 8584 CUNEIFORM, // 12480..12543 8585 UNKNOWN, // 12544..12F8F 8586 CYPRO_MINOAN, // 12F90..12FF2 8587 UNKNOWN, // 12FF3..12FFF 8588 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8589 UNKNOWN, // 13456..1345F 8590 EGYPTIAN_HIEROGLYPHS, // 13460..143FA 8591 UNKNOWN, // 143FB..143FF 8592 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8593 UNKNOWN, // 14647..160FF 8594 GURUNG_KHEMA, // 16100..16139 8595 UNKNOWN, // 1613A..167FF 8596 BAMUM, // 16800..16A38 8597 UNKNOWN, // 16A39..16A3F 8598 MRO, // 16A40..16A5E 8599 UNKNOWN, // 16A5F 8600 MRO, // 16A60..16A69 8601 UNKNOWN, // 16A6A..16A6D 8602 MRO, // 16A6E..16A6F 8603 TANGSA, // 16A70..16ABE 8604 UNKNOWN, // 16ABF 8605 TANGSA, // 16AC0..16AC9 8606 UNKNOWN, // 16ACA..16ACF 8607 BASSA_VAH, // 16AD0..16AED 8608 UNKNOWN, // 16AEE..16AEF 8609 BASSA_VAH, // 16AF0..16AF5 8610 UNKNOWN, // 16AF6..16AFF 8611 PAHAWH_HMONG, // 16B00..16B45 8612 UNKNOWN, // 16B46..16B4F 8613 PAHAWH_HMONG, // 16B50..16B59 8614 UNKNOWN, // 16B5A 8615 PAHAWH_HMONG, // 16B5B..16B61 8616 UNKNOWN, // 16B62 8617 PAHAWH_HMONG, // 16B63..16B77 8618 UNKNOWN, // 16B78..16B7C 8619 PAHAWH_HMONG, // 16B7D..16B8F 8620 UNKNOWN, // 16B90..16D3F 8621 KIRAT_RAI, // 16D40..16D79 8622 UNKNOWN, // 16D7A..16E3F 8623 MEDEFAIDRIN, // 16E40..16E9A 8624 UNKNOWN, // 16E9B..16EFF 8625 MIAO, // 16F00..16F4A 8626 UNKNOWN, // 16F4B..16F4E 8627 MIAO, // 16F4F..16F87 8628 UNKNOWN, // 16F88..16F8E 8629 MIAO, // 16F8F..16F9F 8630 UNKNOWN, // 16FA0..16FDF 8631 TANGUT, // 16FE0 8632 NUSHU, // 16FE1 8633 HAN, // 16FE2..16FE3 8634 KHITAN_SMALL_SCRIPT, // 16FE4 8635 UNKNOWN, // 16FE5..16FEF 8636 HAN, // 16FF0..16FF1 8637 UNKNOWN, // 16FF2..16FFF 8638 TANGUT, // 17000..187F7 8639 UNKNOWN, // 187F8..187FF 8640 TANGUT, 
// 18800..18AFF 8641 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8642 UNKNOWN, // 18CD6..18CFE 8643 KHITAN_SMALL_SCRIPT, // 18CFF 8644 TANGUT, // 18D00..18D08 8645 UNKNOWN, // 18D09..1AFEF 8646 KATAKANA, // 1AFF0..1AFF3 8647 UNKNOWN, // 1AFF4 8648 KATAKANA, // 1AFF5..1AFFB 8649 UNKNOWN, // 1AFFC 8650 KATAKANA, // 1AFFD..1AFFE 8651 UNKNOWN, // 1AFFF 8652 KATAKANA, // 1B000 8653 HIRAGANA, // 1B001..1B11F 8654 KATAKANA, // 1B120..1B122 8655 UNKNOWN, // 1B123..1B131 8656 HIRAGANA, // 1B132 8657 UNKNOWN, // 1B133..1B14F 8658 HIRAGANA, // 1B150..1B152 8659 UNKNOWN, // 1B153..1B154 8660 KATAKANA, // 1B155 8661 UNKNOWN, // 1B156..1B163 8662 KATAKANA, // 1B164..1B167 8663 UNKNOWN, // 1B168..1B16F 8664 NUSHU, // 1B170..1B2FB 8665 UNKNOWN, // 1B2FC..1BBFF 8666 DUPLOYAN, // 1BC00..1BC6A 8667 UNKNOWN, // 1BC6B..1BC6F 8668 DUPLOYAN, // 1BC70..1BC7C 8669 UNKNOWN, // 1BC7D..1BC7F 8670 DUPLOYAN, // 1BC80..1BC88 8671 UNKNOWN, // 1BC89..1BC8F 8672 DUPLOYAN, // 1BC90..1BC99 8673 UNKNOWN, // 1BC9A..1BC9B 8674 DUPLOYAN, // 1BC9C..1BC9F 8675 COMMON, // 1BCA0..1BCA3 8676 UNKNOWN, // 1BCA4..1CBFF 8677 COMMON, // 1CC00..1CCF9 8678 UNKNOWN, // 1CCFA..1CCFF 8679 COMMON, // 1CD00..1CEB3 8680 UNKNOWN, // 1CEB4..1CEFF 8681 INHERITED, // 1CF00..1CF2D 8682 UNKNOWN, // 1CF2E..1CF2F 8683 INHERITED, // 1CF30..1CF46 8684 UNKNOWN, // 1CF47..1CF4F 8685 COMMON, // 1CF50..1CFC3 8686 UNKNOWN, // 1CFC4..1CFFF 8687 COMMON, // 1D000..1D0F5 8688 UNKNOWN, // 1D0F6..1D0FF 8689 COMMON, // 1D100..1D126 8690 UNKNOWN, // 1D127..1D128 8691 COMMON, // 1D129..1D166 8692 INHERITED, // 1D167..1D169 8693 COMMON, // 1D16A..1D17A 8694 INHERITED, // 1D17B..1D182 8695 COMMON, // 1D183..1D184 8696 INHERITED, // 1D185..1D18B 8697 COMMON, // 1D18C..1D1A9 8698 INHERITED, // 1D1AA..1D1AD 8699 COMMON, // 1D1AE..1D1EA 8700 UNKNOWN, // 1D1EB..1D1FF 8701 GREEK, // 1D200..1D245 8702 UNKNOWN, // 1D246..1D2BF 8703 COMMON, // 1D2C0..1D2D3 8704 UNKNOWN, // 1D2D4..1D2DF 8705 COMMON, // 1D2E0..1D2F3 8706 UNKNOWN, // 1D2F4..1D2FF 8707 COMMON, // 
1D300..1D356 8708 UNKNOWN, // 1D357..1D35F 8709 COMMON, // 1D360..1D378 8710 UNKNOWN, // 1D379..1D3FF 8711 COMMON, // 1D400..1D454 8712 UNKNOWN, // 1D455 8713 COMMON, // 1D456..1D49C 8714 UNKNOWN, // 1D49D 8715 COMMON, // 1D49E..1D49F 8716 UNKNOWN, // 1D4A0..1D4A1 8717 COMMON, // 1D4A2 8718 UNKNOWN, // 1D4A3..1D4A4 8719 COMMON, // 1D4A5..1D4A6 8720 UNKNOWN, // 1D4A7..1D4A8 8721 COMMON, // 1D4A9..1D4AC 8722 UNKNOWN, // 1D4AD 8723 COMMON, // 1D4AE..1D4B9 8724 UNKNOWN, // 1D4BA 8725 COMMON, // 1D4BB 8726 UNKNOWN, // 1D4BC 8727 COMMON, // 1D4BD..1D4C3 8728 UNKNOWN, // 1D4C4 8729 COMMON, // 1D4C5..1D505 8730 UNKNOWN, // 1D506 8731 COMMON, // 1D507..1D50A 8732 UNKNOWN, // 1D50B..1D50C 8733 COMMON, // 1D50D..1D514 8734 UNKNOWN, // 1D515 8735 COMMON, // 1D516..1D51C 8736 UNKNOWN, // 1D51D 8737 COMMON, // 1D51E..1D539 8738 UNKNOWN, // 1D53A 8739 COMMON, // 1D53B..1D53E 8740 UNKNOWN, // 1D53F 8741 COMMON, // 1D540..1D544 8742 UNKNOWN, // 1D545 8743 COMMON, // 1D546 8744 UNKNOWN, // 1D547..1D549 8745 COMMON, // 1D54A..1D550 8746 UNKNOWN, // 1D551 8747 COMMON, // 1D552..1D6A5 8748 UNKNOWN, // 1D6A6..1D6A7 8749 COMMON, // 1D6A8..1D7CB 8750 UNKNOWN, // 1D7CC..1D7CD 8751 COMMON, // 1D7CE..1D7FF 8752 SIGNWRITING, // 1D800..1DA8B 8753 UNKNOWN, // 1DA8C..1DA9A 8754 SIGNWRITING, // 1DA9B..1DA9F 8755 UNKNOWN, // 1DAA0 8756 SIGNWRITING, // 1DAA1..1DAAF 8757 UNKNOWN, // 1DAB0..1DEFF 8758 LATIN, // 1DF00..1DF1E 8759 UNKNOWN, // 1DF1F..1DF24 8760 LATIN, // 1DF25..1DF2A 8761 UNKNOWN, // 1DF2B..1DFFF 8762 GLAGOLITIC, // 1E000..1E006 8763 UNKNOWN, // 1E007 8764 GLAGOLITIC, // 1E008..1E018 8765 UNKNOWN, // 1E019..1E01A 8766 GLAGOLITIC, // 1E01B..1E021 8767 UNKNOWN, // 1E022 8768 GLAGOLITIC, // 1E023..1E024 8769 UNKNOWN, // 1E025 8770 GLAGOLITIC, // 1E026..1E02A 8771 UNKNOWN, // 1E02B..1E02F 8772 CYRILLIC, // 1E030..1E06D 8773 UNKNOWN, // 1E06E..1E08E 8774 CYRILLIC, // 1E08F 8775 UNKNOWN, // 1E090..1E0FF 8776 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8777 UNKNOWN, // 1E12D..1E12F 8778 
NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8779 UNKNOWN, // 1E13E..1E13F 8780 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8781 UNKNOWN, // 1E14A..1E14D 8782 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8783 UNKNOWN, // 1E150..1E28F 8784 TOTO, // 1E290..1E2AE 8785 UNKNOWN, // 1E2AF..1E2BF 8786 WANCHO, // 1E2C0..1E2F9 8787 UNKNOWN, // 1E2FA..1E2FE 8788 WANCHO, // 1E2FF 8789 UNKNOWN, // 1E300..1E4CF 8790 NAG_MUNDARI, // 1E4D0..1E4F9 8791 UNKNOWN, // 1E4FA..1E5CF 8792 OL_ONAL, // 1E5D0..1E5FA 8793 UNKNOWN, // 1E5FB..1E5FE 8794 OL_ONAL, // 1E5FF 8795 UNKNOWN, // 1E600..1E7DF 8796 ETHIOPIC, // 1E7E0..1E7E6 8797 UNKNOWN, // 1E7E7 8798 ETHIOPIC, // 1E7E8..1E7EB 8799 UNKNOWN, // 1E7EC 8800 ETHIOPIC, // 1E7ED..1E7EE 8801 UNKNOWN, // 1E7EF 8802 ETHIOPIC, // 1E7F0..1E7FE 8803 UNKNOWN, // 1E7FF 8804 MENDE_KIKAKUI, // 1E800..1E8C4 8805 UNKNOWN, // 1E8C5..1E8C6 8806 MENDE_KIKAKUI, // 1E8C7..1E8D6 8807 UNKNOWN, // 1E8D7..1E8FF 8808 ADLAM, // 1E900..1E94B 8809 UNKNOWN, // 1E94C..1E94F 8810 ADLAM, // 1E950..1E959 8811 UNKNOWN, // 1E95A..1E95D 8812 ADLAM, // 1E95E..1E95F 8813 UNKNOWN, // 1E960..1EC70 8814 COMMON, // 1EC71..1ECB4 8815 UNKNOWN, // 1ECB5..1ED00 8816 COMMON, // 1ED01..1ED3D 8817 UNKNOWN, // 1ED3E..1EDFF 8818 ARABIC, // 1EE00..1EE03 8819 UNKNOWN, // 1EE04 8820 ARABIC, // 1EE05..1EE1F 8821 UNKNOWN, // 1EE20 8822 ARABIC, // 1EE21..1EE22 8823 UNKNOWN, // 1EE23 8824 ARABIC, // 1EE24 8825 UNKNOWN, // 1EE25..1EE26 8826 ARABIC, // 1EE27 8827 UNKNOWN, // 1EE28 8828 ARABIC, // 1EE29..1EE32 8829 UNKNOWN, // 1EE33 8830 ARABIC, // 1EE34..1EE37 8831 UNKNOWN, // 1EE38 8832 ARABIC, // 1EE39 8833 UNKNOWN, // 1EE3A 8834 ARABIC, // 1EE3B 8835 UNKNOWN, // 1EE3C..1EE41 8836 ARABIC, // 1EE42 8837 UNKNOWN, // 1EE43..1EE46 8838 ARABIC, // 1EE47 8839 UNKNOWN, // 1EE48 8840 ARABIC, // 1EE49 8841 UNKNOWN, // 1EE4A 8842 ARABIC, // 1EE4B 8843 UNKNOWN, // 1EE4C 8844 ARABIC, // 1EE4D..1EE4F 8845 UNKNOWN, // 1EE50 8846 ARABIC, // 1EE51..1EE52 8847 UNKNOWN, // 1EE53 8848 ARABIC, // 1EE54 8849 UNKNOWN, // 
1EE55..1EE56 8850 ARABIC, // 1EE57 8851 UNKNOWN, // 1EE58 8852 ARABIC, // 1EE59 8853 UNKNOWN, // 1EE5A 8854 ARABIC, // 1EE5B 8855 UNKNOWN, // 1EE5C 8856 ARABIC, // 1EE5D 8857 UNKNOWN, // 1EE5E 8858 ARABIC, // 1EE5F 8859 UNKNOWN, // 1EE60 8860 ARABIC, // 1EE61..1EE62 8861 UNKNOWN, // 1EE63 8862 ARABIC, // 1EE64 8863 UNKNOWN, // 1EE65..1EE66 8864 ARABIC, // 1EE67..1EE6A 8865 UNKNOWN, // 1EE6B 8866 ARABIC, // 1EE6C..1EE72 8867 UNKNOWN, // 1EE73 8868 ARABIC, // 1EE74..1EE77 8869 UNKNOWN, // 1EE78 8870 ARABIC, // 1EE79..1EE7C 8871 UNKNOWN, // 1EE7D 8872 ARABIC, // 1EE7E 8873 UNKNOWN, // 1EE7F 8874 ARABIC, // 1EE80..1EE89 8875 UNKNOWN, // 1EE8A 8876 ARABIC, // 1EE8B..1EE9B 8877 UNKNOWN, // 1EE9C..1EEA0 8878 ARABIC, // 1EEA1..1EEA3 8879 UNKNOWN, // 1EEA4 8880 ARABIC, // 1EEA5..1EEA9 8881 UNKNOWN, // 1EEAA 8882 ARABIC, // 1EEAB..1EEBB 8883 UNKNOWN, // 1EEBC..1EEEF 8884 ARABIC, // 1EEF0..1EEF1 8885 UNKNOWN, // 1EEF2..1EFFF 8886 COMMON, // 1F000..1F02B 8887 UNKNOWN, // 1F02C..1F02F 8888 COMMON, // 1F030..1F093 8889 UNKNOWN, // 1F094..1F09F 8890 COMMON, // 1F0A0..1F0AE 8891 UNKNOWN, // 1F0AF..1F0B0 8892 COMMON, // 1F0B1..1F0BF 8893 UNKNOWN, // 1F0C0 8894 COMMON, // 1F0C1..1F0CF 8895 UNKNOWN, // 1F0D0 8896 COMMON, // 1F0D1..1F0F5 8897 UNKNOWN, // 1F0F6..1F0FF 8898 COMMON, // 1F100..1F1AD 8899 UNKNOWN, // 1F1AE..1F1E5 8900 COMMON, // 1F1E6..1F1FF 8901 HIRAGANA, // 1F200 8902 COMMON, // 1F201..1F202 8903 UNKNOWN, // 1F203..1F20F 8904 COMMON, // 1F210..1F23B 8905 UNKNOWN, // 1F23C..1F23F 8906 COMMON, // 1F240..1F248 8907 UNKNOWN, // 1F249..1F24F 8908 COMMON, // 1F250..1F251 8909 UNKNOWN, // 1F252..1F25F 8910 COMMON, // 1F260..1F265 8911 UNKNOWN, // 1F266..1F2FF 8912 COMMON, // 1F300..1F6D7 8913 UNKNOWN, // 1F6D8..1F6DB 8914 COMMON, // 1F6DC..1F6EC 8915 UNKNOWN, // 1F6ED..1F6EF 8916 COMMON, // 1F6F0..1F6FC 8917 UNKNOWN, // 1F6FD..1F6FF 8918 COMMON, // 1F700..1F776 8919 UNKNOWN, // 1F777..1F77A 8920 COMMON, // 1F77B..1F7D9 8921 UNKNOWN, // 1F7DA..1F7DF 8922 COMMON, // 1F7E0..1F7EB 
8923 UNKNOWN, // 1F7EC..1F7EF 8924 COMMON, // 1F7F0 8925 UNKNOWN, // 1F7F1..1F7FF 8926 COMMON, // 1F800..1F80B 8927 UNKNOWN, // 1F80C..1F80F 8928 COMMON, // 1F810..1F847 8929 UNKNOWN, // 1F848..1F84F 8930 COMMON, // 1F850..1F859 8931 UNKNOWN, // 1F85A..1F85F 8932 COMMON, // 1F860..1F887 8933 UNKNOWN, // 1F888..1F88F 8934 COMMON, // 1F890..1F8AD 8935 UNKNOWN, // 1F8AE..1F8AF 8936 COMMON, // 1F8B0..1F8BB 8937 UNKNOWN, // 1F8BC..1F8BF 8938 COMMON, // 1F8C0..1F8C1 8939 UNKNOWN, // 1F8C2..1F8FF 8940 COMMON, // 1F900..1FA53 8941 UNKNOWN, // 1FA54..1FA5F 8942 COMMON, // 1FA60..1FA6D 8943 UNKNOWN, // 1FA6E..1FA6F 8944 COMMON, // 1FA70..1FA7C 8945 UNKNOWN, // 1FA7D..1FA7F 8946 COMMON, // 1FA80..1FA89 8947 UNKNOWN, // 1FA8A..1FA8E 8948 COMMON, // 1FA8F..1FAC6 8949 UNKNOWN, // 1FAC7..1FACD 8950 COMMON, // 1FACE..1FADC 8951 UNKNOWN, // 1FADD..1FADE 8952 COMMON, // 1FADF..1FAE9 8953 UNKNOWN, // 1FAEA..1FAEF 8954 COMMON, // 1FAF0..1FAF8 8955 UNKNOWN, // 1FAF9..1FAFF 8956 COMMON, // 1FB00..1FB92 8957 UNKNOWN, // 1FB93 8958 COMMON, // 1FB94..1FBF9 8959 UNKNOWN, // 1FBFA..1FFFF 8960 HAN, // 20000..2A6DF 8961 UNKNOWN, // 2A6E0..2A6FF 8962 HAN, // 2A700..2B739 8963 UNKNOWN, // 2B73A..2B73F 8964 HAN, // 2B740..2B81D 8965 UNKNOWN, // 2B81E..2B81F 8966 HAN, // 2B820..2CEA1 8967 UNKNOWN, // 2CEA2..2CEAF 8968 HAN, // 2CEB0..2EBE0 8969 UNKNOWN, // 2EBE1..2EBEF 8970 HAN, // 2EBF0..2EE5D 8971 UNKNOWN, // 2EE5E..2F7FF 8972 HAN, // 2F800..2FA1D 8973 UNKNOWN, // 2FA1E..2FFFF 8974 HAN, // 30000..3134A 8975 UNKNOWN, // 3134B..3134F 8976 HAN, // 31350..323AF 8977 UNKNOWN, // 323B0..E0000 8978 COMMON, // E0001 8979 UNKNOWN, // E0002..E001F 8980 COMMON, // E0020..E007F 8981 UNKNOWN, // E0080..E00FF 8982 INHERITED, // E0100..E01EF 8983 UNKNOWN, // E01F0..10FFFF 8984 }; 8985 8986 private static final HashMap<String, Character.UnicodeScript> aliases; 8987 static { 8988 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8989 aliases.put("ADLM", ADLAM); 8990 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 
8991 aliases.put("AHOM", AHOM); 8992 aliases.put("ARAB", ARABIC); 8993 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8994 aliases.put("ARMN", ARMENIAN); 8995 aliases.put("AVST", AVESTAN); 8996 aliases.put("BALI", BALINESE); 8997 aliases.put("BAMU", BAMUM); 8998 aliases.put("BASS", BASSA_VAH); 8999 aliases.put("BATK", BATAK); 9000 aliases.put("BENG", BENGALI); 9001 aliases.put("BHKS", BHAIKSUKI); 9002 aliases.put("BOPO", BOPOMOFO); 9003 aliases.put("BRAH", BRAHMI); 9004 aliases.put("BRAI", BRAILLE); 9005 aliases.put("BUGI", BUGINESE); 9006 aliases.put("BUHD", BUHID); 9007 aliases.put("CAKM", CHAKMA); 9008 aliases.put("CANS", CANADIAN_ABORIGINAL); 9009 aliases.put("CARI", CARIAN); 9010 aliases.put("CHAM", CHAM); 9011 aliases.put("CHER", CHEROKEE); 9012 aliases.put("CHRS", CHORASMIAN); 9013 aliases.put("COPT", COPTIC); 9014 aliases.put("CPMN", CYPRO_MINOAN); 9015 aliases.put("CPRT", CYPRIOT); 9016 aliases.put("CYRL", CYRILLIC); 9017 aliases.put("DEVA", DEVANAGARI); 9018 aliases.put("DIAK", DIVES_AKURU); 9019 aliases.put("DOGR", DOGRA); 9020 aliases.put("DSRT", DESERET); 9021 aliases.put("DUPL", DUPLOYAN); 9022 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 9023 aliases.put("ELBA", ELBASAN); 9024 aliases.put("ELYM", ELYMAIC); 9025 aliases.put("ETHI", ETHIOPIC); 9026 aliases.put("GARA", GARAY); 9027 aliases.put("GEOR", GEORGIAN); 9028 aliases.put("GLAG", GLAGOLITIC); 9029 aliases.put("GONG", GUNJALA_GONDI); 9030 aliases.put("GONM", MASARAM_GONDI); 9031 aliases.put("GOTH", GOTHIC); 9032 aliases.put("GRAN", GRANTHA); 9033 aliases.put("GREK", GREEK); 9034 aliases.put("GUJR", GUJARATI); 9035 aliases.put("GUKH", GURUNG_KHEMA); 9036 aliases.put("GURU", GURMUKHI); 9037 aliases.put("HANG", HANGUL); 9038 aliases.put("HANI", HAN); 9039 aliases.put("HANO", HANUNOO); 9040 aliases.put("HATR", HATRAN); 9041 aliases.put("HEBR", HEBREW); 9042 aliases.put("HIRA", HIRAGANA); 9043 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 9044 aliases.put("HMNG", PAHAWH_HMONG); 9045 aliases.put("HMNP", 
NYIAKENG_PUACHUE_HMONG); 9046 aliases.put("HUNG", OLD_HUNGARIAN); 9047 aliases.put("ITAL", OLD_ITALIC); 9048 aliases.put("JAVA", JAVANESE); 9049 aliases.put("KALI", KAYAH_LI); 9050 aliases.put("KANA", KATAKANA); 9051 aliases.put("KAWI", KAWI); 9052 aliases.put("KHAR", KHAROSHTHI); 9053 aliases.put("KHMR", KHMER); 9054 aliases.put("KHOJ", KHOJKI); 9055 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 9056 aliases.put("KNDA", KANNADA); 9057 aliases.put("KRAI", KIRAT_RAI); 9058 aliases.put("KTHI", KAITHI); 9059 aliases.put("LANA", TAI_THAM); 9060 aliases.put("LAOO", LAO); 9061 aliases.put("LATN", LATIN); 9062 aliases.put("LEPC", LEPCHA); 9063 aliases.put("LIMB", LIMBU); 9064 aliases.put("LINA", LINEAR_A); 9065 aliases.put("LINB", LINEAR_B); 9066 aliases.put("LISU", LISU); 9067 aliases.put("LYCI", LYCIAN); 9068 aliases.put("LYDI", LYDIAN); 9069 aliases.put("MAHJ", MAHAJANI); 9070 aliases.put("MAKA", MAKASAR); 9071 aliases.put("MAND", MANDAIC); 9072 aliases.put("MANI", MANICHAEAN); 9073 aliases.put("MARC", MARCHEN); 9074 aliases.put("MEDF", MEDEFAIDRIN); 9075 aliases.put("MEND", MENDE_KIKAKUI); 9076 aliases.put("MERC", MEROITIC_CURSIVE); 9077 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 9078 aliases.put("MLYM", MALAYALAM); 9079 aliases.put("MODI", MODI); 9080 aliases.put("MONG", MONGOLIAN); 9081 aliases.put("MROO", MRO); 9082 aliases.put("MTEI", MEETEI_MAYEK); 9083 aliases.put("MULT", MULTANI); 9084 aliases.put("MYMR", MYANMAR); 9085 aliases.put("NAGM", NAG_MUNDARI); 9086 aliases.put("NAND", NANDINAGARI); 9087 aliases.put("NARB", OLD_NORTH_ARABIAN); 9088 aliases.put("NBAT", NABATAEAN); 9089 aliases.put("NEWA", NEWA); 9090 aliases.put("NKOO", NKO); 9091 aliases.put("NSHU", NUSHU); 9092 aliases.put("OGAM", OGHAM); 9093 aliases.put("OLCK", OL_CHIKI); 9094 aliases.put("ONAO", OL_ONAL); 9095 aliases.put("ORKH", OLD_TURKIC); 9096 aliases.put("ORYA", ORIYA); 9097 aliases.put("OSGE", OSAGE); 9098 aliases.put("OSMA", OSMANYA); 9099 aliases.put("OUGR", OLD_UYGHUR); 9100 
aliases.put("PALM", PALMYRENE); 9101 aliases.put("PAUC", PAU_CIN_HAU); 9102 aliases.put("PERM", OLD_PERMIC); 9103 aliases.put("PHAG", PHAGS_PA); 9104 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 9105 aliases.put("PHLP", PSALTER_PAHLAVI); 9106 aliases.put("PHNX", PHOENICIAN); 9107 aliases.put("PLRD", MIAO); 9108 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 9109 aliases.put("RJNG", REJANG); 9110 aliases.put("ROHG", HANIFI_ROHINGYA); 9111 aliases.put("RUNR", RUNIC); 9112 aliases.put("SAMR", SAMARITAN); 9113 aliases.put("SARB", OLD_SOUTH_ARABIAN); 9114 aliases.put("SAUR", SAURASHTRA); 9115 aliases.put("SGNW", SIGNWRITING); 9116 aliases.put("SHAW", SHAVIAN); 9117 aliases.put("SHRD", SHARADA); 9118 aliases.put("SIDD", SIDDHAM); 9119 aliases.put("SIND", KHUDAWADI); 9120 aliases.put("SINH", SINHALA); 9121 aliases.put("SOGD", SOGDIAN); 9122 aliases.put("SOGO", OLD_SOGDIAN); 9123 aliases.put("SORA", SORA_SOMPENG); 9124 aliases.put("SOYO", SOYOMBO); 9125 aliases.put("SUND", SUNDANESE); 9126 aliases.put("SUNU", SUNUWAR); 9127 aliases.put("SYLO", SYLOTI_NAGRI); 9128 aliases.put("SYRC", SYRIAC); 9129 aliases.put("TAGB", TAGBANWA); 9130 aliases.put("TAKR", TAKRI); 9131 aliases.put("TALE", TAI_LE); 9132 aliases.put("TALU", NEW_TAI_LUE); 9133 aliases.put("TAML", TAMIL); 9134 aliases.put("TANG", TANGUT); 9135 aliases.put("TAVT", TAI_VIET); 9136 aliases.put("TELU", TELUGU); 9137 aliases.put("TFNG", TIFINAGH); 9138 aliases.put("TGLG", TAGALOG); 9139 aliases.put("THAA", THAANA); 9140 aliases.put("THAI", THAI); 9141 aliases.put("TIBT", TIBETAN); 9142 aliases.put("TIRH", TIRHUTA); 9143 aliases.put("TNSA", TANGSA); 9144 aliases.put("TODR", TODHRI); 9145 aliases.put("TOTO", TOTO); 9146 aliases.put("TUTG", TULU_TIGALARI); 9147 aliases.put("UGAR", UGARITIC); 9148 aliases.put("VAII", VAI); 9149 aliases.put("VITH", VITHKUQI); 9150 aliases.put("WARA", WARANG_CITI); 9151 aliases.put("WCHO", WANCHO); 9152 aliases.put("XPEO", OLD_PERSIAN); 9153 aliases.put("XSUX", CUNEIFORM); 9154 
aliases.put("YEZI", YEZIDI); 9155 aliases.put("YIII", YI); 9156 aliases.put("ZANB", ZANABAZAR_SQUARE); 9157 aliases.put("ZINH", INHERITED); 9158 aliases.put("ZYYY", COMMON); 9159 aliases.put("ZZZZ", UNKNOWN); 9160 } 9161 9162 /** 9163 * Returns the enum constant representing the Unicode script of which 9164 * the given character (Unicode code point) is assigned to. 9165 * 9166 * @param codePoint the character (Unicode code point) in question. 9167 * @return The {@code UnicodeScript} constant representing the 9168 * Unicode script of which this character is assigned to. 9169 * 9170 * @throws IllegalArgumentException if the specified 9171 * {@code codePoint} is an invalid Unicode code point. 9172 * @see Character#isValidCodePoint(int) 9173 * 9174 */ 9175 public static UnicodeScript of(int codePoint) { 9176 if (!isValidCodePoint(codePoint)) 9177 throw new IllegalArgumentException( 9178 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9179 int type = getType(codePoint); 9180 // leave SURROGATE and PRIVATE_USE for table lookup 9181 if (type == UNASSIGNED) 9182 return UNKNOWN; 9183 int index = Arrays.binarySearch(scriptStarts, codePoint); 9184 if (index < 0) 9185 index = -index - 2; 9186 return scripts[index]; 9187 } 9188 9189 /** 9190 * Returns the UnicodeScript constant with the given Unicode script 9191 * name or the script name alias. Script names and their aliases are 9192 * determined by The Unicode Standard. The files {@code Scripts.txt} 9193 * and {@code PropertyValueAliases.txt} define script names 9194 * and the script name aliases for a particular version of the 9195 * standard. The {@link Character} class specifies the version of 9196 * the standard that it supports. 9197 * <p> 9198 * Character case is ignored for all of the valid script names. 9199 * The en_US locale's case mapping rules are used to provide 9200 * case-insensitive string comparisons for script name validation. 9201 * 9202 * @param scriptName A {@code UnicodeScript} name. 
9203 * @return The {@code UnicodeScript} constant identified 9204 * by {@code scriptName} 9205 * @throws IllegalArgumentException if {@code scriptName} is an 9206 * invalid name 9207 * @throws NullPointerException if {@code scriptName} is null 9208 */ 9209 public static final UnicodeScript forName(String scriptName) { 9210 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 9211 //.replace(' ', '_')); 9212 UnicodeScript sc = aliases.get(scriptName); 9213 if (sc != null) 9214 return sc; 9215 return valueOf(scriptName); 9216 } 9217 } 9218 9219 /** 9220 * The value of the {@code Character}. 9221 * 9222 * @serial 9223 */ 9224 private final char value; 9225 9226 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 9227 @java.io.Serial 9228 private static final long serialVersionUID = 3786198910865385080L; 9229 9230 /** 9231 * Constructs a newly allocated {@code Character} object that 9232 * represents the specified {@code char} value. 9233 * 9234 * @param value the value to be represented by the 9235 * {@code Character} object. 9236 * 9237 * @deprecated 9238 * It is rarely appropriate to use this constructor. The static factory 9239 * {@link #valueOf(char)} is generally a better choice, as it is 9240 * likely to yield significantly better space and time performance. 
9241 */ 9242 @Deprecated(since="9") 9243 public Character(char value) { 9244 this.value = value; 9245 } 9246 9247 private static final class CharacterCache { 9248 private CharacterCache(){} 9249 9250 @Stable 9251 static final Character[] cache; 9252 static Character[] archivedCache; 9253 9254 static { 9255 int size = 127 + 1; 9256 9257 // Load and use the archived cache if it exists 9258 CDS.initializeFromArchive(CharacterCache.class); 9259 if (archivedCache == null) { 9260 Character[] c = new Character[size]; 9261 for (int i = 0; i < size; i++) { 9262 c[i] = new Character((char) i); 9263 } 9264 archivedCache = c; 9265 } 9266 cache = archivedCache; 9267 assert cache.length == size; 9268 } 9269 } 9270 9271 /** 9272 * Returns a {@code Character} instance representing the specified 9273 * {@code char} value. 9274 * If a new {@code Character} instance is not required, this method 9275 * should generally be used in preference to the constructor 9276 * {@link #Character(char)}, as this method is likely to yield 9277 * significantly better space and time performance by caching 9278 * frequently requested values. 9279 * 9280 * This method will always cache values in the range {@code 9281 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9282 * cache other values outside of this range. 9283 * 9284 * @param c a char value. 9285 * @return a {@code Character} instance representing {@code c}. 9286 * @since 1.5 9287 */ 9288 @IntrinsicCandidate 9289 @DeserializeConstructor 9290 public static Character valueOf(char c) { 9291 if (c <= 127) { // must cache 9292 return CharacterCache.cache[(int)c]; 9293 } 9294 return new Character(c); 9295 } 9296 9297 /** 9298 * Returns the value of this {@code Character} object. 9299 * @return the primitive {@code char} value represented by 9300 * this object. 
9301 */ 9302 @IntrinsicCandidate 9303 public char charValue() { 9304 return value; 9305 } 9306 9307 /** 9308 * Returns a hash code for this {@code Character}; equal to the result 9309 * of invoking {@code charValue()}. 9310 * 9311 * @return a hash code value for this {@code Character} 9312 */ 9313 @Override 9314 public int hashCode() { 9315 return Character.hashCode(value); 9316 } 9317 9318 /** 9319 * Returns a hash code for a {@code char} value; compatible with 9320 * {@code Character.hashCode()}. 9321 * 9322 * @since 1.8 9323 * 9324 * @param value The {@code char} for which to return a hash code. 9325 * @return a hash code value for a {@code char} value. 9326 */ 9327 public static int hashCode(char value) { 9328 return (int)value; 9329 } 9330 9331 /** 9332 * Compares this object against the specified object. 9333 * The result is {@code true} if and only if the argument is not 9334 * {@code null} and is a {@code Character} object that 9335 * represents the same {@code char} value as this object. 9336 * 9337 * @param obj the object to compare with. 9338 * @return {@code true} if the objects are the same; 9339 * {@code false} otherwise. 9340 */ 9341 public boolean equals(Object obj) { 9342 if (obj instanceof Character c) { 9343 return value == c.charValue(); 9344 } 9345 return false; 9346 } 9347 9348 /** 9349 * Returns a {@code String} object representing this 9350 * {@code Character}'s value. The result is a string of 9351 * length 1 whose sole component is the primitive 9352 * {@code char} value represented by this 9353 * {@code Character} object. 9354 * 9355 * @return a string representation of this object. 9356 */ 9357 @Override 9358 public String toString() { 9359 return String.valueOf(value); 9360 } 9361 9362 /** 9363 * Returns a {@code String} object representing the 9364 * specified {@code char}. The result is a string of length 9365 * 1 consisting solely of the specified {@code char}. 
9366 * 9367 * @apiNote This method cannot handle <a 9368 * href="#supplementary"> supplementary characters</a>. To support 9369 * all Unicode characters, including supplementary characters, use 9370 * the {@link #toString(int)} method. 9371 * 9372 * @param c the {@code char} to be converted 9373 * @return the string representation of the specified {@code char} 9374 * @since 1.4 9375 */ 9376 public static String toString(char c) { 9377 return String.valueOf(c); 9378 } 9379 9380 /** 9381 * Returns a {@code String} object representing the 9382 * specified character (Unicode code point). The result is a string of 9383 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9384 * 9385 * @param codePoint the {@code codePoint} to be converted 9386 * @return the string representation of the specified {@code codePoint} 9387 * @throws IllegalArgumentException if the specified 9388 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9389 * valid Unicode code point}. 9390 * @since 11 9391 */ 9392 public static String toString(int codePoint) { 9393 return String.valueOfCodePoint(codePoint); 9394 } 9395 9396 /** 9397 * Determines whether the specified code point is a valid 9398 * <a href="http://www.unicode.org/glossary/#code_point"> 9399 * Unicode code point value</a>. 9400 * 9401 * @param codePoint the Unicode code point to be tested 9402 * @return {@code true} if the specified code point value is between 9403 * {@link #MIN_CODE_POINT} and 9404 * {@link #MAX_CODE_POINT} inclusive; 9405 * {@code false} otherwise. 9406 * @since 1.5 9407 */ 9408 public static boolean isValidCodePoint(int codePoint) { 9409 // Optimized form of: 9410 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9411 int plane = codePoint >>> 16; 9412 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9413 } 9414 9415 /** 9416 * Determines whether the specified character (Unicode code point) 9417 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 
9418 * Such code points can be represented using a single {@code char}. 9419 * 9420 * @param codePoint the character (Unicode code point) to be tested 9421 * @return {@code true} if the specified code point is between 9422 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9423 * {@code false} otherwise. 9424 * @since 1.7 9425 */ 9426 public static boolean isBmpCodePoint(int codePoint) { 9427 return codePoint >>> 16 == 0; 9428 // Optimized form of: 9429 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9430 // We consistently use logical shift (>>>) to facilitate 9431 // additional runtime optimizations. 9432 } 9433 9434 /** 9435 * Determines whether the specified character (Unicode code point) 9436 * is in the <a href="#supplementary">supplementary character</a> range. 9437 * 9438 * @param codePoint the character (Unicode code point) to be tested 9439 * @return {@code true} if the specified code point is between 9440 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9441 * {@link #MAX_CODE_POINT} inclusive; 9442 * {@code false} otherwise. 9443 * @since 1.5 9444 */ 9445 public static boolean isSupplementaryCodePoint(int codePoint) { 9446 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9447 && codePoint < MAX_CODE_POINT + 1; 9448 } 9449 9450 /** 9451 * Determines if the given {@code char} value is a 9452 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9453 * Unicode high-surrogate code unit</a> 9454 * (also known as <i>leading-surrogate code unit</i>). 9455 * 9456 * <p>Such values do not represent characters by themselves, 9457 * but are used in the representation of 9458 * <a href="#supplementary">supplementary characters</a> 9459 * in the UTF-16 encoding. 9460 * 9461 * @param ch the {@code char} value to be tested. 9462 * @return {@code true} if the {@code char} value is between 9463 * {@link #MIN_HIGH_SURROGATE} and 9464 * {@link #MAX_HIGH_SURROGATE} inclusive; 9465 * {@code false} otherwise. 
9466 * @see Character#isLowSurrogate(char) 9467 * @see Character.UnicodeBlock#of(int) 9468 * @since 1.5 9469 */ 9470 public static boolean isHighSurrogate(char ch) { 9471 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9472 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9473 } 9474 9475 /** 9476 * Determines if the given {@code char} value is a 9477 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9478 * Unicode low-surrogate code unit</a> 9479 * (also known as <i>trailing-surrogate code unit</i>). 9480 * 9481 * <p>Such values do not represent characters by themselves, 9482 * but are used in the representation of 9483 * <a href="#supplementary">supplementary characters</a> 9484 * in the UTF-16 encoding. 9485 * 9486 * @param ch the {@code char} value to be tested. 9487 * @return {@code true} if the {@code char} value is between 9488 * {@link #MIN_LOW_SURROGATE} and 9489 * {@link #MAX_LOW_SURROGATE} inclusive; 9490 * {@code false} otherwise. 9491 * @see Character#isHighSurrogate(char) 9492 * @since 1.5 9493 */ 9494 public static boolean isLowSurrogate(char ch) { 9495 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9496 } 9497 9498 /** 9499 * Determines if the given {@code char} value is a Unicode 9500 * <i>surrogate code unit</i>. 9501 * 9502 * <p>Such values do not represent characters by themselves, 9503 * but are used in the representation of 9504 * <a href="#supplementary">supplementary characters</a> 9505 * in the UTF-16 encoding. 9506 * 9507 * <p>A char value is a surrogate code unit if and only if it is either 9508 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9509 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 9510 * 9511 * @param ch the {@code char} value to be tested. 9512 * @return {@code true} if the {@code char} value is between 9513 * {@link #MIN_SURROGATE} and 9514 * {@link #MAX_SURROGATE} inclusive; 9515 * {@code false} otherwise. 
9516 * @since 1.7 9517 */ 9518 public static boolean isSurrogate(char ch) { 9519 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9520 } 9521 9522 /** 9523 * Determines whether the specified pair of {@code char} 9524 * values is a valid 9525 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9526 * Unicode surrogate pair</a>. 9527 * 9528 * <p>This method is equivalent to the expression: 9529 * <blockquote><pre>{@code 9530 * isHighSurrogate(high) && isLowSurrogate(low) 9531 * }</pre></blockquote> 9532 * 9533 * @param high the high-surrogate code value to be tested 9534 * @param low the low-surrogate code value to be tested 9535 * @return {@code true} if the specified high and 9536 * low-surrogate code values represent a valid surrogate pair; 9537 * {@code false} otherwise. 9538 * @since 1.5 9539 */ 9540 public static boolean isSurrogatePair(char high, char low) { 9541 return isHighSurrogate(high) && isLowSurrogate(low); 9542 } 9543 9544 /** 9545 * Determines the number of {@code char} values needed to 9546 * represent the specified character (Unicode code point). If the 9547 * specified character is equal to or greater than 0x10000, then 9548 * the method returns 2. Otherwise, the method returns 1. 9549 * 9550 * <p>This method doesn't validate the specified character to be a 9551 * valid Unicode code point. The caller must validate the 9552 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9553 * if necessary. 9554 * 9555 * @param codePoint the character (Unicode code point) to be tested. 9556 * @return 2 if the character is a valid supplementary character; 1 otherwise. 9557 * @see Character#isSupplementaryCodePoint(int) 9558 * @since 1.5 9559 */ 9560 public static int charCount(int codePoint) { 9561 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9562 } 9563 9564 /** 9565 * Converts the specified surrogate pair to its supplementary code 9566 * point value. 
This method does not validate the specified 9567 * surrogate pair. The caller must validate it using {@link 9568 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9569 * 9570 * @param high the high-surrogate code unit 9571 * @param low the low-surrogate code unit 9572 * @return the supplementary code point composed from the 9573 * specified surrogate pair. 9574 * @since 1.5 9575 */ 9576 public static int toCodePoint(char high, char low) { 9577 // Optimized form of: 9578 // return ((high - MIN_HIGH_SURROGATE) << 10) 9579 // + (low - MIN_LOW_SURROGATE) 9580 // + MIN_SUPPLEMENTARY_CODE_POINT; 9581 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9582 - (MIN_HIGH_SURROGATE << 10) 9583 - MIN_LOW_SURROGATE); 9584 } 9585 9586 /** 9587 * Returns the code point at the given index of the 9588 * {@code CharSequence}. If the {@code char} value at 9589 * the given index in the {@code CharSequence} is in the 9590 * high-surrogate range, the following index is less than the 9591 * length of the {@code CharSequence}, and the 9592 * {@code char} value at the following index is in the 9593 * low-surrogate range, then the supplementary code point 9594 * corresponding to this surrogate pair is returned. Otherwise, 9595 * the {@code char} value at the given index is returned. 9596 * 9597 * @param seq a sequence of {@code char} values (Unicode code 9598 * units) 9599 * @param index the index to the {@code char} values (Unicode 9600 * code units) in {@code seq} to be converted 9601 * @return the Unicode code point at the given index 9602 * @throws NullPointerException if {@code seq} is null. 9603 * @throws IndexOutOfBoundsException if the value 9604 * {@code index} is negative or not less than 9605 * {@link CharSequence#length() seq.length()}. 
9606 * @since 1.5 9607 */ 9608 public static int codePointAt(CharSequence seq, int index) { 9609 char c1 = seq.charAt(index); 9610 if (isHighSurrogate(c1) && ++index < seq.length()) { 9611 char c2 = seq.charAt(index); 9612 if (isLowSurrogate(c2)) { 9613 return toCodePoint(c1, c2); 9614 } 9615 } 9616 return c1; 9617 } 9618 9619 /** 9620 * Returns the code point at the given index of the 9621 * {@code char} array. If the {@code char} value at 9622 * the given index in the {@code char} array is in the 9623 * high-surrogate range, the following index is less than the 9624 * length of the {@code char} array, and the 9625 * {@code char} value at the following index is in the 9626 * low-surrogate range, then the supplementary code point 9627 * corresponding to this surrogate pair is returned. Otherwise, 9628 * the {@code char} value at the given index is returned. 9629 * 9630 * @param a the {@code char} array 9631 * @param index the index to the {@code char} values (Unicode 9632 * code units) in the {@code char} array to be converted 9633 * @return the Unicode code point at the given index 9634 * @throws NullPointerException if {@code a} is null. 9635 * @throws IndexOutOfBoundsException if the value 9636 * {@code index} is negative or not less than 9637 * the length of the {@code char} array. 9638 * @since 1.5 9639 */ 9640 public static int codePointAt(char[] a, int index) { 9641 return codePointAtImpl(a, index, a.length); 9642 } 9643 9644 /** 9645 * Returns the code point at the given index of the 9646 * {@code char} array, where only array elements with 9647 * {@code index} less than {@code limit} can be used. If 9648 * the {@code char} value at the given index in the 9649 * {@code char} array is in the high-surrogate range, the 9650 * following index is less than the {@code limit}, and the 9651 * {@code char} value at the following index is in the 9652 * low-surrogate range, then the supplementary code point 9653 * corresponding to this surrogate pair is returned. 
Otherwise, 9654 * the {@code char} value at the given index is returned. 9655 * 9656 * @param a the {@code char} array 9657 * @param index the index to the {@code char} values (Unicode 9658 * code units) in the {@code char} array to be converted 9659 * @param limit the index after the last array element that 9660 * can be used in the {@code char} array 9661 * @return the Unicode code point at the given index 9662 * @throws NullPointerException if {@code a} is null. 9663 * @throws IndexOutOfBoundsException if the {@code index} 9664 * argument is negative or not less than the {@code limit} 9665 * argument, or if the {@code limit} argument is negative or 9666 * greater than the length of the {@code char} array. 9667 * @since 1.5 9668 */ 9669 public static int codePointAt(char[] a, int index, int limit) { 9670 if (index >= limit || index < 0 || limit > a.length) { 9671 throw new IndexOutOfBoundsException(); 9672 } 9673 return codePointAtImpl(a, index, limit); 9674 } 9675 9676 // throws ArrayIndexOutOfBoundsException if index out of bounds 9677 static int codePointAtImpl(char[] a, int index, int limit) { 9678 char c1 = a[index]; 9679 if (isHighSurrogate(c1) && ++index < limit) { 9680 char c2 = a[index]; 9681 if (isLowSurrogate(c2)) { 9682 return toCodePoint(c1, c2); 9683 } 9684 } 9685 return c1; 9686 } 9687 9688 /** 9689 * Returns the code point preceding the given index of the 9690 * {@code CharSequence}. If the {@code char} value at 9691 * {@code (index - 1)} in the {@code CharSequence} is in 9692 * the low-surrogate range, {@code (index - 2)} is not 9693 * negative, and the {@code char} value at {@code (index - 2)} 9694 * in the {@code CharSequence} is in the 9695 * high-surrogate range, then the supplementary code point 9696 * corresponding to this surrogate pair is returned. Otherwise, 9697 * the {@code char} value at {@code (index - 1)} is 9698 * returned. 
9699 * 9700 * @param seq the {@code CharSequence} instance 9701 * @param index the index following the code point that should be returned 9702 * @return the Unicode code point value before the given index. 9703 * @throws NullPointerException if {@code seq} is null. 9704 * @throws IndexOutOfBoundsException if the {@code index} 9705 * argument is less than 1 or greater than {@link 9706 * CharSequence#length() seq.length()}. 9707 * @since 1.5 9708 */ 9709 public static int codePointBefore(CharSequence seq, int index) { 9710 char c2 = seq.charAt(--index); 9711 if (isLowSurrogate(c2) && index > 0) { 9712 char c1 = seq.charAt(--index); 9713 if (isHighSurrogate(c1)) { 9714 return toCodePoint(c1, c2); 9715 } 9716 } 9717 return c2; 9718 } 9719 9720 /** 9721 * Returns the code point preceding the given index of the 9722 * {@code char} array. If the {@code char} value at 9723 * {@code (index - 1)} in the {@code char} array is in 9724 * the low-surrogate range, {@code (index - 2)} is not 9725 * negative, and the {@code char} value at {@code (index - 2)} 9726 * in the {@code char} array is in the 9727 * high-surrogate range, then the supplementary code point 9728 * corresponding to this surrogate pair is returned. Otherwise, 9729 * the {@code char} value at {@code (index - 1)} is 9730 * returned. 9731 * 9732 * @param a the {@code char} array 9733 * @param index the index following the code point that should be returned 9734 * @return the Unicode code point value before the given index. 9735 * @throws NullPointerException if {@code a} is null. 
9736 * @throws IndexOutOfBoundsException if the {@code index} 9737 * argument is less than 1 or greater than the length of the 9738 * {@code char} array 9739 * @since 1.5 9740 */ 9741 public static int codePointBefore(char[] a, int index) { 9742 return codePointBeforeImpl(a, index, 0); 9743 } 9744 9745 /** 9746 * Returns the code point preceding the given index of the 9747 * {@code char} array, where only array elements with 9748 * {@code index} greater than or equal to {@code start} 9749 * can be used. If the {@code char} value at {@code (index - 1)} 9750 * in the {@code char} array is in the 9751 * low-surrogate range, {@code (index - 2)} is not less than 9752 * {@code start}, and the {@code char} value at 9753 * {@code (index - 2)} in the {@code char} array is in 9754 * the high-surrogate range, then the supplementary code point 9755 * corresponding to this surrogate pair is returned. Otherwise, 9756 * the {@code char} value at {@code (index - 1)} is 9757 * returned. 9758 * 9759 * @param a the {@code char} array 9760 * @param index the index following the code point that should be returned 9761 * @param start the index of the first array element in the 9762 * {@code char} array 9763 * @return the Unicode code point value before the given index. 9764 * @throws NullPointerException if {@code a} is null. 9765 * @throws IndexOutOfBoundsException if the {@code index} 9766 * argument is not greater than the {@code start} argument or 9767 * is greater than the length of the {@code char} array, or 9768 * if the {@code start} argument is negative or not less than 9769 * the length of the {@code char} array. 
9770 * @since 1.5 9771 */ 9772 public static int codePointBefore(char[] a, int index, int start) { 9773 if (index <= start || start < 0 || index > a.length) { 9774 throw new IndexOutOfBoundsException(); 9775 } 9776 return codePointBeforeImpl(a, index, start); 9777 } 9778 9779 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds 9780 static int codePointBeforeImpl(char[] a, int index, int start) { 9781 char c2 = a[--index]; 9782 if (isLowSurrogate(c2) && index > start) { 9783 char c1 = a[--index]; 9784 if (isHighSurrogate(c1)) { 9785 return toCodePoint(c1, c2); 9786 } 9787 } 9788 return c2; 9789 } 9790 9791 /** 9792 * Returns the leading surrogate (a 9793 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9794 * high surrogate code unit</a>) of the 9795 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9796 * surrogate pair</a> 9797 * representing the specified supplementary character (Unicode 9798 * code point) in the UTF-16 encoding. If the specified character 9799 * is not a 9800 * <a href="Character.html#supplementary">supplementary character</a>, 9801 * an unspecified {@code char} is returned. 9802 * 9803 * <p>If 9804 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9805 * is {@code true}, then 9806 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9807 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9808 * are also always {@code true}. 
9809 * 9810 * @param codePoint a supplementary character (Unicode code point) 9811 * @return the leading surrogate code unit used to represent the 9812 * character in the UTF-16 encoding 9813 * @since 1.7 9814 */ 9815 public static char highSurrogate(int codePoint) { 9816 return (char) ((codePoint >>> 10) 9817 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9818 } 9819 9820 /** 9821 * Returns the trailing surrogate (a 9822 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9823 * low surrogate code unit</a>) of the 9824 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9825 * surrogate pair</a> 9826 * representing the specified supplementary character (Unicode 9827 * code point) in the UTF-16 encoding. If the specified character 9828 * is not a 9829 * <a href="Character.html#supplementary">supplementary character</a>, 9830 * an unspecified {@code char} is returned. 9831 * 9832 * <p>If 9833 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9834 * is {@code true}, then 9835 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9836 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9837 * are also always {@code true}. 9838 * 9839 * @param codePoint a supplementary character (Unicode code point) 9840 * @return the trailing surrogate code unit used to represent the 9841 * character in the UTF-16 encoding 9842 * @since 1.7 9843 */ 9844 public static char lowSurrogate(int codePoint) { 9845 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9846 } 9847 9848 /** 9849 * Converts the specified character (Unicode code point) to its 9850 * UTF-16 representation. If the specified code point is a BMP 9851 * (Basic Multilingual Plane or Plane 0) value, the same value is 9852 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9853 * specified code point is a supplementary character, its 9854 * surrogate values are stored in {@code dst[dstIndex]} 9855 * (high-surrogate) and {@code dst[dstIndex+1]} 9856 * (low-surrogate), and 2 is returned. 9857 * 9858 * @param codePoint the character (Unicode code point) to be converted. 9859 * @param dst an array of {@code char} in which the 9860 * {@code codePoint}'s UTF-16 value is stored. 9861 * @param dstIndex the start index into the {@code dst} 9862 * array where the converted value is stored. 9863 * @return 1 if the code point is a BMP code point, 2 if the 9864 * code point is a supplementary code point. 9865 * @throws IllegalArgumentException if the specified 9866 * {@code codePoint} is not a valid Unicode code point. 9867 * @throws NullPointerException if the specified {@code dst} is null. 9868 * @throws IndexOutOfBoundsException if {@code dstIndex} 9869 * is negative or not less than {@code dst.length}, or if 9870 * {@code dst} at {@code dstIndex} doesn't have enough 9871 * array element(s) to store the resulting {@code char} 9872 * value(s). (If {@code dstIndex} is equal to 9873 * {@code dst.length-1} and the specified 9874 * {@code codePoint} is a supplementary character, the 9875 * high-surrogate value is not stored in 9876 * {@code dst[dstIndex]}.) 9877 * @since 1.5 9878 */ 9879 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9880 if (isBmpCodePoint(codePoint)) { 9881 dst[dstIndex] = (char) codePoint; 9882 return 1; 9883 } else if (isValidCodePoint(codePoint)) { 9884 toSurrogates(codePoint, dst, dstIndex); 9885 return 2; 9886 } else { 9887 throw new IllegalArgumentException( 9888 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9889 } 9890 } 9891 9892 /** 9893 * Converts the specified character (Unicode code point) to its 9894 * UTF-16 representation stored in a {@code char} array. 
If 9895 * the specified code point is a BMP (Basic Multilingual Plane or 9896 * Plane 0) value, the resulting {@code char} array has 9897 * the same value as {@code codePoint}. If the specified code 9898 * point is a supplementary code point, the resulting 9899 * {@code char} array has the corresponding surrogate pair. 9900 * 9901 * @param codePoint a Unicode code point 9902 * @return a {@code char} array having 9903 * {@code codePoint}'s UTF-16 representation. 9904 * @throws IllegalArgumentException if the specified 9905 * {@code codePoint} is not a valid Unicode code point. 9906 * @since 1.5 9907 */ 9908 public static char[] toChars(int codePoint) { 9909 if (isBmpCodePoint(codePoint)) { 9910 return new char[] { (char) codePoint }; 9911 } else if (isValidCodePoint(codePoint)) { 9912 char[] result = new char[2]; 9913 toSurrogates(codePoint, result, 0); 9914 return result; 9915 } else { 9916 throw new IllegalArgumentException( 9917 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9918 } 9919 } 9920 9921 static void toSurrogates(int codePoint, char[] dst, int index) { 9922 // We write elements "backwards" to guarantee all-or-nothing 9923 dst[index+1] = lowSurrogate(codePoint); 9924 dst[index] = highSurrogate(codePoint); 9925 } 9926 9927 /** 9928 * Returns the number of Unicode code points in the text range of 9929 * the specified char sequence. The text range begins at the 9930 * specified {@code beginIndex} and extends to the 9931 * {@code char} at index {@code endIndex - 1}. Thus the 9932 * length (in {@code char}s) of the text range is 9933 * {@code endIndex-beginIndex}. Unpaired surrogates within 9934 * the text range count as one code point each. 9935 * 9936 * @param seq the char sequence 9937 * @param beginIndex the index to the first {@code char} of 9938 * the text range. 9939 * @param endIndex the index after the last {@code char} of 9940 * the text range. 
9941 * @return the number of Unicode code points in the specified text 9942 * range 9943 * @throws NullPointerException if {@code seq} is null. 9944 * @throws IndexOutOfBoundsException if the 9945 * {@code beginIndex} is negative, or {@code endIndex} 9946 * is larger than the length of the given sequence, or 9947 * {@code beginIndex} is larger than {@code endIndex}. 9948 * @since 1.5 9949 */ 9950 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9951 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9952 int n = endIndex - beginIndex; 9953 for (int i = beginIndex; i < endIndex; ) { 9954 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9955 isLowSurrogate(seq.charAt(i))) { 9956 n--; 9957 i++; 9958 } 9959 } 9960 return n; 9961 } 9962 9963 /** 9964 * Returns the number of Unicode code points in a subarray of the 9965 * {@code char} array argument. The {@code offset} 9966 * argument is the index of the first {@code char} of the 9967 * subarray and the {@code count} argument specifies the 9968 * length of the subarray in {@code char}s. Unpaired 9969 * surrogates within the subarray count as one code point each. 9970 * 9971 * @param a the {@code char} array 9972 * @param offset the index of the first {@code char} in the 9973 * given {@code char} array 9974 * @param count the length of the subarray in {@code char}s 9975 * @return the number of Unicode code points in the specified subarray 9976 * @throws NullPointerException if {@code a} is null. 9977 * @throws IndexOutOfBoundsException if {@code offset} or 9978 * {@code count} is negative, or if {@code offset + 9979 * count} is larger than the length of the given array. 
9980 * @since 1.5 9981 */ 9982 public static int codePointCount(char[] a, int offset, int count) { 9983 Objects.checkFromIndexSize(offset, count, a.length); 9984 return codePointCountImpl(a, offset, count); 9985 } 9986 9987 static int codePointCountImpl(char[] a, int offset, int count) { 9988 int endIndex = offset + count; 9989 int n = count; 9990 for (int i = offset; i < endIndex; ) { 9991 if (isHighSurrogate(a[i++]) && i < endIndex && 9992 isLowSurrogate(a[i])) { 9993 n--; 9994 i++; 9995 } 9996 } 9997 return n; 9998 } 9999 10000 /** 10001 * Returns the index within the given char sequence that is offset 10002 * from the given {@code index} by {@code codePointOffset} 10003 * code points. Unpaired surrogates within the text range given by 10004 * {@code index} and {@code codePointOffset} count as 10005 * one code point each. 10006 * 10007 * @param seq the char sequence 10008 * @param index the index to be offset 10009 * @param codePointOffset the offset in code points 10010 * @return the index within the char sequence 10011 * @throws NullPointerException if {@code seq} is null. 10012 * @throws IndexOutOfBoundsException if {@code index} 10013 * is negative or larger than the length of the char sequence, 10014 * or if {@code codePointOffset} is positive and the 10015 * subsequence starting with {@code index} has fewer than 10016 * {@code codePointOffset} code points, or if 10017 * {@code codePointOffset} is negative and the subsequence 10018 * before {@code index} has fewer than the absolute value 10019 * of {@code codePointOffset} code points. 
10020 * @since 1.5 10021 */ 10022 public static int offsetByCodePoints(CharSequence seq, int index, 10023 int codePointOffset) { 10024 int length = seq.length(); 10025 if (index < 0 || index > length) { 10026 throw new IndexOutOfBoundsException(); 10027 } 10028 10029 int x = index; 10030 if (codePointOffset >= 0) { 10031 int i; 10032 for (i = 0; x < length && i < codePointOffset; i++) { 10033 if (isHighSurrogate(seq.charAt(x++)) && x < length && 10034 isLowSurrogate(seq.charAt(x))) { 10035 x++; 10036 } 10037 } 10038 if (i < codePointOffset) { 10039 throw new IndexOutOfBoundsException(); 10040 } 10041 } else { 10042 int i; 10043 for (i = codePointOffset; x > 0 && i < 0; i++) { 10044 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 10045 isHighSurrogate(seq.charAt(x-1))) { 10046 x--; 10047 } 10048 } 10049 if (i < 0) { 10050 throw new IndexOutOfBoundsException(); 10051 } 10052 } 10053 return x; 10054 } 10055 10056 /** 10057 * Returns the index within the given {@code char} subarray 10058 * that is offset from the given {@code index} by 10059 * {@code codePointOffset} code points. The 10060 * {@code start} and {@code count} arguments specify a 10061 * subarray of the {@code char} array. Unpaired surrogates 10062 * within the text range given by {@code index} and 10063 * {@code codePointOffset} count as one code point each. 10064 * 10065 * @param a the {@code char} array 10066 * @param start the index of the first {@code char} of the 10067 * subarray 10068 * @param count the length of the subarray in {@code char}s 10069 * @param index the index to be offset 10070 * @param codePointOffset the offset in code points 10071 * @return the index within the subarray 10072 * @throws NullPointerException if {@code a} is null. 
10073 * @throws IndexOutOfBoundsException 10074 * if {@code start} or {@code count} is negative, 10075 * or if {@code start + count} is larger than the length of 10076 * the given array, 10077 * or if {@code index} is less than {@code start} or 10078 * larger then {@code start + count}, 10079 * or if {@code codePointOffset} is positive and the text range 10080 * starting with {@code index} and ending with {@code start + count - 1} 10081 * has fewer than {@code codePointOffset} code 10082 * points, 10083 * or if {@code codePointOffset} is negative and the text range 10084 * starting with {@code start} and ending with {@code index - 1} 10085 * has fewer than the absolute value of 10086 * {@code codePointOffset} code points. 10087 * @since 1.5 10088 */ 10089 public static int offsetByCodePoints(char[] a, int start, int count, 10090 int index, int codePointOffset) { 10091 if (count > a.length-start || start < 0 || count < 0 10092 || index < start || index > start+count) { 10093 throw new IndexOutOfBoundsException(); 10094 } 10095 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 10096 } 10097 10098 static int offsetByCodePointsImpl(char[]a, int start, int count, 10099 int index, int codePointOffset) { 10100 int x = index; 10101 if (codePointOffset >= 0) { 10102 int limit = start + count; 10103 int i; 10104 for (i = 0; x < limit && i < codePointOffset; i++) { 10105 if (isHighSurrogate(a[x++]) && x < limit && 10106 isLowSurrogate(a[x])) { 10107 x++; 10108 } 10109 } 10110 if (i < codePointOffset) { 10111 throw new IndexOutOfBoundsException(); 10112 } 10113 } else { 10114 int i; 10115 for (i = codePointOffset; x > start && i < 0; i++) { 10116 if (isLowSurrogate(a[--x]) && x > start && 10117 isHighSurrogate(a[x-1])) { 10118 x--; 10119 } 10120 } 10121 if (i < 0) { 10122 throw new IndexOutOfBoundsException(); 10123 } 10124 } 10125 return x; 10126 } 10127 10128 /** 10129 * Determines if the specified character is a lowercase character. 
10130 * <p> 10131 * A character is lowercase if its general category type, provided 10132 * by {@code Character.getType(ch)}, is 10133 * {@code LOWERCASE_LETTER}, or it has contributory property 10134 * Other_Lowercase as defined by the Unicode Standard. 10135 * <p> 10136 * The following are examples of lowercase characters: 10137 * <blockquote><pre> 10138 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10139 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10140 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10141 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10142 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10143 * </pre></blockquote> 10144 * <p> Many other Unicode characters are lowercase too. 10145 * 10146 * <p><b>Note:</b> This method cannot handle <a 10147 * href="#supplementary"> supplementary characters</a>. To support 10148 * all Unicode characters, including supplementary characters, use 10149 * the {@link #isLowerCase(int)} method. 10150 * 10151 * @param ch the character to be tested. 10152 * @return {@code true} if the character is lowercase; 10153 * {@code false} otherwise. 10154 * @see Character#isLowerCase(char) 10155 * @see Character#isTitleCase(char) 10156 * @see Character#toLowerCase(char) 10157 * @see Character#getType(char) 10158 */ 10159 public static boolean isLowerCase(char ch) { 10160 return isLowerCase((int)ch); 10161 } 10162 10163 /** 10164 * Determines if the specified character (Unicode code point) is a 10165 * lowercase character. 10166 * <p> 10167 * A character is lowercase if its general category type, provided 10168 * by {@link Character#getType getType(codePoint)}, is 10169 * {@code LOWERCASE_LETTER}, or it has contributory property 10170 * Other_Lowercase as defined by the Unicode Standard. 
10171 * <p> 10172 * The following are examples of lowercase characters: 10173 * <blockquote><pre> 10174 * a b c d e f g h i j k l m n o p q r s t u v w x y z 10175 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 10176 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 10177 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 10178 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 10179 * </pre></blockquote> 10180 * <p> Many other Unicode characters are lowercase too. 10181 * 10182 * @param codePoint the character (Unicode code point) to be tested. 10183 * @return {@code true} if the character is lowercase; 10184 * {@code false} otherwise. 10185 * @see Character#isLowerCase(int) 10186 * @see Character#isTitleCase(int) 10187 * @see Character#toLowerCase(int) 10188 * @see Character#getType(int) 10189 * @since 1.5 10190 */ 10191 public static boolean isLowerCase(int codePoint) { 10192 return CharacterData.of(codePoint).isLowerCase(codePoint); 10193 } 10194 10195 /** 10196 * Determines if the specified character is an uppercase character. 10197 * <p> 10198 * A character is uppercase if its general category type, provided by 10199 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 10200 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10201 * <p> 10202 * The following are examples of uppercase characters: 10203 * <blockquote><pre> 10204 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10205 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10206 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10207 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10208 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10209 * </pre></blockquote> 10210 * <p> Many other Unicode characters are uppercase too. 
10211 * 10212 * <p><b>Note:</b> This method cannot handle <a 10213 * href="#supplementary"> supplementary characters</a>. To support 10214 * all Unicode characters, including supplementary characters, use 10215 * the {@link #isUpperCase(int)} method. 10216 * 10217 * @param ch the character to be tested. 10218 * @return {@code true} if the character is uppercase; 10219 * {@code false} otherwise. 10220 * @see Character#isLowerCase(char) 10221 * @see Character#isTitleCase(char) 10222 * @see Character#toUpperCase(char) 10223 * @see Character#getType(char) 10224 * @since 1.0 10225 */ 10226 public static boolean isUpperCase(char ch) { 10227 return isUpperCase((int)ch); 10228 } 10229 10230 /** 10231 * Determines if the specified character (Unicode code point) is an uppercase character. 10232 * <p> 10233 * A character is uppercase if its general category type, provided by 10234 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 10235 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 10236 * <p> 10237 * The following are examples of uppercase characters: 10238 * <blockquote><pre> 10239 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10240 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10241 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10242 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10243 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10244 * </pre></blockquote> 10245 * <p> Many other Unicode characters are uppercase too. 10246 * 10247 * @param codePoint the character (Unicode code point) to be tested. 10248 * @return {@code true} if the character is uppercase; 10249 * {@code false} otherwise. 
10250 * @see Character#isLowerCase(int) 10251 * @see Character#isTitleCase(int) 10252 * @see Character#toUpperCase(int) 10253 * @see Character#getType(int) 10254 * @since 1.5 10255 */ 10256 public static boolean isUpperCase(int codePoint) { 10257 return CharacterData.of(codePoint).isUpperCase(codePoint); 10258 } 10259 10260 /** 10261 * Determines if the specified character is a titlecase character. 10262 * <p> 10263 * A character is a titlecase character if its general 10264 * category type, provided by {@code Character.getType(ch)}, 10265 * is {@code TITLECASE_LETTER}. 10266 * <p> 10267 * Some characters look like pairs of Latin letters. For example, there 10268 * is an uppercase letter that looks like "LJ" and has a corresponding 10269 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10270 * is the appropriate form to use when rendering a word in lowercase 10271 * with initial capitals, as for a book title. 10272 * <p> 10273 * These are some of the Unicode characters for which this method returns 10274 * {@code true}: 10275 * <ul> 10276 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10277 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10278 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10279 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10280 * </ul> 10281 * <p> Many other Unicode characters are titlecase too. 10282 * 10283 * <p><b>Note:</b> This method cannot handle <a 10284 * href="#supplementary"> supplementary characters</a>. To support 10285 * all Unicode characters, including supplementary characters, use 10286 * the {@link #isTitleCase(int)} method. 10287 * 10288 * @param ch the character to be tested. 10289 * @return {@code true} if the character is titlecase; 10290 * {@code false} otherwise. 
10291 * @see Character#isLowerCase(char) 10292 * @see Character#isUpperCase(char) 10293 * @see Character#toTitleCase(char) 10294 * @see Character#getType(char) 10295 * @since 1.0.2 10296 */ 10297 public static boolean isTitleCase(char ch) { 10298 return isTitleCase((int)ch); 10299 } 10300 10301 /** 10302 * Determines if the specified character (Unicode code point) is a titlecase character. 10303 * <p> 10304 * A character is a titlecase character if its general 10305 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10306 * is {@code TITLECASE_LETTER}. 10307 * <p> 10308 * Some characters look like pairs of Latin letters. For example, there 10309 * is an uppercase letter that looks like "LJ" and has a corresponding 10310 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 10311 * is the appropriate form to use when rendering a word in lowercase 10312 * with initial capitals, as for a book title. 10313 * <p> 10314 * These are some of the Unicode characters for which this method returns 10315 * {@code true}: 10316 * <ul> 10317 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10318 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10319 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10320 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10321 * </ul> 10322 * <p> Many other Unicode characters are titlecase too. 10323 * 10324 * @param codePoint the character (Unicode code point) to be tested. 10325 * @return {@code true} if the character is titlecase; 10326 * {@code false} otherwise. 10327 * @see Character#isLowerCase(int) 10328 * @see Character#isUpperCase(int) 10329 * @see Character#toTitleCase(int) 10330 * @see Character#getType(int) 10331 * @since 1.5 10332 */ 10333 public static boolean isTitleCase(int codePoint) { 10334 return getType(codePoint) == Character.TITLECASE_LETTER; 10335 } 10336 10337 /** 10338 * Determines if the specified character is a digit. 
10339 * <p> 10340 * A character is a digit if its general category type, provided 10341 * by {@code Character.getType(ch)}, is 10342 * {@code DECIMAL_DIGIT_NUMBER}. 10343 * <p> 10344 * Some Unicode character ranges that contain digits: 10345 * <ul> 10346 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10347 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10348 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10349 * Arabic-Indic digits 10350 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10351 * Extended Arabic-Indic digits 10352 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10353 * Devanagari digits 10354 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10355 * Fullwidth digits 10356 * </ul> 10357 * 10358 * Many other character ranges contain digits as well. 10359 * 10360 * <p><b>Note:</b> This method cannot handle <a 10361 * href="#supplementary"> supplementary characters</a>. To support 10362 * all Unicode characters, including supplementary characters, use 10363 * the {@link #isDigit(int)} method. 10364 * 10365 * @param ch the character to be tested. 10366 * @return {@code true} if the character is a digit; 10367 * {@code false} otherwise. 10368 * @see Character#digit(char, int) 10369 * @see Character#forDigit(int, int) 10370 * @see Character#getType(char) 10371 */ 10372 public static boolean isDigit(char ch) { 10373 return isDigit((int)ch); 10374 } 10375 10376 /** 10377 * Determines if the specified character (Unicode code point) is a digit. 10378 * <p> 10379 * A character is a digit if its general category type, provided 10380 * by {@link Character#getType(int) getType(codePoint)}, is 10381 * {@code DECIMAL_DIGIT_NUMBER}. 
10382 * <p> 10383 * Some Unicode character ranges that contain digits: 10384 * <ul> 10385 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10386 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10387 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10388 * Arabic-Indic digits 10389 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10390 * Extended Arabic-Indic digits 10391 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10392 * Devanagari digits 10393 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10394 * Fullwidth digits 10395 * </ul> 10396 * 10397 * Many other character ranges contain digits as well. 10398 * 10399 * @param codePoint the character (Unicode code point) to be tested. 10400 * @return {@code true} if the character is a digit; 10401 * {@code false} otherwise. 10402 * @see Character#forDigit(int, int) 10403 * @see Character#getType(int) 10404 * @since 1.5 10405 */ 10406 public static boolean isDigit(int codePoint) { 10407 return CharacterData.of(codePoint).isDigit(codePoint); 10408 } 10409 10410 /** 10411 * Determines if a character is defined in Unicode. 10412 * <p> 10413 * A character is defined if at least one of the following is true: 10414 * <ul> 10415 * <li>It has an entry in the UnicodeData file. 10416 * <li>It has a value in a range defined by the UnicodeData file. 10417 * </ul> 10418 * 10419 * <p><b>Note:</b> This method cannot handle <a 10420 * href="#supplementary"> supplementary characters</a>. To support 10421 * all Unicode characters, including supplementary characters, use 10422 * the {@link #isDefined(int)} method. 10423 * 10424 * @param ch the character to be tested 10425 * @return {@code true} if the character has a defined meaning 10426 * in Unicode; {@code false} otherwise. 
10427 * @see Character#isDigit(char) 10428 * @see Character#isLetter(char) 10429 * @see Character#isLetterOrDigit(char) 10430 * @see Character#isLowerCase(char) 10431 * @see Character#isTitleCase(char) 10432 * @see Character#isUpperCase(char) 10433 * @since 1.0.2 10434 */ 10435 public static boolean isDefined(char ch) { 10436 return isDefined((int)ch); 10437 } 10438 10439 /** 10440 * Determines if a character (Unicode code point) is defined in Unicode. 10441 * <p> 10442 * A character is defined if at least one of the following is true: 10443 * <ul> 10444 * <li>It has an entry in the UnicodeData file. 10445 * <li>It has a value in a range defined by the UnicodeData file. 10446 * </ul> 10447 * 10448 * @param codePoint the character (Unicode code point) to be tested. 10449 * @return {@code true} if the character has a defined meaning 10450 * in Unicode; {@code false} otherwise. 10451 * @see Character#isDigit(int) 10452 * @see Character#isLetter(int) 10453 * @see Character#isLetterOrDigit(int) 10454 * @see Character#isLowerCase(int) 10455 * @see Character#isTitleCase(int) 10456 * @see Character#isUpperCase(int) 10457 * @since 1.5 10458 */ 10459 public static boolean isDefined(int codePoint) { 10460 return getType(codePoint) != Character.UNASSIGNED; 10461 } 10462 10463 /** 10464 * Determines if the specified character is a letter. 10465 * <p> 10466 * A character is considered to be a letter if its general 10467 * category type, provided by {@code Character.getType(ch)}, 10468 * is any of the following: 10469 * <ul> 10470 * <li> {@code UPPERCASE_LETTER} 10471 * <li> {@code LOWERCASE_LETTER} 10472 * <li> {@code TITLECASE_LETTER} 10473 * <li> {@code MODIFIER_LETTER} 10474 * <li> {@code OTHER_LETTER} 10475 * </ul> 10476 * 10477 * Not all letters have case. Many characters are 10478 * letters but are neither uppercase nor lowercase nor titlecase. 10479 * 10480 * <p><b>Note:</b> This method cannot handle <a 10481 * href="#supplementary"> supplementary characters</a>. 
To support 10482 * all Unicode characters, including supplementary characters, use 10483 * the {@link #isLetter(int)} method. 10484 * 10485 * @param ch the character to be tested. 10486 * @return {@code true} if the character is a letter; 10487 * {@code false} otherwise. 10488 * @see Character#isDigit(char) 10489 * @see Character#isJavaIdentifierStart(char) 10490 * @see Character#isJavaLetter(char) 10491 * @see Character#isJavaLetterOrDigit(char) 10492 * @see Character#isLetterOrDigit(char) 10493 * @see Character#isLowerCase(char) 10494 * @see Character#isTitleCase(char) 10495 * @see Character#isUnicodeIdentifierStart(char) 10496 * @see Character#isUpperCase(char) 10497 */ 10498 public static boolean isLetter(char ch) { 10499 return isLetter((int)ch); 10500 } 10501 10502 /** 10503 * Determines if the specified character (Unicode code point) is a letter. 10504 * <p> 10505 * A character is considered to be a letter if its general 10506 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10507 * is any of the following: 10508 * <ul> 10509 * <li> {@code UPPERCASE_LETTER} 10510 * <li> {@code LOWERCASE_LETTER} 10511 * <li> {@code TITLECASE_LETTER} 10512 * <li> {@code MODIFIER_LETTER} 10513 * <li> {@code OTHER_LETTER} 10514 * </ul> 10515 * 10516 * Not all letters have case. Many characters are 10517 * letters but are neither uppercase nor lowercase nor titlecase. 10518 * 10519 * @param codePoint the character (Unicode code point) to be tested. 10520 * @return {@code true} if the character is a letter; 10521 * {@code false} otherwise. 
10522 * @see Character#isDigit(int) 10523 * @see Character#isJavaIdentifierStart(int) 10524 * @see Character#isLetterOrDigit(int) 10525 * @see Character#isLowerCase(int) 10526 * @see Character#isTitleCase(int) 10527 * @see Character#isUnicodeIdentifierStart(int) 10528 * @see Character#isUpperCase(int) 10529 * @since 1.5 10530 */ 10531 public static boolean isLetter(int codePoint) { 10532 return ((((1 << Character.UPPERCASE_LETTER) | 10533 (1 << Character.LOWERCASE_LETTER) | 10534 (1 << Character.TITLECASE_LETTER) | 10535 (1 << Character.MODIFIER_LETTER) | 10536 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10537 != 0; 10538 } 10539 10540 /** 10541 * Determines if the specified character is a letter or digit. 10542 * <p> 10543 * A character is considered to be a letter or digit if either 10544 * {@code Character.isLetter(char ch)} or 10545 * {@code Character.isDigit(char ch)} returns 10546 * {@code true} for the character. 10547 * 10548 * <p><b>Note:</b> This method cannot handle <a 10549 * href="#supplementary"> supplementary characters</a>. To support 10550 * all Unicode characters, including supplementary characters, use 10551 * the {@link #isLetterOrDigit(int)} method. 10552 * 10553 * @param ch the character to be tested. 10554 * @return {@code true} if the character is a letter or digit; 10555 * {@code false} otherwise. 10556 * @see Character#isDigit(char) 10557 * @see Character#isJavaIdentifierPart(char) 10558 * @see Character#isJavaLetter(char) 10559 * @see Character#isJavaLetterOrDigit(char) 10560 * @see Character#isLetter(char) 10561 * @see Character#isUnicodeIdentifierPart(char) 10562 * @since 1.0.2 10563 */ 10564 public static boolean isLetterOrDigit(char ch) { 10565 return isLetterOrDigit((int)ch); 10566 } 10567 10568 /** 10569 * Determines if the specified character (Unicode code point) is a letter or digit. 
10570 * <p> 10571 * A character is considered to be a letter or digit if either 10572 * {@link #isLetter(int) isLetter(codePoint)} or 10573 * {@link #isDigit(int) isDigit(codePoint)} returns 10574 * {@code true} for the character. 10575 * 10576 * @param codePoint the character (Unicode code point) to be tested. 10577 * @return {@code true} if the character is a letter or digit; 10578 * {@code false} otherwise. 10579 * @see Character#isDigit(int) 10580 * @see Character#isJavaIdentifierPart(int) 10581 * @see Character#isLetter(int) 10582 * @see Character#isUnicodeIdentifierPart(int) 10583 * @since 1.5 10584 */ 10585 public static boolean isLetterOrDigit(int codePoint) { 10586 return ((((1 << Character.UPPERCASE_LETTER) | 10587 (1 << Character.LOWERCASE_LETTER) | 10588 (1 << Character.TITLECASE_LETTER) | 10589 (1 << Character.MODIFIER_LETTER) | 10590 (1 << Character.OTHER_LETTER) | 10591 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10592 != 0; 10593 } 10594 10595 /** 10596 * Determines if the specified character is permissible as the first 10597 * character in a Java identifier. 10598 * <p> 10599 * A character may start a Java identifier if and only if 10600 * one of the following conditions is true: 10601 * <ul> 10602 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10603 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10604 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10605 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10606 * </ul> 10607 * 10608 * @param ch the character to be tested. 10609 * @return {@code true} if the character may start a Java 10610 * identifier; {@code false} otherwise. 
10611 * @see Character#isJavaLetterOrDigit(char) 10612 * @see Character#isJavaIdentifierStart(char) 10613 * @see Character#isJavaIdentifierPart(char) 10614 * @see Character#isLetter(char) 10615 * @see Character#isLetterOrDigit(char) 10616 * @see Character#isUnicodeIdentifierStart(char) 10617 * @since 1.0.2 10618 * @deprecated Replaced by isJavaIdentifierStart(char). 10619 */ 10620 @Deprecated(since="1.1") 10621 public static boolean isJavaLetter(char ch) { 10622 return isJavaIdentifierStart(ch); 10623 } 10624 10625 /** 10626 * Determines if the specified character may be part of a Java 10627 * identifier as other than the first character. 10628 * <p> 10629 * A character may be part of a Java identifier if and only if one 10630 * of the following conditions is true: 10631 * <ul> 10632 * <li> it is a letter 10633 * <li> it is a currency symbol (such as {@code '$'}) 10634 * <li> it is a connecting punctuation character (such as {@code '_'}) 10635 * <li> it is a digit 10636 * <li> it is a numeric letter (such as a Roman numeral character) 10637 * <li> it is a combining mark 10638 * <li> it is a non-spacing mark 10639 * <li> {@code isIdentifierIgnorable} returns 10640 * {@code true} for the character. 10641 * </ul> 10642 * 10643 * @param ch the character to be tested. 10644 * @return {@code true} if the character may be part of a 10645 * Java identifier; {@code false} otherwise. 10646 * @see Character#isJavaLetter(char) 10647 * @see Character#isJavaIdentifierStart(char) 10648 * @see Character#isJavaIdentifierPart(char) 10649 * @see Character#isLetter(char) 10650 * @see Character#isLetterOrDigit(char) 10651 * @see Character#isUnicodeIdentifierPart(char) 10652 * @see Character#isIdentifierIgnorable(char) 10653 * @since 1.0.2 10654 * @deprecated Replaced by isJavaIdentifierPart(char). 
10655 */ 10656 @Deprecated(since="1.1") 10657 public static boolean isJavaLetterOrDigit(char ch) { 10658 return isJavaIdentifierPart(ch); 10659 } 10660 10661 /** 10662 * Determines if the specified character (Unicode code point) is alphabetic. 10663 * <p> 10664 * A character is considered to be alphabetic if its general category type, 10665 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10666 * the following: 10667 * <ul> 10668 * <li> {@code UPPERCASE_LETTER} 10669 * <li> {@code LOWERCASE_LETTER} 10670 * <li> {@code TITLECASE_LETTER} 10671 * <li> {@code MODIFIER_LETTER} 10672 * <li> {@code OTHER_LETTER} 10673 * <li> {@code LETTER_NUMBER} 10674 * </ul> 10675 * or it has contributory property Other_Alphabetic as defined by the 10676 * Unicode Standard. 10677 * 10678 * @param codePoint the character (Unicode code point) to be tested. 10679 * @return {@code true} if the character is a Unicode alphabet 10680 * character, {@code false} otherwise. 10681 * @since 1.7 10682 */ 10683 public static boolean isAlphabetic(int codePoint) { 10684 return (((((1 << Character.UPPERCASE_LETTER) | 10685 (1 << Character.LOWERCASE_LETTER) | 10686 (1 << Character.TITLECASE_LETTER) | 10687 (1 << Character.MODIFIER_LETTER) | 10688 (1 << Character.OTHER_LETTER) | 10689 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10690 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10691 } 10692 10693 /** 10694 * Determines if the specified character (Unicode code point) is a CJKV 10695 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10696 * the Unicode Standard. 10697 * 10698 * @param codePoint the character (Unicode code point) to be tested. 10699 * @return {@code true} if the character is a Unicode ideograph 10700 * character, {@code false} otherwise. 
10701 * @since 1.7 10702 */ 10703 public static boolean isIdeographic(int codePoint) { 10704 return CharacterData.of(codePoint).isIdeographic(codePoint); 10705 } 10706 10707 /** 10708 * Determines if the specified character is 10709 * permissible as the first character in a Java identifier. 10710 * <p> 10711 * A character may start a Java identifier if and only if 10712 * one of the following conditions is true: 10713 * <ul> 10714 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10715 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10716 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10717 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10718 * </ul> 10719 * 10720 * <p><b>Note:</b> This method cannot handle <a 10721 * href="#supplementary"> supplementary characters</a>. To support 10722 * all Unicode characters, including supplementary characters, use 10723 * the {@link #isJavaIdentifierStart(int)} method. 10724 * 10725 * @param ch the character to be tested. 10726 * @return {@code true} if the character may start a Java identifier; 10727 * {@code false} otherwise. 10728 * @see Character#isJavaIdentifierPart(char) 10729 * @see Character#isLetter(char) 10730 * @see Character#isUnicodeIdentifierStart(char) 10731 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10732 * @since 1.1 10733 */ 10734 @SuppressWarnings("doclint:reference") // cross-module links 10735 public static boolean isJavaIdentifierStart(char ch) { 10736 return isJavaIdentifierStart((int)ch); 10737 } 10738 10739 /** 10740 * Determines if the character (Unicode code point) is 10741 * permissible as the first character in a Java identifier. 
10742 * <p> 10743 * A character may start a Java identifier if and only if 10744 * one of the following conditions is true: 10745 * <ul> 10746 * <li> {@link #isLetter(int) isLetter(codePoint)} 10747 * returns {@code true} 10748 * <li> {@link #getType(int) getType(codePoint)} 10749 * returns {@code LETTER_NUMBER} 10750 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10751 * <li> the referenced character is a connecting punctuation character 10752 * (such as {@code '_'}). 10753 * </ul> 10754 * 10755 * @param codePoint the character (Unicode code point) to be tested. 10756 * @return {@code true} if the character may start a Java identifier; 10757 * {@code false} otherwise. 10758 * @see Character#isJavaIdentifierPart(int) 10759 * @see Character#isLetter(int) 10760 * @see Character#isUnicodeIdentifierStart(int) 10761 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10762 * @since 1.5 10763 */ 10764 @SuppressWarnings("doclint:reference") // cross-module links 10765 public static boolean isJavaIdentifierStart(int codePoint) { 10766 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10767 } 10768 10769 /** 10770 * Determines if the specified character may be part of a Java 10771 * identifier as other than the first character. 
10772 * <p> 10773 * A character may be part of a Java identifier if any of the following 10774 * conditions are true: 10775 * <ul> 10776 * <li> it is a letter 10777 * <li> it is a currency symbol (such as {@code '$'}) 10778 * <li> it is a connecting punctuation character (such as {@code '_'}) 10779 * <li> it is a digit 10780 * <li> it is a numeric letter (such as a Roman numeral character) 10781 * <li> it is a combining mark 10782 * <li> it is a non-spacing mark 10783 * <li> {@code isIdentifierIgnorable} returns 10784 * {@code true} for the character 10785 * </ul> 10786 * 10787 * <p><b>Note:</b> This method cannot handle <a 10788 * href="#supplementary"> supplementary characters</a>. To support 10789 * all Unicode characters, including supplementary characters, use 10790 * the {@link #isJavaIdentifierPart(int)} method. 10791 * 10792 * @param ch the character to be tested. 10793 * @return {@code true} if the character may be part of a 10794 * Java identifier; {@code false} otherwise. 10795 * @see Character#isIdentifierIgnorable(char) 10796 * @see Character#isJavaIdentifierStart(char) 10797 * @see Character#isLetterOrDigit(char) 10798 * @see Character#isUnicodeIdentifierPart(char) 10799 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10800 * @since 1.1 10801 */ 10802 @SuppressWarnings("doclint:reference") // cross-module links 10803 public static boolean isJavaIdentifierPart(char ch) { 10804 return isJavaIdentifierPart((int)ch); 10805 } 10806 10807 /** 10808 * Determines if the character (Unicode code point) may be part of a Java 10809 * identifier as other than the first character. 
10810 * <p> 10811 * A character may be part of a Java identifier if any of the following 10812 * conditions are true: 10813 * <ul> 10814 * <li> it is a letter 10815 * <li> it is a currency symbol (such as {@code '$'}) 10816 * <li> it is a connecting punctuation character (such as {@code '_'}) 10817 * <li> it is a digit 10818 * <li> it is a numeric letter (such as a Roman numeral character) 10819 * <li> it is a combining mark 10820 * <li> it is a non-spacing mark 10821 * <li> {@link #isIdentifierIgnorable(int) 10822 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10823 * the code point 10824 * </ul> 10825 * 10826 * @param codePoint the character (Unicode code point) to be tested. 10827 * @return {@code true} if the character may be part of a 10828 * Java identifier; {@code false} otherwise. 10829 * @see Character#isIdentifierIgnorable(int) 10830 * @see Character#isJavaIdentifierStart(int) 10831 * @see Character#isLetterOrDigit(int) 10832 * @see Character#isUnicodeIdentifierPart(int) 10833 * @see java.compiler/javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10834 * @since 1.5 10835 */ 10836 @SuppressWarnings("doclint:reference") // cross-module links 10837 public static boolean isJavaIdentifierPart(int codePoint) { 10838 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10839 } 10840 10841 /** 10842 * Determines if the specified character is permissible as the 10843 * first character in a Unicode identifier. 10844 * <p> 10845 * A character may start a Unicode identifier if and only if 10846 * one of the following conditions is true: 10847 * <ul> 10848 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10849 * <li> {@link #getType(char) getType(ch)} returns 10850 * {@code LETTER_NUMBER}. 10851 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10852 * {@code Other_ID_Start}</a> character. 
10853 * </ul> 10854 * <p> 10855 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10856 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10857 * with the following profile of UAX31: 10858 * <pre> 10859 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10860 * </pre> 10861 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10862 * compatibility. 10863 * 10864 * <p><b>Note:</b> This method cannot handle <a 10865 * href="#supplementary"> supplementary characters</a>. To support 10866 * all Unicode characters, including supplementary characters, use 10867 * the {@link #isUnicodeIdentifierStart(int)} method. 10868 * 10869 * @param ch the character to be tested. 10870 * @return {@code true} if the character may start a Unicode 10871 * identifier; {@code false} otherwise. 10872 * 10873 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10874 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10875 * @see Character#isJavaIdentifierStart(char) 10876 * @see Character#isLetter(char) 10877 * @see Character#isUnicodeIdentifierPart(char) 10878 * @since 1.1 10879 */ 10880 public static boolean isUnicodeIdentifierStart(char ch) { 10881 return isUnicodeIdentifierStart((int)ch); 10882 } 10883 10884 /** 10885 * Determines if the specified character (Unicode code point) is permissible as the 10886 * first character in a Unicode identifier. 10887 * <p> 10888 * A character may start a Unicode identifier if and only if 10889 * one of the following conditions is true: 10890 * <ul> 10891 * <li> {@link #isLetter(int) isLetter(codePoint)} 10892 * returns {@code true} 10893 * <li> {@link #getType(int) getType(codePoint)} 10894 * returns {@code LETTER_NUMBER}. 10895 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10896 * {@code Other_ID_Start}</a> character. 
10897 * </ul> 10898 * <p> 10899 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10900 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10901 * with the following profile of UAX31: 10902 * <pre> 10903 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10904 * </pre> 10905 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10906 * compatibility. 10907 * 10908 * @param codePoint the character (Unicode code point) to be tested. 10909 * @return {@code true} if the character may start a Unicode 10910 * identifier; {@code false} otherwise. 10911 * 10912 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10913 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10914 * @see Character#isJavaIdentifierStart(int) 10915 * @see Character#isLetter(int) 10916 * @see Character#isUnicodeIdentifierPart(int) 10917 * @since 1.5 10918 */ 10919 public static boolean isUnicodeIdentifierStart(int codePoint) { 10920 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10921 } 10922 10923 /** 10924 * Determines if the specified character may be part of a Unicode 10925 * identifier as other than the first character. 10926 * <p> 10927 * A character may be part of a Unicode identifier if and only if 10928 * one of the following statements is true: 10929 * <ul> 10930 * <li> it is a letter 10931 * <li> it is a connecting punctuation character (such as {@code '_'}) 10932 * <li> it is a digit 10933 * <li> it is a numeric letter (such as a Roman numeral character) 10934 * <li> it is a combining mark 10935 * <li> it is a non-spacing mark 10936 * <li> {@code isIdentifierIgnorable} returns 10937 * {@code true} for this character. 10938 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10939 * {@code Other_ID_Start}</a> character. 
10940 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10941 * {@code Other_ID_Continue}</a> character. 10942 * </ul> 10943 * <p> 10944 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10945 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10946 * with the following profile of UAX31: 10947 * <pre> 10948 * Continue := Start + ID_Continue + ignorable 10949 * Medial := empty 10950 * ignorable := isIdentifierIgnorable(char) returns true for the character 10951 * </pre> 10952 * {@code ignorable} is added to {@code Continue} for backward 10953 * compatibility. 10954 * 10955 * <p><b>Note:</b> This method cannot handle <a 10956 * href="#supplementary"> supplementary characters</a>. To support 10957 * all Unicode characters, including supplementary characters, use 10958 * the {@link #isUnicodeIdentifierPart(int)} method. 10959 * 10960 * @param ch the character to be tested. 10961 * @return {@code true} if the character may be part of a 10962 * Unicode identifier; {@code false} otherwise. 10963 * 10964 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10965 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10966 * @see Character#isIdentifierIgnorable(char) 10967 * @see Character#isJavaIdentifierPart(char) 10968 * @see Character#isLetterOrDigit(char) 10969 * @see Character#isUnicodeIdentifierStart(char) 10970 * @since 1.1 10971 */ 10972 public static boolean isUnicodeIdentifierPart(char ch) { 10973 return isUnicodeIdentifierPart((int)ch); 10974 } 10975 10976 /** 10977 * Determines if the specified character (Unicode code point) may be part of a Unicode 10978 * identifier as other than the first character. 
10979 * <p> 10980 * A character may be part of a Unicode identifier if and only if 10981 * one of the following statements is true: 10982 * <ul> 10983 * <li> it is a letter 10984 * <li> it is a connecting punctuation character (such as {@code '_'}) 10985 * <li> it is a digit 10986 * <li> it is a numeric letter (such as a Roman numeral character) 10987 * <li> it is a combining mark 10988 * <li> it is a non-spacing mark 10989 * <li> {@code isIdentifierIgnorable} returns 10990 * {@code true} for this character. 10991 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10992 * {@code Other_ID_Start}</a> character. 10993 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10994 * {@code Other_ID_Continue}</a> character. 10995 * </ul> 10996 * <p> 10997 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10998 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10999 * with the following profile of UAX31: 11000 * <pre> 11001 * Continue := Start + ID_Continue + ignorable 11002 * Medial := empty 11003 * ignorable := isIdentifierIgnorable(int) returns true for the character 11004 * </pre> 11005 * {@code ignorable} is added to {@code Continue} for backward 11006 * compatibility. 11007 * 11008 * @param codePoint the character (Unicode code point) to be tested. 11009 * @return {@code true} if the character may be part of a 11010 * Unicode identifier; {@code false} otherwise. 
11011 * 11012 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 11013 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 11014 * @see Character#isIdentifierIgnorable(int) 11015 * @see Character#isJavaIdentifierPart(int) 11016 * @see Character#isLetterOrDigit(int) 11017 * @see Character#isUnicodeIdentifierStart(int) 11018 * @since 1.5 11019 */ 11020 public static boolean isUnicodeIdentifierPart(int codePoint) { 11021 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 11022 } 11023 11024 /** 11025 * Determines if the specified character should be regarded as 11026 * an ignorable character in a Java identifier or a Unicode identifier. 11027 * <p> 11028 * The following Unicode characters are ignorable in a Java identifier 11029 * or a Unicode identifier: 11030 * <ul> 11031 * <li>ISO control characters that are not whitespace 11032 * <ul> 11033 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11034 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11035 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11036 * </ul> 11037 * 11038 * <li>all characters that have the {@code FORMAT} general 11039 * category value 11040 * </ul> 11041 * 11042 * <p><b>Note:</b> This method cannot handle <a 11043 * href="#supplementary"> supplementary characters</a>. To support 11044 * all Unicode characters, including supplementary characters, use 11045 * the {@link #isIdentifierIgnorable(int)} method. 11046 * 11047 * @param ch the character to be tested. 11048 * @return {@code true} if the character is an ignorable control 11049 * character that may be part of a Java or Unicode identifier; 11050 * {@code false} otherwise. 
11051 * @see Character#isJavaIdentifierPart(char) 11052 * @see Character#isUnicodeIdentifierPart(char) 11053 * @since 1.1 11054 */ 11055 public static boolean isIdentifierIgnorable(char ch) { 11056 return isIdentifierIgnorable((int)ch); 11057 } 11058 11059 /** 11060 * Determines if the specified character (Unicode code point) should be regarded as 11061 * an ignorable character in a Java identifier or a Unicode identifier. 11062 * <p> 11063 * The following Unicode characters are ignorable in a Java identifier 11064 * or a Unicode identifier: 11065 * <ul> 11066 * <li>ISO control characters that are not whitespace 11067 * <ul> 11068 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 11069 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 11070 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 11071 * </ul> 11072 * 11073 * <li>all characters that have the {@code FORMAT} general 11074 * category value 11075 * </ul> 11076 * 11077 * @param codePoint the character (Unicode code point) to be tested. 11078 * @return {@code true} if the character is an ignorable control 11079 * character that may be part of a Java or Unicode identifier; 11080 * {@code false} otherwise. 11081 * @see Character#isJavaIdentifierPart(int) 11082 * @see Character#isUnicodeIdentifierPart(int) 11083 * @since 1.5 11084 */ 11085 public static boolean isIdentifierIgnorable(int codePoint) { 11086 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 11087 } 11088 11089 /** 11090 * Determines if the specified character (Unicode code point) is an Emoji. 11091 * <p> 11092 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 11093 * property, defined in 11094 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11095 * Unicode Emoji (Technical Standard #51)</a>. 11096 * 11097 * @param codePoint the character (Unicode code point) to be tested. 
11098 * @return {@code true} if the character is an Emoji; 11099 * {@code false} otherwise. 11100 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11101 * @since 21 11102 */ 11103 public static boolean isEmoji(int codePoint) { 11104 return CharacterData.of(codePoint).isEmoji(codePoint); 11105 } 11106 11107 /** 11108 * Determines if the specified character (Unicode code point) has the 11109 * Emoji Presentation property by default. 11110 * <p> 11111 * A character is considered to have the Emoji Presentation property if and 11112 * only if it has the {@code Emoji_Presentation} property, defined in 11113 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11114 * Unicode Emoji (Technical Standard #51)</a>. 11115 * 11116 * @param codePoint the character (Unicode code point) to be tested. 11117 * @return {@code true} if the character has the Emoji Presentation 11118 * property; {@code false} otherwise. 11119 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11120 * @since 21 11121 */ 11122 public static boolean isEmojiPresentation(int codePoint) { 11123 return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 11124 } 11125 11126 /** 11127 * Determines if the specified character (Unicode code point) is an 11128 * Emoji Modifier. 11129 * <p> 11130 * A character is considered to be an Emoji Modifier if and only if it has 11131 * the {@code Emoji_Modifier} property, defined in 11132 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11133 * Unicode Emoji (Technical Standard #51)</a>. 11134 * 11135 * @param codePoint the character (Unicode code point) to be tested. 11136 * @return {@code true} if the character is an Emoji Modifier; 11137 * {@code false} otherwise. 
11138 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11139 * @since 21 11140 */ 11141 public static boolean isEmojiModifier(int codePoint) { 11142 return CharacterData.of(codePoint).isEmojiModifier(codePoint); 11143 } 11144 11145 /** 11146 * Determines if the specified character (Unicode code point) is an 11147 * Emoji Modifier Base. 11148 * <p> 11149 * A character is considered to be an Emoji Modifier Base if and only if it has 11150 * the {@code Emoji_Modifier_Base} property, defined in 11151 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11152 * Unicode Emoji (Technical Standard #51)</a>. 11153 * 11154 * @param codePoint the character (Unicode code point) to be tested. 11155 * @return {@code true} if the character is an Emoji Modifier Base; 11156 * {@code false} otherwise. 11157 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11158 * @since 21 11159 */ 11160 public static boolean isEmojiModifierBase(int codePoint) { 11161 return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 11162 } 11163 11164 /** 11165 * Determines if the specified character (Unicode code point) is an 11166 * Emoji Component. 11167 * <p> 11168 * A character is considered to be an Emoji Component if and only if it has 11169 * the {@code Emoji_Component} property, defined in 11170 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11171 * Unicode Emoji (Technical Standard #51)</a>. 11172 * 11173 * @param codePoint the character (Unicode code point) to be tested. 11174 * @return {@code true} if the character is an Emoji Component; 11175 * {@code false} otherwise. 11176 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11177 * @since 21 11178 */ 11179 public static boolean isEmojiComponent(int codePoint) { 11180 return CharacterData.of(codePoint).isEmojiComponent(codePoint); 11181 } 11182 11183 /** 11184 * Determines if the specified character (Unicode code point) is 11185 * an Extended Pictographic. 
11186 * <p> 11187 * A character is considered to be an Extended Pictographic if and only if it has 11188 * the {@code Extended_Pictographic} property, defined in 11189 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11190 * Unicode Emoji (Technical Standard #51)</a>. 11191 * 11192 * @param codePoint the character (Unicode code point) to be tested. 11193 * @return {@code true} if the character is an Extended Pictographic; 11194 * {@code false} otherwise. 11195 * @spec https://unicode.org/reports/tr51/ Unicode Emoji 11196 * @since 21 11197 */ 11198 public static boolean isExtendedPictographic(int codePoint) { 11199 return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 11200 } 11201 11202 /** 11203 * Converts the character argument to lowercase using case 11204 * mapping information from the UnicodeData file. 11205 * <p> 11206 * Note that 11207 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 11208 * does not always return {@code true} for some ranges of 11209 * characters, particularly those that are symbols or ideographs. 11210 * 11211 * <p>In general, {@link String#toLowerCase()} should be used to map 11212 * characters to lowercase. {@code String} case mapping methods 11213 * have several benefits over {@code Character} case mapping methods. 11214 * {@code String} case mapping methods can perform locale-sensitive 11215 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11216 * the {@code Character} case mapping methods cannot. 11217 * 11218 * <p><b>Note:</b> This method cannot handle <a 11219 * href="#supplementary"> supplementary characters</a>. To support 11220 * all Unicode characters, including supplementary characters, use 11221 * the {@link #toLowerCase(int)} method. 11222 * 11223 * @param ch the character to be converted. 11224 * @return the lowercase equivalent of the character, if any; 11225 * otherwise, the character itself. 
11226 * @see Character#isLowerCase(char) 11227 * @see String#toLowerCase() 11228 */ 11229 public static char toLowerCase(char ch) { 11230 return (char)toLowerCase((int)ch); 11231 } 11232 11233 /** 11234 * Converts the character (Unicode code point) argument to 11235 * lowercase using case mapping information from the UnicodeData 11236 * file. 11237 * 11238 * <p> Note that 11239 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 11240 * does not always return {@code true} for some ranges of 11241 * characters, particularly those that are symbols or ideographs. 11242 * 11243 * <p>In general, {@link String#toLowerCase()} should be used to map 11244 * characters to lowercase. {@code String} case mapping methods 11245 * have several benefits over {@code Character} case mapping methods. 11246 * {@code String} case mapping methods can perform locale-sensitive 11247 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11248 * the {@code Character} case mapping methods cannot. 11249 * 11250 * @param codePoint the character (Unicode code point) to be converted. 11251 * @return the lowercase equivalent of the character (Unicode code 11252 * point), if any; otherwise, the character itself. 11253 * @see Character#isLowerCase(int) 11254 * @see String#toLowerCase() 11255 * 11256 * @since 1.5 11257 */ 11258 public static int toLowerCase(int codePoint) { 11259 return CharacterData.of(codePoint).toLowerCase(codePoint); 11260 } 11261 11262 /** 11263 * Converts the character argument to uppercase using case mapping 11264 * information from the UnicodeData file. 11265 * <p> 11266 * Note that 11267 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 11268 * does not always return {@code true} for some ranges of 11269 * characters, particularly those that are symbols or ideographs. 11270 * 11271 * <p>In general, {@link String#toUpperCase()} should be used to map 11272 * characters to uppercase. 
{@code String} case mapping methods 11273 * have several benefits over {@code Character} case mapping methods. 11274 * {@code String} case mapping methods can perform locale-sensitive 11275 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11276 * the {@code Character} case mapping methods cannot. 11277 * 11278 * <p><b>Note:</b> This method cannot handle <a 11279 * href="#supplementary"> supplementary characters</a>. To support 11280 * all Unicode characters, including supplementary characters, use 11281 * the {@link #toUpperCase(int)} method. 11282 * 11283 * @param ch the character to be converted. 11284 * @return the uppercase equivalent of the character, if any; 11285 * otherwise, the character itself. 11286 * @see Character#isUpperCase(char) 11287 * @see String#toUpperCase() 11288 */ 11289 public static char toUpperCase(char ch) { 11290 return (char)toUpperCase((int)ch); 11291 } 11292 11293 /** 11294 * Converts the character (Unicode code point) argument to 11295 * uppercase using case mapping information from the UnicodeData 11296 * file. 11297 * 11298 * <p>Note that 11299 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11300 * does not always return {@code true} for some ranges of 11301 * characters, particularly those that are symbols or ideographs. 11302 * 11303 * <p>In general, {@link String#toUpperCase()} should be used to map 11304 * characters to uppercase. {@code String} case mapping methods 11305 * have several benefits over {@code Character} case mapping methods. 11306 * {@code String} case mapping methods can perform locale-sensitive 11307 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11308 * the {@code Character} case mapping methods cannot. 11309 * 11310 * @param codePoint the character (Unicode code point) to be converted. 11311 * @return the uppercase equivalent of the character, if any; 11312 * otherwise, the character itself. 
11313 * @see Character#isUpperCase(int) 11314 * @see String#toUpperCase() 11315 * 11316 * @since 1.5 11317 */ 11318 public static int toUpperCase(int codePoint) { 11319 return CharacterData.of(codePoint).toUpperCase(codePoint); 11320 } 11321 11322 /** 11323 * Converts the character argument to titlecase using case mapping 11324 * information from the UnicodeData file. If a character has no 11325 * explicit titlecase mapping and is not itself a titlecase char 11326 * according to UnicodeData, then the uppercase mapping is 11327 * returned as an equivalent titlecase mapping. If the 11328 * {@code char} argument is already a titlecase 11329 * {@code char}, the same {@code char} value will be 11330 * returned. 11331 * <p> 11332 * Note that 11333 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11334 * does not always return {@code true} for some ranges of 11335 * characters. 11336 * 11337 * <p><b>Note:</b> This method cannot handle <a 11338 * href="#supplementary"> supplementary characters</a>. To support 11339 * all Unicode characters, including supplementary characters, use 11340 * the {@link #toTitleCase(int)} method. 11341 * 11342 * @param ch the character to be converted. 11343 * @return the titlecase equivalent of the character, if any; 11344 * otherwise, the character itself. 11345 * @see Character#isTitleCase(char) 11346 * @see Character#toLowerCase(char) 11347 * @see Character#toUpperCase(char) 11348 * @since 1.0.2 11349 */ 11350 public static char toTitleCase(char ch) { 11351 return (char)toTitleCase((int)ch); 11352 } 11353 11354 /** 11355 * Converts the character (Unicode code point) argument to titlecase using case mapping 11356 * information from the UnicodeData file. If a character has no 11357 * explicit titlecase mapping and is not itself a titlecase char 11358 * according to UnicodeData, then the uppercase mapping is 11359 * returned as an equivalent titlecase mapping. 
If the 11360 * character argument is already a titlecase 11361 * character, the same character value will be 11362 * returned. 11363 * 11364 * <p>Note that 11365 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11366 * does not always return {@code true} for some ranges of 11367 * characters. 11368 * 11369 * @param codePoint the character (Unicode code point) to be converted. 11370 * @return the titlecase equivalent of the character, if any; 11371 * otherwise, the character itself. 11372 * @see Character#isTitleCase(int) 11373 * @see Character#toLowerCase(int) 11374 * @see Character#toUpperCase(int) 11375 * @since 1.5 11376 */ 11377 public static int toTitleCase(int codePoint) { 11378 return CharacterData.of(codePoint).toTitleCase(codePoint); 11379 } 11380 11381 /** 11382 * Returns the numeric value of the character {@code ch} in the 11383 * specified radix. 11384 * <p> 11385 * If the radix is not in the range {@code MIN_RADIX} ≤ 11386 * {@code radix} ≤ {@code MAX_RADIX} or if the 11387 * value of {@code ch} is not a valid digit in the specified 11388 * radix, {@code -1} is returned. A character is a valid digit 11389 * if at least one of the following is true: 11390 * <ul> 11391 * <li>The method {@code isDigit} is {@code true} of the character 11392 * and the Unicode decimal digit value of the character (or its 11393 * single-character decomposition) is less than the specified radix. 11394 * In this case the decimal digit value is returned. 11395 * <li>The character is one of the uppercase Latin letters 11396 * {@code 'A'} through {@code 'Z'} and its code is less than 11397 * {@code radix + 'A' - 10}. 11398 * In this case, {@code ch - 'A' + 10} 11399 * is returned. 11400 * <li>The character is one of the lowercase Latin letters 11401 * {@code 'a'} through {@code 'z'} and its code is less than 11402 * {@code radix + 'a' - 10}. 11403 * In this case, {@code ch - 'a' + 10} 11404 * is returned. 
11405 * <li>The character is one of the fullwidth uppercase Latin letters A 11406 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11407 * and its code is less than 11408 * {@code radix + '\u005CuFF21' - 10}. 11409 * In this case, {@code ch - '\u005CuFF21' + 10} 11410 * is returned. 11411 * <li>The character is one of the fullwidth lowercase Latin letters a 11412 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11413 * and its code is less than 11414 * {@code radix + '\u005CuFF41' - 10}. 11415 * In this case, {@code ch - '\u005CuFF41' + 10} 11416 * is returned. 11417 * </ul> 11418 * 11419 * <p><b>Note:</b> This method cannot handle <a 11420 * href="#supplementary"> supplementary characters</a>. To support 11421 * all Unicode characters, including supplementary characters, use 11422 * the {@link #digit(int, int)} method. 11423 * 11424 * @param ch the character to be converted. 11425 * @param radix the radix. 11426 * @return the numeric value represented by the character in the 11427 * specified radix. 11428 * @see Character#forDigit(int, int) 11429 * @see Character#isDigit(char) 11430 */ 11431 public static int digit(char ch, int radix) { 11432 return digit((int)ch, radix); 11433 } 11434 11435 /** 11436 * Returns the numeric value of the specified character (Unicode 11437 * code point) in the specified radix. 11438 * 11439 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11440 * {@code radix} ≤ {@code MAX_RADIX} or if the 11441 * character is not a valid digit in the specified 11442 * radix, {@code -1} is returned. A character is a valid digit 11443 * if at least one of the following is true: 11444 * <ul> 11445 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11446 * and the Unicode decimal digit value of the character (or its 11447 * single-character decomposition) is less than the specified radix. 11448 * In this case the decimal digit value is returned. 
11449 * <li>The character is one of the uppercase Latin letters 11450 * {@code 'A'} through {@code 'Z'} and its code is less than 11451 * {@code radix + 'A' - 10}. 11452 * In this case, {@code codePoint - 'A' + 10} 11453 * is returned. 11454 * <li>The character is one of the lowercase Latin letters 11455 * {@code 'a'} through {@code 'z'} and its code is less than 11456 * {@code radix + 'a' - 10}. 11457 * In this case, {@code codePoint - 'a' + 10} 11458 * is returned. 11459 * <li>The character is one of the fullwidth uppercase Latin letters A 11460 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11461 * and its code is less than 11462 * {@code radix + '\u005CuFF21' - 10}. 11463 * In this case, 11464 * {@code codePoint - '\u005CuFF21' + 10} 11465 * is returned. 11466 * <li>The character is one of the fullwidth lowercase Latin letters a 11467 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11468 * and its code is less than 11469 * {@code radix + '\u005CuFF41'- 10}. 11470 * In this case, 11471 * {@code codePoint - '\u005CuFF41' + 10} 11472 * is returned. 11473 * </ul> 11474 * 11475 * @param codePoint the character (Unicode code point) to be converted. 11476 * @param radix the radix. 11477 * @return the numeric value represented by the character in the 11478 * specified radix. 11479 * @see Character#forDigit(int, int) 11480 * @see Character#isDigit(int) 11481 * @since 1.5 11482 */ 11483 public static int digit(int codePoint, int radix) { 11484 return CharacterData.of(codePoint).digit(codePoint, radix); 11485 } 11486 11487 /** 11488 * Returns the {@code int} value that the specified Unicode 11489 * character represents. For example, the character 11490 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11491 * an int with a value of 50. 
11492 * <p> 11493 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11494 * {@code '\u005Cu005A'}), lowercase 11495 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11496 * full width variant ({@code '\u005CuFF21'} through 11497 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11498 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11499 * through 35. This is independent of the Unicode specification, 11500 * which does not assign numeric values to these {@code char} 11501 * values. 11502 * <p> 11503 * If the character does not have a numeric value, then -1 is returned. 11504 * If the character has a numeric value that cannot be represented as a 11505 * nonnegative integer (for example, a fractional value), then -2 11506 * is returned. 11507 * 11508 * <p><b>Note:</b> This method cannot handle <a 11509 * href="#supplementary"> supplementary characters</a>. To support 11510 * all Unicode characters, including supplementary characters, use 11511 * the {@link #getNumericValue(int)} method. 11512 * 11513 * @param ch the character to be converted. 11514 * @return the numeric value of the character, as a nonnegative {@code int} 11515 * value; -2 if the character has a numeric value but the value 11516 * can not be represented as a nonnegative {@code int} value; 11517 * -1 if the character has no numeric value. 11518 * @see Character#forDigit(int, int) 11519 * @see Character#isDigit(char) 11520 * @since 1.1 11521 */ 11522 public static int getNumericValue(char ch) { 11523 return getNumericValue((int)ch); 11524 } 11525 11526 /** 11527 * Returns the {@code int} value that the specified 11528 * character (Unicode code point) represents. For example, the character 11529 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11530 * an {@code int} with a value of 50. 
11531 * <p> 11532 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11533 * {@code '\u005Cu005A'}), lowercase 11534 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11535 * full width variant ({@code '\u005CuFF21'} through 11536 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11537 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11538 * through 35. This is independent of the Unicode specification, 11539 * which does not assign numeric values to these {@code char} 11540 * values. 11541 * <p> 11542 * If the character does not have a numeric value, then -1 is returned. 11543 * If the character has a numeric value that cannot be represented as a 11544 * nonnegative integer (for example, a fractional value), then -2 11545 * is returned. 11546 * 11547 * @param codePoint the character (Unicode code point) to be converted. 11548 * @return the numeric value of the character, as a nonnegative {@code int} 11549 * value; -2 if the character has a numeric value but the value 11550 * can not be represented as a nonnegative {@code int} value; 11551 * -1 if the character has no numeric value. 11552 * @see Character#forDigit(int, int) 11553 * @see Character#isDigit(int) 11554 * @since 1.5 11555 */ 11556 public static int getNumericValue(int codePoint) { 11557 return CharacterData.of(codePoint).getNumericValue(codePoint); 11558 } 11559 11560 /** 11561 * Determines if the specified character is ISO-LATIN-1 white space. 
11562 * This method returns {@code true} for the following five 11563 * characters only: 11564 * <table class="striped"> 11565 * <caption style="display:none">truechars</caption> 11566 * <thead> 11567 * <tr><th scope="col">Character 11568 * <th scope="col">Code 11569 * <th scope="col">Name 11570 * </thead> 11571 * <tbody> 11572 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11573 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11574 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11575 * <td>{@code NEW LINE}</td></tr> 11576 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11577 * <td>{@code FORM FEED}</td></tr> 11578 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11579 * <td>{@code CARRIAGE RETURN}</td></tr> 11580 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11581 * <td>{@code SPACE}</td></tr> 11582 * </tbody> 11583 * </table> 11584 * 11585 * @param ch the character to be tested. 11586 * @return {@code true} if the character is ISO-LATIN-1 white 11587 * space; {@code false} otherwise. 11588 * @see Character#isSpaceChar(char) 11589 * @see Character#isWhitespace(char) 11590 * @deprecated Replaced by isWhitespace(char). 11591 */ 11592 @Deprecated(since="1.1") 11593 public static boolean isSpace(char ch) { 11594 return (ch <= 0x0020) && 11595 (((((1L << 0x0009) | 11596 (1L << 0x000A) | 11597 (1L << 0x000C) | 11598 (1L << 0x000D) | 11599 (1L << 0x0020)) >> ch) & 1L) != 0); 11600 } 11601 11602 11603 /** 11604 * Determines if the specified character is a Unicode space character. 11605 * A character is considered to be a space character if and only if 11606 * it is specified to be a space character by the Unicode Standard. 
This 11607 * method returns true if the character's general category type is any of 11608 * the following: 11609 * <ul> 11610 * <li> {@code SPACE_SEPARATOR} 11611 * <li> {@code LINE_SEPARATOR} 11612 * <li> {@code PARAGRAPH_SEPARATOR} 11613 * </ul> 11614 * 11615 * <p><b>Note:</b> This method cannot handle <a 11616 * href="#supplementary"> supplementary characters</a>. To support 11617 * all Unicode characters, including supplementary characters, use 11618 * the {@link #isSpaceChar(int)} method. 11619 * 11620 * @param ch the character to be tested. 11621 * @return {@code true} if the character is a space character; 11622 * {@code false} otherwise. 11623 * @see Character#isWhitespace(char) 11624 * @since 1.1 11625 */ 11626 public static boolean isSpaceChar(char ch) { 11627 return isSpaceChar((int)ch); 11628 } 11629 11630 /** 11631 * Determines if the specified character (Unicode code point) is a 11632 * Unicode space character. A character is considered to be a 11633 * space character if and only if it is specified to be a space 11634 * character by the Unicode Standard. This method returns true if 11635 * the character's general category type is any of the following: 11636 * 11637 * <ul> 11638 * <li> {@link #SPACE_SEPARATOR} 11639 * <li> {@link #LINE_SEPARATOR} 11640 * <li> {@link #PARAGRAPH_SEPARATOR} 11641 * </ul> 11642 * 11643 * @param codePoint the character (Unicode code point) to be tested. 11644 * @return {@code true} if the character is a space character; 11645 * {@code false} otherwise. 11646 * @see Character#isWhitespace(int) 11647 * @since 1.5 11648 */ 11649 public static boolean isSpaceChar(int codePoint) { 11650 return ((((1 << Character.SPACE_SEPARATOR) | 11651 (1 << Character.LINE_SEPARATOR) | 11652 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11653 != 0; 11654 } 11655 11656 /** 11657 * Determines if the specified character is white space according to Java. 
11658 * A character is a Java whitespace character if and only if it satisfies 11659 * one of the following criteria: 11660 * <ul> 11661 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11662 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11663 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11664 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11665 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11666 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11667 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11668 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11669 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11670 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11671 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11672 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11673 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11674 * </ul> 11675 * 11676 * <p><b>Note:</b> This method cannot handle <a 11677 * href="#supplementary"> supplementary characters</a>. To support 11678 * all Unicode characters, including supplementary characters, use 11679 * the {@link #isWhitespace(int)} method. 11680 * 11681 * @param ch the character to be tested. 11682 * @return {@code true} if the character is a Java whitespace 11683 * character; {@code false} otherwise. 11684 * @see Character#isSpaceChar(char) 11685 * @since 1.1 11686 */ 11687 public static boolean isWhitespace(char ch) { 11688 return isWhitespace((int)ch); 11689 } 11690 11691 /** 11692 * Determines if the specified character (Unicode code point) is 11693 * white space according to Java. 
A character is a Java 11694 * whitespace character if and only if it satisfies one of the 11695 * following criteria: 11696 * <ul> 11697 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11698 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11699 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11700 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11701 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11702 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11703 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11704 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11705 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11706 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11707 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11708 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11709 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11710 * </ul> 11711 * 11712 * @param codePoint the character (Unicode code point) to be tested. 11713 * @return {@code true} if the character is a Java whitespace 11714 * character; {@code false} otherwise. 11715 * @see Character#isSpaceChar(int) 11716 * @since 1.5 11717 */ 11718 public static boolean isWhitespace(int codePoint) { 11719 return CharacterData.of(codePoint).isWhitespace(codePoint); 11720 } 11721 11722 /** 11723 * Determines if the specified character is an ISO control 11724 * character. A character is considered to be an ISO control 11725 * character if its code is in the range {@code '\u005Cu0000'} 11726 * through {@code '\u005Cu001F'} or in the range 11727 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11728 * 11729 * <p><b>Note:</b> This method cannot handle <a 11730 * href="#supplementary"> supplementary characters</a>. To support 11731 * all Unicode characters, including supplementary characters, use 11732 * the {@link #isISOControl(int)} method. 
11733 * 11734 * @param ch the character to be tested. 11735 * @return {@code true} if the character is an ISO control character; 11736 * {@code false} otherwise. 11737 * 11738 * @see Character#isSpaceChar(char) 11739 * @see Character#isWhitespace(char) 11740 * @since 1.1 11741 */ 11742 public static boolean isISOControl(char ch) { 11743 return isISOControl((int)ch); 11744 } 11745 11746 /** 11747 * Determines if the referenced character (Unicode code point) is an ISO control 11748 * character. A character is considered to be an ISO control 11749 * character if its code is in the range {@code '\u005Cu0000'} 11750 * through {@code '\u005Cu001F'} or in the range 11751 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11752 * 11753 * @param codePoint the character (Unicode code point) to be tested. 11754 * @return {@code true} if the character is an ISO control character; 11755 * {@code false} otherwise. 11756 * @see Character#isSpaceChar(int) 11757 * @see Character#isWhitespace(int) 11758 * @since 1.5 11759 */ 11760 public static boolean isISOControl(int codePoint) { 11761 // Optimized form of: 11762 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11763 // (codePoint >= 0x7F && codePoint <= 0x9F); 11764 return codePoint <= 0x9F && 11765 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11766 } 11767 11768 /** 11769 * Returns a value indicating a character's general category. 11770 * 11771 * <p><b>Note:</b> This method cannot handle <a 11772 * href="#supplementary"> supplementary characters</a>. To support 11773 * all Unicode characters, including supplementary characters, use 11774 * the {@link #getType(int)} method. 11775 * 11776 * @param ch the character to be tested. 11777 * @return a value of type {@code int} representing the 11778 * character's general category. 
11779 * @see Character#COMBINING_SPACING_MARK 11780 * @see Character#CONNECTOR_PUNCTUATION 11781 * @see Character#CONTROL 11782 * @see Character#CURRENCY_SYMBOL 11783 * @see Character#DASH_PUNCTUATION 11784 * @see Character#DECIMAL_DIGIT_NUMBER 11785 * @see Character#ENCLOSING_MARK 11786 * @see Character#END_PUNCTUATION 11787 * @see Character#FINAL_QUOTE_PUNCTUATION 11788 * @see Character#FORMAT 11789 * @see Character#INITIAL_QUOTE_PUNCTUATION 11790 * @see Character#LETTER_NUMBER 11791 * @see Character#LINE_SEPARATOR 11792 * @see Character#LOWERCASE_LETTER 11793 * @see Character#MATH_SYMBOL 11794 * @see Character#MODIFIER_LETTER 11795 * @see Character#MODIFIER_SYMBOL 11796 * @see Character#NON_SPACING_MARK 11797 * @see Character#OTHER_LETTER 11798 * @see Character#OTHER_NUMBER 11799 * @see Character#OTHER_PUNCTUATION 11800 * @see Character#OTHER_SYMBOL 11801 * @see Character#PARAGRAPH_SEPARATOR 11802 * @see Character#PRIVATE_USE 11803 * @see Character#SPACE_SEPARATOR 11804 * @see Character#START_PUNCTUATION 11805 * @see Character#SURROGATE 11806 * @see Character#TITLECASE_LETTER 11807 * @see Character#UNASSIGNED 11808 * @see Character#UPPERCASE_LETTER 11809 * @since 1.1 11810 */ 11811 public static int getType(char ch) { 11812 return getType((int)ch); 11813 } 11814 11815 /** 11816 * Returns a value indicating a character's general category. 11817 * 11818 * @param codePoint the character (Unicode code point) to be tested. 11819 * @return a value of type {@code int} representing the 11820 * character's general category. 
11821 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11822 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11823 * @see Character#CONTROL CONTROL 11824 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11825 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11826 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11827 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11828 * @see Character#END_PUNCTUATION END_PUNCTUATION 11829 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11830 * @see Character#FORMAT FORMAT 11831 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11832 * @see Character#LETTER_NUMBER LETTER_NUMBER 11833 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11834 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11835 * @see Character#MATH_SYMBOL MATH_SYMBOL 11836 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11837 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11838 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11839 * @see Character#OTHER_LETTER OTHER_LETTER 11840 * @see Character#OTHER_NUMBER OTHER_NUMBER 11841 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11842 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11843 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11844 * @see Character#PRIVATE_USE PRIVATE_USE 11845 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11846 * @see Character#START_PUNCTUATION START_PUNCTUATION 11847 * @see Character#SURROGATE SURROGATE 11848 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11849 * @see Character#UNASSIGNED UNASSIGNED 11850 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11851 * @since 1.5 11852 */ 11853 public static int getType(int codePoint) { 11854 return CharacterData.of(codePoint).getType(codePoint); 11855 } 11856 11857 /** 11858 * Determines the character representation for a specific digit in 11859 * the specified radix. 
If the value of {@code radix} is not a 11860 * valid radix, or the value of {@code digit} is not a valid 11861 * digit in the specified radix, the null character 11862 * ({@code '\u005Cu0000'}) is returned. 11863 * <p> 11864 * The {@code radix} argument is valid if it is greater than or 11865 * equal to {@code MIN_RADIX} and less than or equal to 11866 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11867 * {@code 0 <= digit < radix}. 11868 * <p> 11869 * If the digit is less than 10, then 11870 * {@code '0' + digit} is returned. Otherwise, the value 11871 * {@code 'a' + digit - 10} is returned. 11872 * 11873 * @param digit the number to convert to a character. 11874 * @param radix the radix. 11875 * @return the {@code char} representation of the specified digit 11876 * in the specified radix. 11877 * @see Character#MIN_RADIX 11878 * @see Character#MAX_RADIX 11879 * @see Character#digit(char, int) 11880 */ 11881 public static char forDigit(int digit, int radix) { 11882 if ((digit >= radix) || (digit < 0)) { 11883 return '\0'; 11884 } 11885 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11886 return '\0'; 11887 } 11888 if (digit < 10) { 11889 return (char)('0' + digit); 11890 } 11891 return (char)('a' - 10 + digit); 11892 } 11893 11894 /** 11895 * Returns the Unicode directionality property for the given 11896 * character. Character directionality is used to calculate the 11897 * visual ordering of text. The directionality value of undefined 11898 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 11899 * 11900 * <p><b>Note:</b> This method cannot handle <a 11901 * href="#supplementary"> supplementary characters</a>. To support 11902 * all Unicode characters, including supplementary characters, use 11903 * the {@link #getDirectionality(int)} method. 11904 * 11905 * @param ch {@code char} for which the directionality property 11906 * is requested. 11907 * @return the directionality property of the {@code char} value. 
11908 * 11909 * @see Character#DIRECTIONALITY_UNDEFINED 11910 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11911 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11912 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11913 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11914 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11915 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11916 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11917 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11918 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11919 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11920 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11921 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11922 * @see Character#DIRECTIONALITY_WHITESPACE 11923 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11924 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11925 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11926 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11927 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11928 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11929 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11930 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11931 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11932 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11933 * @since 1.4 11934 */ 11935 public static byte getDirectionality(char ch) { 11936 return getDirectionality((int)ch); 11937 } 11938 11939 /** 11940 * Returns the Unicode directionality property for the given 11941 * character (Unicode code point). Character directionality is 11942 * used to calculate the visual ordering of text. The 11943 * directionality value of undefined character is {@link 11944 * #DIRECTIONALITY_UNDEFINED}. 11945 * 11946 * @param codePoint the character (Unicode code point) for which 11947 * the directionality property is requested. 
11948 * @return the directionality property of the character. 11949 * 11950 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11951 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11952 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11953 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11954 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11955 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11956 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11957 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11958 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11959 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11960 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11961 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11962 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11963 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11964 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11965 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11966 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11967 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11968 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11969 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11970 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11971 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11972 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11973 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11974 * @since 1.5 11975 */ 11976 public static byte getDirectionality(int codePoint) { 11977 return CharacterData.of(codePoint).getDirectionality(codePoint); 11978 } 11979 11980 /** 11981 * Determines whether the character is mirrored according to the 11982 * Unicode specification. Mirrored characters should have their 11983 * glyphs horizontally mirrored when displayed in text that is 11984 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11985 * PARENTHESIS is semantically defined to be an <i>opening 11986 * parenthesis</i>. This will appear as a "(" in text that is 11987 * left-to-right but as a ")" in text that is right-to-left. 11988 * 11989 * <p><b>Note:</b> This method cannot handle <a 11990 * href="#supplementary"> supplementary characters</a>. To support 11991 * all Unicode characters, including supplementary characters, use 11992 * the {@link #isMirrored(int)} method. 11993 * 11994 * @param ch {@code char} for which the mirrored property is requested 11995 * @return {@code true} if the char is mirrored, {@code false} 11996 * if the {@code char} is not mirrored or is not defined. 11997 * @since 1.4 11998 */ 11999 public static boolean isMirrored(char ch) { 12000 return isMirrored((int)ch); 12001 } 12002 12003 /** 12004 * Determines whether the specified character (Unicode code point) 12005 * is mirrored according to the Unicode specification. Mirrored 12006 * characters should have their glyphs horizontally mirrored when 12007 * displayed in text that is right-to-left. For example, 12008 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 12009 * defined to be an <i>opening parenthesis</i>. 
This will appear 12010 * as a "(" in text that is left-to-right but as a ")" in text 12011 * that is right-to-left. 12012 * 12013 * @param codePoint the character (Unicode code point) to be tested. 12014 * @return {@code true} if the character is mirrored, {@code false} 12015 * if the character is not mirrored or is not defined. 12016 * @since 1.5 12017 */ 12018 public static boolean isMirrored(int codePoint) { 12019 return CharacterData.of(codePoint).isMirrored(codePoint); 12020 } 12021 12022 /** 12023 * Compares two {@code Character} objects numerically. 12024 * 12025 * @param anotherCharacter the {@code Character} to be compared. 12026 * @return the value {@code 0} if the argument {@code Character} 12027 * is equal to this {@code Character}; a value less than 12028 * {@code 0} if this {@code Character} is numerically less 12029 * than the {@code Character} argument; and a value greater than 12030 * {@code 0} if this {@code Character} is numerically greater 12031 * than the {@code Character} argument (unsigned comparison). 12032 * Note that this is strictly a numerical comparison; it is not 12033 * locale-dependent. 12034 * @since 1.2 12035 */ 12036 public int compareTo(Character anotherCharacter) { 12037 return compare(this.value, anotherCharacter.value); 12038 } 12039 12040 /** 12041 * Compares two {@code char} values numerically. 
12042 * The value returned is identical to what would be returned by: 12043 * <pre> 12044 * Character.valueOf(x).compareTo(Character.valueOf(y)) 12045 * </pre> 12046 * 12047 * @param x the first {@code char} to compare 12048 * @param y the second {@code char} to compare 12049 * @return the value {@code 0} if {@code x == y}; 12050 * a value less than {@code 0} if {@code x < y}; and 12051 * a value greater than {@code 0} if {@code x > y} 12052 * @since 1.7 12053 */ 12054 public static int compare(char x, char y) { 12055 return x - y; 12056 } 12057 12058 /** 12059 * Converts the character (Unicode code point) argument to uppercase using 12060 * information from the UnicodeData file. 12061 * 12062 * @param codePoint the character (Unicode code point) to be converted. 12063 * @return either the uppercase equivalent of the character, if 12064 * any, or an error flag ({@code Character.ERROR}) 12065 * that indicates that a 1:M {@code char} mapping exists. 12066 * @see Character#isLowerCase(char) 12067 * @see Character#isUpperCase(char) 12068 * @see Character#toLowerCase(char) 12069 * @see Character#toTitleCase(char) 12070 * @since 1.4 12071 */ 12072 static int toUpperCaseEx(int codePoint) { 12073 assert isValidCodePoint(codePoint); 12074 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 12075 } 12076 12077 /** 12078 * Converts the character (Unicode code point) argument to uppercase using case 12079 * mapping information from the SpecialCasing file in the Unicode 12080 * specification. If a character has no explicit uppercase 12081 * mapping, then the {@code char} itself is returned in the 12082 * {@code char[]}. 12083 * 12084 * @param codePoint the character (Unicode code point) to be converted. 12085 * @return a {@code char[]} with the uppercased character. 12086 * @since 1.4 12087 */ 12088 static char[] toUpperCaseCharArray(int codePoint) { 12089 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 
12090 assert isBmpCodePoint(codePoint); 12091 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 12092 } 12093 12094 /** 12095 * The number of bits used to represent a {@code char} value in unsigned 12096 * binary form, constant {@code 16}. 12097 * 12098 * @since 1.5 12099 */ 12100 public static final int SIZE = 16; 12101 12102 /** 12103 * The number of bytes used to represent a {@code char} value in unsigned 12104 * binary form. 12105 * 12106 * @since 1.8 12107 */ 12108 public static final int BYTES = SIZE / Byte.SIZE; 12109 12110 /** 12111 * Returns the value obtained by reversing the order of the bytes in the 12112 * specified {@code char} value. 12113 * 12114 * @param ch The {@code char} of which to reverse the byte order. 12115 * @return the value obtained by reversing (or, equivalently, swapping) 12116 * the bytes in the specified {@code char} value. 12117 * @since 1.5 12118 */ 12119 @IntrinsicCandidate 12120 public static char reverseBytes(char ch) { 12121 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 12122 } 12123 12124 /** 12125 * Returns the name of the specified character 12126 * {@code codePoint}, or null if the code point is 12127 * {@link #UNASSIGNED unassigned}. 12128 * <p> 12129 * If the specified character is not assigned a name by 12130 * the <i>UnicodeData</i> file (part of the Unicode Character 12131 * Database maintained by the Unicode Consortium), the returned 12132 * name is the same as the result of the expression: 12133 * 12134 * <blockquote>{@code 12135 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12136 * + " " 12137 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12138 * 12139 * }</blockquote> 12140 * 12141 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 12142 * returned by this method follows the naming scheme in the 12143 * "Unicode Name Property" section of the Unicode Standard. 
For other 12144 * code points, such as Hangul/Ideographs, The name generation rule above 12145 * differs from the one defined in the Unicode Standard. 12146 * 12147 * @param codePoint the character (Unicode code point) 12148 * 12149 * @return the name of the specified character, or null if 12150 * the code point is unassigned. 12151 * 12152 * @throws IllegalArgumentException if the specified 12153 * {@code codePoint} is not a valid Unicode 12154 * code point. 12155 * 12156 * @since 1.7 12157 */ 12158 public static String getName(int codePoint) { 12159 if (!isValidCodePoint(codePoint)) { 12160 throw new IllegalArgumentException( 12161 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 12162 } 12163 String name = CharacterName.getInstance().getName(codePoint); 12164 if (name != null) 12165 return name; 12166 if (getType(codePoint) == UNASSIGNED) 12167 return null; 12168 UnicodeBlock block = UnicodeBlock.of(codePoint); 12169 if (block != null) 12170 return block.toString().replace('_', ' ') + " " 12171 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12172 // should never come here 12173 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12174 } 12175 12176 /** 12177 * Returns the code point value of the Unicode character specified by 12178 * the given character name. 12179 * <p> 12180 * If a character is not assigned a name by the <i>UnicodeData</i> 12181 * file (part of the Unicode Character Database maintained by the Unicode 12182 * Consortium), its name is defined as the result of the expression: 12183 * 12184 * <blockquote>{@code 12185 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12186 * + " " 12187 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12188 * 12189 * }</blockquote> 12190 * <p> 12191 * The {@code name} matching is case insensitive, with any leading and 12192 * trailing whitespace character removed. 
12193 * 12194 * For the code points in the <i>UnicodeData</i> file, this method 12195 * recognizes the name which conforms to the name defined in the 12196 * "Unicode Name Property" section in the Unicode Standard. For other 12197 * code points, this method recognizes the name generated with 12198 * {@link #getName(int)} method. 12199 * 12200 * @param name the character name 12201 * 12202 * @return the code point value of the character specified by its name. 12203 * 12204 * @throws IllegalArgumentException if the specified {@code name} 12205 * is not a valid character name. 12206 * @throws NullPointerException if {@code name} is {@code null} 12207 * 12208 * @since 9 12209 */ 12210 public static int codePointOf(String name) { 12211 name = name.trim().toUpperCase(Locale.ROOT); 12212 int cp = CharacterName.getInstance().getCodePoint(name); 12213 if (cp != -1) 12214 return cp; 12215 try { 12216 int off = name.lastIndexOf(' '); 12217 if (off != -1) { 12218 cp = Integer.parseInt(name, off + 1, name.length(), 16); 12219 if (isValidCodePoint(cp) && name.equals(getName(cp))) 12220 return cp; 12221 } 12222 } catch (Exception x) {} 12223 throw new IllegalArgumentException("Unrecognized character name :" + name); 12224 } 12225 }