1 /* 2 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.openjdk.asmtools.jasm; 24 25 import static java.lang.String.format; 26 import static org.openjdk.asmtools.jasm.JasmTokens.*; 27 import static org.openjdk.asmtools.jasm.Constants.EOF; 28 import static org.openjdk.asmtools.jasm.Constants.OFFSETBITS; 29 import java.io.IOException; 30 import java.util.function.Predicate; 31 32 /** 33 * A Scanner for Jasm tokens. Errors are reported to the environment object.<p> 34 * 35 * The scanner keeps track of the current token, the value of the current token (if any), 36 * and the start position of the current token.<p> 37 * 38 * The scan() method advances the scanner to the next token in the input.<p> 39 * 40 * The match() method is used to quickly match opening brackets (ie: '(', '{', or '[') 41 * with their closing counter part. This is useful during error recovery.<p> 42 * 43 * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p> 44 */ 45 public class Scanner extends ParseBase { 46 47 /** 48 * SyntaxError is the generic error thrown for parsing problems. 49 */ 50 protected static class SyntaxError extends Error { 51 boolean fatalError = false; 52 SyntaxError Fatal() { fatalError = true; return this; } 53 boolean isFatal() {return fatalError;} 54 } 55 56 /** 57 * Input stream 58 */ 59 protected Environment in; 60 61 /** 62 * The current character 63 */ 64 protected int ch; 65 66 /** 67 * Current token 68 */ 69 // protected int token; 70 protected Token token; 71 72 /** 73 * The position of the current token 74 */ 75 protected int pos; 76 77 /* 78 * Token values. 79 */ 80 protected char charValue; 81 protected int intValue; 82 protected long longValue; 83 protected float floatValue; 84 protected double doubleValue; 85 protected String stringValue; 86 protected String idValue; 87 protected int radix; // Radix, when reading int or long 88 89 /* doc comment preceding the most recent token */ 90 protected String docComment; 91 92 /* A growable character buffer. */ 93 private int count; 94 private char buffer[] = new char[32]; 95 // 96 private Predicate<Integer> escapingAllowed; 97 /** 98 * The position of the previous token 99 */ 100 protected int prevPos; 101 protected int sign; // sign, when reading number 102 protected boolean inBits; // inBits prefix, when reading number 103 104 /** 105 * main constructor. 106 * 107 * Create a scanner to scan an input stream. 108 */ 109 protected Scanner(Environment env) throws IOException { 110 super.init(this, null, env); 111 escapingAllowed = noFunc; 112 this.in = env; 113 ch = env.read(); 114 xscan(); 115 } 116 117 protected void scanModuleStatement() throws IOException { 118 try { 119 escapingAllowed = yesAndProcessFunc; 120 scan(); 121 } finally { 122 escapingAllowed = noFunc; 123 } 124 } 125 126 /** 127 * scan 128 * 129 * Scan the next token. 130 * 131 * @throws IOException 132 */ 133 protected void scan() throws IOException { 134 int signloc = 1, cnt = 0; 135 prevPos = pos; 136 prefix: 137 for (;;) { 138 xscan(); 139 switch (token) { 140 case SIGN: 141 signloc = signloc * intValue; 142 break; 143 default: 144 break prefix; 145 } 146 cnt++; 147 } 148 switch (token) { 149 case INTVAL: 150 case LONGVAL: 151 case FLOATVAL: 152 case DOUBLEVAL: 153 case INF: 154 case NAN: 155 sign = signloc; 156 break; 157 default: 158 } 159 } 160 161 /** 162 * Check the token may be identifier 163 */ 164 protected final boolean checkTokenIdent() { 165 return token.possibleJasmIdentifier(); 166 } 167 168 static String readableConstant(int t) { 169 return "<" + Tables.tag(t) + "> [" + t + "]"; 170 } 171 172 /** 173 * Expects a token, scans the next token or throws an exception. 174 */ 175 protected final void expect(Token t) throws SyntaxError, IOException { 176 check(t); 177 scan(); 178 } 179 180 /** 181 * Checks a token, throws an exception if not the same 182 */ 183 protected final void check(Token t) throws SyntaxError, IOException { 184 if (token != t) { 185 if ((t != Token.IDENT) || !checkTokenIdent()) { 186 env.traceln("expect: " + t + " instead of " + token); 187 switch (t) { 188 case IDENT: 189 env.error(pos, "identifier.expected"); 190 break; 191 default: 192 env.error(pos, "token.expected", "<" + t.printValue() + ">"); 193 break; 194 } 195 196 if (debugFlag) { 197 debugStr("<<<<<PROBLEM>>>>>>>: "); 198 throw new Error("<<<<<PROBLEM>>>>>>>"); 199 } else { 200 throw new SyntaxError(); 201 } 202 } 203 } 204 } 205 206 private void putCh(int ch) { 207 if (count == buffer.length) { 208 char newBuffer[] = new char[buffer.length * 2]; 209 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 210 buffer = newBuffer; 211 } 212 buffer[count++] = (char) ch; 213 } 214 215 private String bufferString() { 216 char buf[] = new char[count]; 217 System.arraycopy(buffer, 0, buf, 0, count); 218 return new String(buf); 219 } 220 221 /** 222 * Returns true if the character is a unicode digit. 223 * 224 * @param ch the character to be checked 225 */ 226 public static boolean isUCDigit(int ch) { 227 if ((ch >= '0') && (ch <= '9')) { 228 return true; 229 } 230 switch (ch >> 8) { 231 case 0x06: 232 return ((ch >= 0x0660) && (ch <= 0x0669)) || // Arabic-Indic 233 ((ch >= 0x06f0) && (ch <= 0x06f9)); // Eastern Arabic-Indic 234 case 0x07: 235 case 0x08: 236 default: 237 return false; 238 case 0x09: 239 return ((ch >= 0x0966) && (ch <= 0x096f)) || // Devanagari 240 ((ch >= 0x09e6) && (ch <= 0x09ef)); // Bengali 241 case 0x0a: 242 return ((ch >= 0x0a66) && (ch <= 0x0a6f)) || // Gurmukhi 243 ((ch >= 0x0ae6) && (ch <= 0x0aef)); // Gujarati 244 case 0x0b: 245 return ((ch >= 0x0b66) && (ch <= 0x0b6f)) || // Oriya 246 ((ch >= 0x0be7) && (ch <= 0x0bef)); // Tamil 247 case 0x0c: 248 return ((ch >= 0x0c66) && (ch <= 0x0c6f)) || // Telugu 249 ((ch >= 0x0ce6) && (ch <= 0x0cef)); // Kannada 250 case 0x0d: 251 return ((ch >= 0x0d66) && (ch <= 0x0d6f)); // Malayalam 252 case 0x0e: 253 return ((ch >= 0x0e50) && (ch <= 0x0e59)) || // Thai 254 ((ch >= 0x0ed0) && (ch <= 0x0ed9)); // Lao 255 case 0x0f: 256 return false; 257 case 0x10: 258 return ((ch >= 0x1040) && (ch <= 0x1049)); // Tibetan 259 } 260 } 261 262 /** 263 * Returns true if the character is a Unicode letter. 264 * 265 * @param ch the character to be checked 266 */ 267 public static boolean isUCLetter(int ch) { 268 // fast check for Latin capitals and small letters 269 if (((ch >= 'A') && (ch <= 'Z')) 270 || ((ch >= 'a') && (ch <= 'z'))) { 271 return true; 272 } 273 // rest of ISO-LATIN-1 274 if (ch < 0x0100) { 275 // fast check 276 if (ch < 0x00c0) { 277 return (ch == '_') || (ch == '$'); 278 } 279 // various latin letters and diacritics, 280 // but *not* the multiplication and division symbols 281 return ((ch >= 0x00c0) && (ch <= 0x00d6)) 282 || ((ch >= 0x00d8) && (ch <= 0x00f6)) 283 || ((ch >= 0x00f8) && (ch <= 0x00ff)); 284 } 285 // other non CJK alphabets and symbols, but not digits 286 if (ch <= 0x1fff) { 287 return !isUCDigit(ch); 288 } 289 // rest are letters only in five ranges: 290 // Hiragana, Katakana, Bopomofo and Hangul 291 // CJK Squared Words 292 // Korean Hangul Symbols 293 // Han (Chinese, Japanese, Korean) 294 // Han compatibility 295 return ((ch >= 0x3040) && (ch <= 0x318f)) 296 || ((ch >= 0x3300) && (ch <= 0x337f)) 297 || ((ch >= 0x3400) && (ch <= 0x3d2d)) 298 || ((ch >= 0x4e00) && (ch <= 0x9fff)) 299 || ((ch >= 0xf900) && (ch <= 0xfaff)); 300 } 301 302 /** 303 * Scan a comment. This method should be called once the initial /, * and the next 304 * character have been read. 305 */ 306 private void skipComment() throws IOException { 307 while (true) { 308 switch (ch) { 309 case EOF: 310 env.error(pos, "eof.in.comment"); 311 return; 312 case '*': 313 if ((ch = in.read()) == '/') { 314 ch = in.read(); 315 return; 316 } 317 break; 318 default: 319 ch = in.read(); 320 break; 321 } 322 } 323 } 324 325 /** 326 * Scan a doc comment. This method should be called once the initial /, * and * have 327 * been read. It gathers the content of the comment (without leading spaces and '*'s) 328 * in the string buffer. 329 */ 330 @SuppressWarnings("empty-statement") 331 private String scanDocComment() throws IOException { 332 count = 0; 333 334 if (ch == '*') { 335 do { 336 ch = in.read(); 337 } while (ch == '*'); 338 if (ch == '/') { 339 ch = in.read(); 340 return ""; 341 } 342 } 343 switch (ch) { 344 case '\n': 345 case ' ': 346 ch = in.read(); 347 break; 348 } 349 350 boolean seenstar = false; 351 int c = count; 352 while (true) { 353 switch (ch) { 354 case EOF: 355 env.error(pos, "eof.in.comment"); 356 return bufferString(); 357 case '\n': 358 putCh('\n'); 359 ch = in.read(); 360 seenstar = false; 361 c = count; 362 break; 363 case ' ': 364 case '\t': 365 putCh(ch); 366 ch = in.read(); 367 break; 368 case '*': 369 if (seenstar) { 370 if ((ch = in.read()) == '/') { 371 ch = in.read(); 372 count = c; 373 return bufferString(); 374 } 375 putCh('*'); 376 } else { 377 seenstar = true; 378 count = c; 379 while ((ch = in.read()) == '*'); 380 switch (ch) { 381 case ' ': 382 ch = in.read(); 383 break; 384 case '/': 385 ch = in.read(); 386 count = c; 387 return bufferString(); 388 } 389 } 390 break; 391 default: 392 if (!seenstar) { 393 seenstar = true; 394 } 395 putCh(ch); 396 ch = in.read(); 397 c = count; 398 break; 399 } 400 } 401 } 402 403 /** 404 * Scan a decimal at this point 405 */ 406 private void scanCPRef() throws IOException { 407 switch (ch = in.read()) { 408 case '0': 409 case '1': 410 case '2': 411 case '3': 412 case '4': 413 case '5': 414 case '6': 415 case '7': 416 case '8': 417 case '9': { 418 boolean overflow = false; 419 long value = ch - '0'; 420 count = 0; 421 putCh(ch); // save character in buffer 422 numberLoop: 423 for (;;) { 424 switch (ch = in.read()) { 425 case '0': 426 case '1': 427 case '2': 428 case '3': 429 case '4': 430 case '5': 431 case '6': 432 case '7': 433 case '8': 434 case '9': 435 putCh(ch); 436 if (overflow) { 437 break; 438 } 439 value = (value * 10) + (ch - '0'); 440 overflow = (value > 0xFFFF); 441 break; 442 default: 443 break numberLoop; 444 } 445 } // while true 446 intValue = (int) value; 447 stringValue = bufferString(); 448 token = Token.CPINDEX; 449 if (overflow) { 450 env.error(pos, "overflow"); 451 } 452 break; 453 } 454 default: 455 stringValue = Character.toString((char)ch); 456 env.error(in.pos, "invalid.number", stringValue); 457 intValue = 0; 458 token = Token.CPINDEX; 459 ch = in.read(); 460 } 461 } // scanCPRef() 462 463 /** 464 * Scan a number. The first digit of the number should be the current character. We 465 * may be scanning hex, decimal, or octal at this point 466 */ 467 private void scanNumber() throws IOException { 468 boolean seenNonOctal = false; 469 boolean overflow = false; 470 radix = (ch == '0' ? 8 : 10); 471 long value = ch - '0'; 472 count = 0; 473 putCh(ch); // save character in buffer 474 numberLoop: 475 for (;;) { 476 switch (ch = in.read()) { 477 case '.': 478 if (radix == 16) { 479 break numberLoop; // an illegal character 480 } 481 scanReal(); 482 return; 483 484 case '8': 485 case '9': 486 // We can't yet throw an error if reading an octal. We might 487 // discover we're really reading a real. 488 seenNonOctal = true; 489 case '0': 490 case '1': 491 case '2': 492 case '3': 493 case '4': 494 case '5': 495 case '6': 496 case '7': 497 putCh(ch); 498 if (radix == 10) { 499 overflow = overflow || (value * 10) / 10 != value; 500 value = (value * 10) + (ch - '0'); 501 overflow = overflow || (value - 1 < -1); 502 } else if (radix == 8) { 503 overflow = overflow || (value >>> 61) != 0; 504 value = (value << 3) + (ch - '0'); 505 } else { 506 overflow = overflow || (value >>> 60) != 0; 507 value = (value << 4) + (ch - '0'); 508 } 509 break; 510 case 'd': 511 case 'D': 512 case 'e': 513 case 'E': 514 case 'f': 515 case 'F': 516 if (radix != 16) { 517 scanReal(); 518 return; 519 } 520 // fall through 521 case 'a': 522 case 'A': 523 case 'b': 524 case 'B': 525 case 'c': 526 case 'C': 527 putCh(ch); 528 if (radix != 16) { 529 break numberLoop; // an illegal character 530 } 531 overflow = overflow || (value >>> 60) != 0; 532 value = (value << 4) + 10 533 + Character.toLowerCase((char) ch) - 'a'; 534 break; 535 case 'l': 536 case 'L': 537 ch = in.read(); // skip over 'l' 538 longValue = value; 539 token = Token.LONGVAL; 540 break numberLoop; 541 case 'x': 542 case 'X': 543 // if the first character is a '0' and this is the second 544 // letter, then read in a hexadecimal number. Otherwise, error. 545 if (count == 1 && radix == 8) { 546 radix = 16; 547 break; 548 } else { 549 // we'll get an illegal character error 550 break numberLoop; 551 } 552 default: 553 intValue = (int) value; 554 token = Token.INTVAL; 555 break numberLoop; 556 } 557 } // while true 558 // we have just finished reading the number. The next thing better 559 // not be a letter or digit. 560 if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') { 561 env.error(in.pos, "invalid.number", Character.toString((char)ch)); 562 do { 563 ch = in.read(); 564 } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.'); 565 intValue = 0; 566 token = Token.INTVAL; 567 } else if (radix == 8 && seenNonOctal) { 568 intValue = 0; 569 token = Token.INTVAL; 570 env.error(in.pos, "invalid.octal.number"); 571 } else if (overflow 572 || (token == Token.INTVAL 573 && ((radix == 10) ? (intValue - 1 < -1) 574 : ((value & 0xFFFFFFFF00000000L) != 0)))) { 575 intValue = 0; // so we don't get second overflow in Parser 576 longValue = 0; 577 env.error(pos, "overflow"); 578 } 579 } // scanNumber() 580 581 /** 582 * Scan a float. We are either looking at the decimal, or we have already seen it and 583 * put it into the buffer. We haven't seen an exponent. Scan a float. Should be called 584 * with the current character is either the 'e', 'E' or '.' 585 */ 586 private void scanReal() throws IOException { 587 boolean seenExponent = false; 588 boolean isSingleFloat = false; 589 char lastChar; 590 if (ch == '.') { 591 putCh(ch); 592 ch = in.read(); 593 } 594 595 numberLoop: 596 for (;; ch = in.read()) { 597 switch (ch) { 598 case '0': 599 case '1': 600 case '2': 601 case '3': 602 case '4': 603 case '5': 604 case '6': 605 case '7': 606 case '8': 607 case '9': 608 putCh(ch); 609 break; 610 case 'e': 611 case 'E': 612 if (seenExponent) { 613 break numberLoop; // we'll get a format error 614 } 615 putCh(ch); 616 seenExponent = true; 617 break; 618 case '+': 619 case '-': 620 lastChar = buffer[count - 1]; 621 if (lastChar != 'e' && lastChar != 'E') { 622 break numberLoop; // this isn't an error, though! 623 } 624 putCh(ch); 625 break; 626 case 'f': 627 case 'F': 628 ch = in.read(); // skip over 'f' 629 isSingleFloat = true; 630 break numberLoop; 631 case 'd': 632 case 'D': 633 ch = in.read(); // skip over 'd' 634 // fall through 635 default: 636 break numberLoop; 637 } // sswitch 638 } // loop 639 640 // we have just finished reading the number. The next thing better 641 // not be a letter or digit. 642 if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') { 643 env.error(in.pos, "invalid.number", Character.toString((char)ch)); 644 do { 645 ch = in.read(); 646 } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.'); 647 doubleValue = 0; 648 token = Token.DOUBLEVAL; 649 } else { 650 token = isSingleFloat ? Token.FLOATVAL : Token.DOUBLEVAL; 651 try { 652 lastChar = buffer[count - 1]; 653 if (lastChar == 'e' || lastChar == 'E' 654 || lastChar == '+' || lastChar == '-') { 655 env.error(in.pos - 1, "float.format"); 656 } else if (isSingleFloat) { 657 floatValue = Float.valueOf(bufferString()); 658 if (Float.isInfinite(floatValue)) { 659 env.error(pos, "overflow"); 660 } 661 } else { 662 doubleValue = Double.valueOf(bufferString()); 663 if (Double.isInfinite(doubleValue)) { 664 env.error(pos, "overflow"); 665 env.error(pos, "overflow"); 666 } 667 } 668 } catch (NumberFormatException ee) { 669 env.error(pos, "float.format"); 670 doubleValue = 0; 671 floatValue = 0; 672 } 673 } 674 } // scanReal 675 676 /** 677 * Scan an escape character. 678 * 679 * @return the character or '\\' 680 */ 681 private int scanEscapeChar() throws IOException { 682 int p = in.pos; 683 684 switch (ch = in.read()) { 685 case '0': 686 case '1': 687 case '2': 688 case '3': 689 case '4': 690 case '5': 691 case '6': 692 case '7': { 693 int n = ch - '0'; 694 for (int i = 2; i > 0; i--) { 695 switch (ch = in.read()) { 696 case '0': 697 case '1': 698 case '2': 699 case '3': 700 case '4': 701 case '5': 702 case '6': 703 case '7': 704 n = (n << 3) + ch - '0'; 705 break; 706 default: 707 if (n > 0xFF) { 708 env.error(p, "invalid.escape.char"); 709 } 710 return n; 711 } 712 } 713 ch = in.read(); 714 if (n > 0xFF) { 715 env.error(p, "invalid.escape.char"); 716 } 717 return n; 718 } 719 case 'r': 720 ch = in.read(); 721 return '\r'; 722 case 'n': 723 ch = in.read(); 724 return '\n'; 725 case 'f': 726 ch = in.read(); 727 return '\f'; 728 case 'b': 729 ch = in.read(); 730 return '\b'; 731 case 't': 732 ch = in.read(); 733 return '\t'; 734 case '\\': 735 ch = in.read(); 736 return '\\'; 737 case '\"': 738 ch = in.read(); 739 return '\"'; 740 case '\'': 741 ch = in.read(); 742 return '\''; 743 case 'u': 744 int unich = in.convertUnicode(); 745 ch = in.read(); 746 return unich; 747 } 748 return '\\'; 749 } 750 751 /** 752 * Scan a string. The current character should be the opening " of the string. 753 */ 754 private void scanString() throws IOException { 755 token = Token.STRINGVAL; 756 count = 0; 757 ch = in.read(); 758 759 // Scan a String 760 while (true) { 761 switch (ch) { 762 case EOF: 763 env.error(pos, "eof.in.string"); 764 stringValue = bufferString(); 765 return; 766 case '\n': 767 ch = in.read(); 768 env.error(pos, "newline.in.string"); 769 stringValue = bufferString(); 770 return; 771 case '"': 772 ch = in.read(); 773 stringValue = bufferString(); 774 return; 775 case '\\': { 776 int c = scanEscapeChar(); 777 if (c >= 0) { 778 putCh((char) c); 779 } 780 break; 781 } 782 default: 783 putCh(ch); 784 ch = in.read(); 785 break; 786 } 787 } 788 } 789 790 791 /** 792 * Scan an Identifier. The current character should be the first character of the 793 * identifier. 794 */ 795 private void scanIdentifier(char[] prefix) throws IOException { 796 int firstChar; 797 count = 0; 798 if(prefix != null) { 799 for(;;) { 800 for (int i = 0; i < prefix.length; i++) 801 putCh(prefix[i]); 802 ch = in.read(); 803 if (ch == '\\') { 804 ch = in.read(); 805 if (ch == 'u') { 806 ch = in.convertUnicode(); 807 if (!isUCLetter(ch) && !isUCDigit(ch)) { 808 prefix = new char[]{(char)ch}; 809 continue; 810 } 811 } else if (escapingAllowed.test(ch)) { 812 prefix = new char[]{(char)ch}; 813 continue; 814 } 815 int p = in.pos; 816 env.error(p, "invalid.escape.char"); 817 } 818 break; 819 } 820 } 821 firstChar = ch; 822 boolean firstIteration = true; 823 scanloop: 824 while (true) { 825 putCh(ch); 826 ch = in.read(); 827 828 // Check to see if the annotation marker is at 829 // the front of the identifier. 830 if (firstIteration && firstChar == '@') { 831 // May be a type annotation 832 if (ch == 'T') { // type annotation 833 putCh(ch); 834 ch = in.read(); 835 } 836 837 // is either a runtime visible or invisible annotation 838 if (ch == '+' || ch == '-') { // regular annotation 839 // possible annotation - 840 // need to eat up the '@+' or '@-' 841 putCh(ch); 842 ch = in.read(); 843 } 844 idValue = bufferString(); 845 stringValue = idValue; 846 token = Token.ANNOTATION; 847 return; 848 } 849 850 firstIteration = false; 851 switch (ch) { 852 case 'a': 853 case 'b': 854 case 'c': 855 case 'd': 856 case 'e': 857 case 'f': 858 case 'g': 859 case 'h': 860 case 'i': 861 case 'j': 862 case 'k': 863 case 'l': 864 case 'm': 865 case 'n': 866 case 'o': 867 case 'p': 868 case 'q': 869 case 'r': 870 case 's': 871 case 't': 872 case 'u': 873 case 'v': 874 case 'w': 875 case 'x': 876 case 'y': 877 case 'z': 878 case 'A': 879 case 'B': 880 case 'C': 881 case 'D': 882 case 'E': 883 case 'F': 884 case 'G': 885 case 'H': 886 case 'I': 887 case 'J': 888 case 'K': 889 case 'L': 890 case 'M': 891 case 'N': 892 case 'O': 893 case 'P': 894 case 'Q': 895 case 'R': 896 case 'S': 897 case 'T': 898 case 'U': 899 case 'V': 900 case 'W': 901 case 'X': 902 case 'Y': 903 case 'Z': 904 case '0': 905 case '1': 906 case '2': 907 case '3': 908 case '4': 909 case '5': 910 case '6': 911 case '7': 912 case '8': 913 case '9': 914 case '$': 915 case '_': 916 case '-': 917 case '[': 918 case ']': 919 case '(': 920 case ')': 921 case '<': 922 case '>': 923 break; 924 case '/': {// may be comment right after identifier 925 int c = in.lookForward(); 926 if ((c == '*') || (c == '/')) { 927 break scanloop; // yes, comment 928 } 929 break; // no, continue to parse identifier 930 } 931 case '\\': 932 ch = in.read(); 933 if ( ch == 'u') { 934 ch = in.convertUnicode(); 935 if (isUCLetter(ch) || isUCDigit(ch)) { 936 break; 937 } 938 } else if( escapingAllowed.test(ch)) { 939 break; 940 } 941 int p = in.pos; 942 env.error(p, "invalid.escape.char"); 943 default: 944 // if ((!isUCDigit(ch)) && (!isUCLetter(ch))) { 945 break scanloop; 946 // } 947 } // end switch 948 } // end scanloop 949 idValue = bufferString(); 950 stringValue = idValue; 951 token = keyword_token_ident(idValue); 952 debugStr(format("##### SCANNER (scanIdent) ######## token = %s value = \"%s\"\n", token, idValue)); 953 } // end scanIdentifier 954 955 //============================== 956 @SuppressWarnings("empty-statement") 957 protected final void xscan() throws IOException { 958 docComment = null; 959 loop: 960 for (;;) { 961 pos = in.pos; 962 switch (ch) { 963 case EOF: 964 token = Token.EOF; 965 break loop; 966 case '\n': 967 case ' ': 968 case '\t': 969 case '\f': 970 ch = in.read(); 971 break; 972 case '/': 973 switch (ch = in.read()) { 974 case '/': 975 // Parse a // comment 976 while (((ch = in.read()) != EOF) && (ch != '\n')); 977 break; 978 case '*': 979 ch = in.read(); 980 if (ch == '*') { 981 docComment = scanDocComment(); 982 } else { 983 skipComment(); 984 } 985 break; 986 default: 987 token = Token.DIV; 988 break loop; 989 } 990 break; 991 case '"': 992 scanString(); 993 break loop; 994 case '-': 995 intValue = -1; 996 token = Token.SIGN; 997 ch = in.read(); 998 break loop; 999 case '+': 1000 intValue = +1; 1001 ch = in.read(); 1002 token = Token.SIGN; 1003 break loop; 1004 case '0': 1005 case '1': 1006 case '2': 1007 case '3': 1008 case '4': 1009 case '5': 1010 case '6': 1011 case '7': 1012 case '8': 1013 case '9': 1014 scanNumber(); 1015 break loop; 1016 case '.': 1017 switch (ch = in.read()) { 1018 case '0': 1019 case '1': 1020 case '2': 1021 case '3': 1022 case '4': 1023 case '5': 1024 case '6': 1025 case '7': 1026 case '8': 1027 case '9': 1028 count = 0; 1029 putCh('.'); 1030 scanReal(); 1031 break; 1032 default: 1033 token = Token.FIELD; 1034 } 1035 break loop; 1036 case '{': 1037 ch = in.read(); 1038 token = Token.LBRACE; 1039 break loop; 1040 case '}': 1041 ch = in.read(); 1042 token = Token.RBRACE; 1043 break loop; 1044 case ',': 1045 ch = in.read(); 1046 token = Token.COMMA; 1047 break loop; 1048 case ';': 1049 ch = in.read(); 1050 token = Token.SEMICOLON; 1051 break loop; 1052 case ':': 1053 ch = in.read(); 1054 token = Token.COLON; 1055 break loop; 1056 case '=': 1057 if ((ch = in.read()) == '=') { 1058 ch = in.read(); 1059 token = Token.EQ; 1060 break loop; 1061 } 1062 token = Token.ASSIGN; 1063 break loop; 1064 case 'a': 1065 case 'b': 1066 case 'c': 1067 case 'd': 1068 case 'e': 1069 case 'f': 1070 case 'g': 1071 case 'h': 1072 case 'i': 1073 case 'j': 1074 case 'k': 1075 case 'l': 1076 case 'm': 1077 case 'n': 1078 case 'o': 1079 case 'p': 1080 case 'q': 1081 case 'r': 1082 case 's': 1083 case 't': 1084 case 'u': 1085 case 'v': 1086 case 'w': 1087 case 'x': 1088 case 'y': 1089 case 'z': 1090 case 'A': 1091 case 'B': 1092 case 'C': 1093 case 'D': 1094 case 'E': 1095 case 'F': 1096 case 'G': 1097 case 'H': 1098 case 'I': 1099 case 'J': 1100 case 'K': 1101 case 'L': 1102 case 'M': 1103 case 'N': 1104 case 'O': 1105 case 'P': 1106 case 'Q': 1107 case 'R': 1108 case 'S': 1109 case 'T': 1110 case 'U': 1111 case 'V': 1112 case 'W': 1113 case 'X': 1114 case 'Y': 1115 case 'Z': 1116 case '$': 1117 case '_': 1118 case '@': 1119 case '[': 1120 case ']': 1121 case '(': 1122 case ')': 1123 case '<': 1124 case '>': 1125 scanIdentifier(null); 1126 break loop; 1127 case '\u001a': 1128 // Our one concession to DOS. 1129 if ((ch = in.read()) == EOF) { 1130 token = Token.EOF; 1131 break loop; 1132 } 1133 env.error(pos, "funny.char"); 1134 ch = in.read(); 1135 break; 1136 case '#': 1137 int c = in.lookForward(); 1138 if (c == '{') { 1139 // '#' char denotes a "paramMethod name" token 1140 ch = in.read(); 1141 token = Token.PARAM_NAME; 1142 break loop; 1143 } 1144 // otherwise, it is a normal cpref 1145 scanCPRef(); 1146 break loop; 1147 case '\\': 1148 ch = in.read(); 1149 if ( ch == 'u') { 1150 ch = in.convertUnicode(); 1151 if (isUCLetter(ch)) { 1152 scanIdentifier(null); 1153 break loop; 1154 } 1155 } else if( escapingAllowed.test(ch)) { 1156 scanIdentifier(new char[]{'\\', (char)ch}); 1157 break loop; 1158 } 1159 // if ((ch = in.read()) == 'u') { 1160 // ch = in.convertUnicode(); 1161 // if (isUCLetter(ch)) { 1162 // scanIdentifier(); 1163 // break loop; 1164 // } 1165 // } 1166 default: 1167 env.out.println("funny.char:" + env.lineNumber(pos) + "/" + (pos & ((1 << OFFSETBITS) - 1))); 1168 env.error(pos, "funny.char"); 1169 ch = in.read(); 1170 } 1171 } 1172 } 1173 1174 @Override 1175 protected void debugScan(String dbstr) { 1176 if (token == null) { 1177 env.traceln(dbstr + "<<<NULL TOKEN>>>"); 1178 return; 1179 } 1180 env.trace(dbstr + token); 1181 switch (token) { 1182 case IDENT: 1183 env.traceln(" = '" + stringValue + "' {idValue = '" + idValue + "'}"); 1184 break; 1185 case STRINGVAL: 1186 env.traceln(" = {stringValue}: \"" + stringValue + "\""); 1187 break; 1188 case INTVAL: 1189 env.traceln(" = {intValue}: " + intValue + "}"); 1190 break; 1191 case FLOATVAL: 1192 env.traceln(" = {floatValue}: " + floatValue); 1193 break; 1194 case DOUBLEVAL: 1195 env.traceln(" = {doubleValue}: " + doubleValue); 1196 break; 1197 default: 1198 env.traceln(""); 1199 } 1200 } 1201 1202 private Predicate<Integer> noFunc = (ch)-> false; 1203 private Predicate<Integer> yesAndProcessFunc = (ch) -> { 1204 boolean res = ((ch == '\\') || (ch == ':') || (ch == '@')); 1205 if (res) 1206 putCh('\\'); 1207 return res; 1208 }; 1209 }