1 /* 2 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.openjdk.asmtools.jcoder; 24 25 import static org.openjdk.asmtools.jcoder.JcodTokens.*; 26 27 import java.io.IOException; 28 import java.util.HashMap; 29 30 /** 31 * A Scanner for Jcoder tokens. Errors are reported to the environment object.<p> 32 * 33 * The scanner keeps track of the current token, the value of the current token (if any), 34 * and the start position of the current token.<p> 35 * 36 * The scan() method advances the scanner to the next token in the input.<p> 37 * 38 * The match() method is used to quickly match opening brackets (ie: '(', '{', or '[') 39 * with their closing counter part. This is useful during error recovery.<p> 40 * 41 * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p> 42 */ 43 public class Scanner { 44 /*-------------------------------------------------------- */ 45 /* Scanner Fields */ 46 47 /** 48 * End of input 49 */ 50 public static final int EOF = -1; 51 public static final int LBRACE = 123; // "{" 52 private boolean debugCP = false; 53 private int numCPentrs = 0; 54 55 /** 56 * Where errors are reported 57 */ 58 protected SourceFile env; 59 60 /** 61 * Input stream 62 */ 63 protected SourceFile in; 64 HashMap<String, String> macros; 65 66 /** 67 * The current character 68 */ 69 protected int ch, prevCh = -1; 70 protected String macro; 71 protected int indexMacro; 72 73 /** 74 * Current token 75 */ 76 protected Token token; 77 78 /** 79 * The position of the current token 80 */ 81 protected int pos; 82 83 /** 84 * The position of the previous token 85 */ 86 protected int prevPos; 87 88 /* Token values. */ 89 protected long longValue; 90 protected int intValue; 91 protected int intSize; 92 protected String stringValue; 93 protected ByteBuffer longStringValue; 94 protected int sign; // sign, when reading number 95 96 /* A doc comment preceding the most recent token */ 97 protected String docComment; 98 99 /** 100 * A growable character buffer. 101 */ 102 private int count; 103 private char[] buffer = new char[32]; 104 105 /*-------------------------------------------------------- */ 106 /** 107 * Create a scanner to scan an input stream. 108 */ 109 protected Scanner(SourceFile sf, HashMap<String, String> macros) 110 throws IOException { 111 this.env = sf; 112 this.in = sf; 113 this.macros = macros; 114 115 ch = sf.read(); 116 prevPos = sf.pos; 117 118 scan(); 119 } 120 121 /** 122 * for use in jcfront. 123 */ 124 protected Scanner(SourceFile sf) 125 throws IOException { 126 this.env = sf; 127 this.in = sf; 128 this.macros = new HashMap<>(); 129 130 ch = sf.read(); 131 prevPos = sf.pos; 132 133 scan(); 134 } 135 136 /* *********************************************** */ 137 void setDebugCP(boolean enable) { 138 if (enable) { 139 numCPentrs = 0; 140 } 141 debugCP = enable; 142 143 } 144 145 void addConstDebug(ConstType ct) { 146 numCPentrs += 1; 147 env.traceln("\n Const[" + numCPentrs + "] = " + ct.printval()); 148 } 149 150 void setMacro(String macro) { 151 this.macro = macro; 152 indexMacro = 0; 153 prevCh = ch; 154 } 155 156 void readCh() throws IOException { 157 if (macro != null) { 158 if (indexMacro < macro.length()) { 159 ch = macro.charAt(indexMacro); 160 } 161 macro = null; 162 } 163 if (prevCh >= 0) { 164 ch = prevCh; 165 prevCh = -1; 166 } else { 167 ch = in.read(); 168 } 169 } 170 171 private void putc(int ch) { 172 if (count == buffer.length) { 173 char[] newBuffer = new char[buffer.length * 2]; 174 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length); 175 buffer = newBuffer; 176 } 177 buffer[count++] = (char) ch; 178 } 179 180 private String bufferString() { 181 char[] buf = new char[count]; 182 System.arraycopy(buffer, 0, buf, 0, count); 183 return new String(buf); 184 } 185 186 /** 187 * Scan a comment. This method should be called once the initial /, * and the next 188 * character have been read. 189 */ 190 private void skipComment() throws IOException { 191 while (true) { 192 switch (ch) { 193 case EOF: 194 env.error(pos, "eof.in.comment"); 195 return; 196 197 case '*': 198 readCh(); 199 if (ch == '/') { 200 readCh(); 201 return; 202 } 203 break; 204 205 default: 206 readCh(); 207 break; 208 } 209 } 210 } 211 212 /** 213 * Scan a doc comment. This method should be called once the initial /, * and * have 214 * been read. It gathers the content of the comment (witout leading spaces and '*'s) 215 * in the string buffer. 216 */ 217 private String scanDocComment() throws IOException { 218 count = 0; 219 220 if (ch == '*') { 221 do { 222 readCh(); 223 } while (ch == '*'); 224 if (ch == '/') { 225 readCh(); 226 return ""; 227 } 228 } 229 switch (ch) { 230 case '\n': 231 case ' ': 232 readCh(); 233 break; 234 } 235 236 boolean seenstar = false; 237 int c = count; 238 while (true) { 239 switch (ch) { 240 case EOF: 241 env.error(pos, "eof.in.comment"); 242 return bufferString(); 243 244 case '\n': 245 putc('\n'); 246 readCh(); 247 seenstar = false; 248 c = count; 249 break; 250 251 case ' ': 252 case '\t': 253 putc(ch); 254 readCh(); 255 break; 256 257 case '*': 258 if (seenstar) { 259 readCh(); 260 if (ch == '/') { 261 readCh(); 262 count = c; 263 return bufferString(); 264 } 265 putc('*'); 266 } else { 267 seenstar = true; 268 count = c; 269 do { 270 readCh(); 271 } while (ch == '*'); 272 switch (ch) { 273 case ' ': 274 readCh(); 275 break; 276 277 case '/': 278 readCh(); 279 count = c; 280 return bufferString(); 281 } 282 } 283 break; 284 285 default: 286 if (!seenstar) { 287 seenstar = true; 288 } 289 putc(ch); 290 readCh(); 291 c = count; 292 break; 293 } 294 } 295 } 296 297 /** 298 * Scan a decimal number 299 */ 300 private void scanDecNumber() throws IOException { 301 boolean overflow = false; 302 long value = ch - '0'; 303 count = 0; 304 token = Token.INTVAL; 305 intSize = 2; // default 306 putc(ch); // save character in buffer 307 numberLoop: 308 for (;;) { 309 readCh(); 310 switch (ch) { 311 case '8': 312 case '9': 313 case '0': 314 case '1': 315 case '2': 316 case '3': 317 case '4': 318 case '5': 319 case '6': 320 case '7': 321 putc(ch); 322 overflow = overflow || (value * 10) / 10 != value; 323 value = (value * 10) + (ch - '0'); 324 overflow = overflow || (value - 1 < -1); 325 break; 326 case 'b': 327 readCh(); 328 intSize = 1; 329 break numberLoop; 330 case 's': 331 readCh(); 332 intSize = 2; 333 break numberLoop; 334 case 'i': 335 readCh(); 336 intSize = 4; 337 break numberLoop; 338 case 'l': 339 readCh(); 340 intSize = 8; 341 break numberLoop; 342 default: 343 break numberLoop; 344 } 345 } 346 longValue = value; 347 intValue = (int) value; 348 // we have just finished reading the number. The next thing better 349 // not be a letter or digit. 350 if (Character.isJavaIdentifierPart((char) ch) || ch == '.') { 351 env.error(in.pos, "invalid.number", Character.toString((char)ch)); 352 do { 353 readCh(); 354 } while (Character.isJavaIdentifierPart((char) ch) || ch == '.'); 355 return; 356 } 357 if (overflow) { 358 env.error(pos, "overflow"); 359 } 360 } // scanNumber() 361 362 /** 363 * Scan a hex number. 364 */ 365 private void scanHexNumber() throws IOException { 366 boolean overflow = false; 367 long value = 0; 368 int cypher; 369 count = 0; 370 token = Token.INTVAL; 371 intSize = 2; // default 372 putc(ch); // save character in buffer 373 numberLoop: 374 for (int k = 0;; k++) { 375 readCh(); 376 switch (ch) { 377 case '8': 378 case '9': 379 case '0': 380 case '1': 381 case '2': 382 case '3': 383 case '4': 384 case '5': 385 case '6': 386 case '7': 387 cypher = (char) ch - '0'; 388 break; 389 case 'd': 390 case 'D': 391 case 'e': 392 case 'E': 393 case 'f': 394 case 'F': 395 case 'a': 396 case 'A': 397 case 'b': 398 case 'B': 399 case 'c': 400 case 'C': 401 cypher = 10 + Character.toLowerCase((char) ch) - 'a'; 402 break; 403 404 default: 405 break numberLoop; 406 } 407 putc(ch); 408 overflow = overflow || ((value >>> 60) != 0); 409 value = (value << 4) + cypher; 410 intSize = (k + 1) / 2; 411 } 412 longValue = value; 413 intValue = (int) value; 414 // we have just finished reading the number. The next thing better 415 // not be a letter or digit. 416 if (Character.isJavaIdentifierPart((char) ch) || ch == '.') { 417 env.error(in.pos, "invalid.number", Character.toString((char)ch)); 418 do { 419 readCh(); 420 } while (Character.isJavaIdentifierPart((char) ch) || ch == '.'); 421 intValue = 0; 422 // } else if ( overflow || (intValue - 1 < -1) ) { 423 } else if (overflow) { 424 intValue = 0; // so we don't get second overflow in Parser 425 env.error(pos, "overflow"); 426 } 427 } // scanNumber() 428 429 /** 430 * Scan an escape character. 431 * 432 * @return the character or -1 if it escaped an end-of-line. 433 */ 434 private int scanEscapeChar() throws IOException { 435 int p = in.pos; 436 437 readCh(); 438 switch (ch) { 439 case '0': 440 case '1': 441 case '2': 442 case '3': 443 case '4': 444 case '5': 445 case '6': 446 case '7': { 447 int n = ch - '0'; 448 for (int i = 2; i > 0; i--) { 449 readCh(); 450 switch (ch) { 451 case '0': 452 case '1': 453 case '2': 454 case '3': 455 case '4': 456 case '5': 457 case '6': 458 case '7': 459 n = (n << 3) + ch - '0'; 460 break; 461 462 default: 463 if (n > 0xFF) { 464 env.error(p, "invalid.escape.char"); 465 } 466 return n; 467 } 468 } 469 readCh(); 470 if (n > 0xFF) { 471 env.error(p, "invalid.escape.char"); 472 } 473 return n; 474 } 475 case 'r': 476 readCh(); 477 return '\r'; 478 case 'n': 479 readCh(); 480 return '\n'; 481 case 'f': 482 readCh(); 483 return '\f'; 484 case 'b': 485 readCh(); 486 return '\b'; 487 case 't': 488 readCh(); 489 return '\t'; 490 case '\\': 491 readCh(); 492 return '\\'; 493 case '\"': 494 readCh(); 495 return '\"'; 496 case '\'': 497 readCh(); 498 return '\''; 499 } 500 501 env.error(p, "invalid.escape.char"); 502 readCh(); 503 return -1; 504 } 505 506 /** 507 * Scan a string. The current character should be the opening " of the string. 508 */ 509 private void scanString() throws IOException { 510 token = Token.STRINGVAL; 511 count = 0; 512 readCh(); 513 514 loop: 515 for (;;) { 516 switch (ch) { 517 case EOF: 518 env.error(pos, "eof.in.string"); 519 break loop; 520 521 case '\n': 522 readCh(); 523 env.error(pos, "newline.in.string"); 524 break loop; 525 526 case '"': 527 readCh(); 528 break loop; 529 530 case '\\': { 531 int c = scanEscapeChar(); 532 if (c >= 0) { 533 putc((char)c); 534 } 535 break; 536 } 537 538 default: 539 putc(ch); 540 readCh(); 541 break; 542 } 543 } 544 stringValue = bufferString(); 545 } 546 547 /** 548 * Scan a character array. The current character should be the opening ' of the array. 549 */ 550 private void scanCharArray() throws IOException { 551 token = Token.LONGSTRINGVAL; 552 ByteBuffer buf = new ByteBuffer(); 553 count = 0; 554 readCh(); 555 556 loop: 557 for (;;) { 558 int c = ch; 559 switch (ch) { 560 case EOF: 561 env.error(pos, "eof.in.string"); 562 break loop; 563 564 case '\n': 565 readCh(); 566 env.error(pos, "newline.in.string"); 567 break loop; 568 569 case '\'': 570 readCh(); 571 break loop; 572 573 case '\\': 574 c = scanEscapeChar(); 575 if (c < 0) { 576 break; 577 } 578 // no break - continue 579 default: 580 // see description of java.io.DataOutput.writeUTF() 581 if ((c > 0) && (c <= 0x7F)) { 582 buf.write(c); 583 } else if ((c == 0) || ((c >= 0x80) && (c <= 0x7FF))) { 584 buf.write(0xC0 | (0x1F & (c >> 6))); 585 buf.write(0x80 | (0x3f & c)); 586 } else { 587 buf.write(0xc0 | (0x0f & (c >> 12))); 588 buf.write(0x80 | (0x3f & (c >> 6))); 589 buf.write(0x80 | (0x3f & c)); 590 } 591 readCh(); 592 } 593 } 594 longStringValue = buf; 595 } 596 597 /** 598 * Scan an Identifier. The current character should be the first character of the 599 * identifier. 600 */ 601 private void scanIdentifier() throws IOException { 602 count = 0; 603 boolean compound = false; 604 for (;;) { 605 putc(ch); 606 readCh(); 607 if ((ch == '/') || (ch == '.') || (ch == '-')) { 608 compound = true; 609 } else if (!Character.isJavaIdentifierPart((char) ch)) { 610 break; 611 } 612 } 613 stringValue = bufferString(); 614 if (compound) { 615 token = Token.IDENT; 616 } else { 617 token = keyword_token_ident(stringValue); 618 if (token == Token.IDENT) { 619 intValue = constValue(stringValue); 620 if (intValue != -1) { 621 // this is a constant 622 if (debugCP) { 623 ConstType ct = constType(stringValue); 624 if (ct != null) { 625 addConstDebug(ct); 626 } 627 } 628 token = Token.INTVAL; 629 intSize = 1; 630 longValue = intValue; 631 } 632 } 633 } 634 } // end scanIdentifier 635 636 // skip till symbol 637 protected void skipTill(int sym) throws IOException { 638 while (true) { 639 if( ch == EOF ) { 640 env.error(pos, "eof.in.comment"); 641 return; 642 } else if (ch == sym) { 643 return; 644 } 645 readCh(); 646 } 647 } 648 649 protected int xscan() throws IOException { 650 int retPos = pos; 651 prevPos = in.pos; 652 docComment = null; 653 sign = 1; 654 for (;;) { 655 pos = in.pos; 656 657 switch (ch) { 658 case EOF: 659 token = Token.EOF; 660 return retPos; 661 662 case '\n': 663 case ' ': 664 case '\t': 665 case '\f': 666 readCh(); 667 break; 668 669 case '/': 670 readCh(); 671 switch (ch) { 672 case '/': 673 // Parse a // comment 674 do { 675 readCh(); 676 } while ((ch != EOF) && (ch != '\n')); 677 break; 678 679 case '*': 680 readCh(); 681 if (ch == '*') { 682 docComment = scanDocComment(); 683 } else { 684 skipComment(); 685 } 686 break; 687 688 default: 689 token = Token.DIV; 690 return retPos; 691 } 692 break; 693 694 case '"': 695 scanString(); 696 return retPos; 697 698 case '\'': 699 scanCharArray(); 700 return retPos; 701 702 case '-': 703 sign = -sign; // hack: no check that numbers only are allowed after 704 case '+': 705 readCh(); 706 break; 707 708 case '0': 709 readCh(); 710 token = Token.INTVAL; 711 longValue = intValue = 0; 712 switch (ch) { 713 case 'x': 714 case 'X': 715 scanHexNumber(); 716 break; 717 case '0': 718 case '1': 719 case '2': 720 case '3': 721 case '4': 722 case '5': 723 case '6': 724 case '7': 725 case '8': 726 case '9': 727 scanDecNumber(); 728 break; 729 case 'b': 730 readCh(); 731 intSize = 1; 732 break; 733 case 's': 734 readCh(); 735 intSize = 2; 736 break; 737 case 'i': 738 readCh(); 739 intSize = 4; 740 break; 741 case 'l': 742 readCh(); 743 intSize = 8; 744 break; 745 default: 746 intSize = 2; 747 } 748 return retPos; 749 750 case '1': 751 case '2': 752 case '3': 753 case '4': 754 case '5': 755 case '6': 756 case '7': 757 case '8': 758 case '9': 759 scanDecNumber(); 760 return retPos; 761 762 case '{': 763 readCh(); 764 token = Token.LBRACE; 765 return retPos; 766 767 case '}': 768 readCh(); 769 token = Token.RBRACE; 770 return retPos; 771 772 case '(': 773 readCh(); 774 token = Token.LPAREN; 775 return retPos; 776 777 case ')': 778 readCh(); 779 token = Token.RPAREN; 780 return retPos; 781 782 case '[': 783 readCh(); 784 token = Token.LSQBRACKET; 785 return retPos; 786 787 case ']': 788 readCh(); 789 token = Token.RSQBRACKET; 790 return retPos; 791 792 case ',': 793 readCh(); 794 token = Token.COMMA; 795 return retPos; 796 797 case ';': 798 readCh(); 799 token = Token.SEMICOLON; 800 return retPos; 801 802 case ':': 803 readCh(); 804 token = Token.COLON; 805 return retPos; 806 807 case '=': 808 readCh(); 809 if (ch == '=') { 810 readCh(); 811 token = Token.EQ; 812 return retPos; 813 } 814 token = Token.ASSIGN; 815 return retPos; 816 817 case '\u001a': 818 // Our one concession to DOS. 819 readCh(); 820 if (ch == EOF) { 821 token = Token.EOF; 822 return retPos; 823 } 824 env.error(pos, "funny.char"); 825 readCh(); 826 break; 827 828 case '#': 829 readCh(); 830 scanDecNumber(); 831 return retPos; 832 833 case '&': { 834 readCh(); 835 retPos = pos; 836 if (!Character.isJavaIdentifierStart((char) ch)) { 837 env.error(pos, "identifier.expected"); 838 } 839 scanIdentifier(); 840 String macroId = stringValue; 841 String macro = (String) macros.get(macroId); 842 if (macro == null) { 843 env.error(pos, "macro.undecl", macroId); 844 throw new SyntaxError(); 845 } 846 setMacro(macro); 847 readCh(); 848 } 849 break; 850 851 default: 852 if (Character.isJavaIdentifierStart((char) ch)) { 853 scanIdentifier(); 854 return retPos; 855 } 856 env.error(pos, "funny.char"); 857 readCh(); 858 break; 859 } 860 } 861 } 862 863 /** 864 * Scan to a matching '}', ']' or ')'. The current token must be a '{', '[' or '('; 865 */ 866 protected void match(Token open, Token close) throws IOException { 867 int depth = 1; 868 869 while (true) { 870 scan(); 871 if (token == open) { 872 depth++; 873 } else if (token == close) { 874 if (--depth == 0) { 875 return; 876 } 877 } else if (token == Token.EOF) { 878 env.error(pos, "unbalanced.paren"); 879 return; 880 } 881 } 882 } 883 884 /** 885 * Scan the next token. 886 * 887 * @return the position of the previous token. 888 */ 889 protected int scan() throws IOException { 890 int retPos = xscan(); 891 //env.traceln("scanned:"+token+" ("+keywordName(token)+")"); 892 return retPos; 893 } 894 895 /** 896 * Scan the next token. 897 * 898 * @return the position of the previous token. 899 */ 900 protected int scanMacro() throws IOException { 901 int retPos = xscan(); 902 //env.traceln("scanned:"+token+" ("+keywordName(token)+")"); 903 return retPos; 904 } 905 }