1 /*
   2  * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.openjdk.asmtools.jasm;
  24 
  25 import static java.lang.String.format;
  26 import static org.openjdk.asmtools.jasm.JasmTokens.*;
  27 import static org.openjdk.asmtools.jasm.Constants.EOF;
  28 import static org.openjdk.asmtools.jasm.Constants.OFFSETBITS;
  29 import java.io.IOException;
  30 import java.util.function.Predicate;
  31 
  32 /**
  33  * A Scanner for Jasm tokens. Errors are reported to the environment object.<p>
  34  *
  35  * The scanner keeps track of the current token, the value of the current token (if any),
  36  * and the start position of the current token.<p>
  37  *
  38  * The scan() method advances the scanner to the next token in the input.<p>
  39  *
 * The match() method is used to quickly match opening brackets (i.e. '(', '{', or '[')
 * with their closing counterpart. This is useful during error recovery.<p>
  42  *
  43  * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p>
  44  */
  45 public class Scanner extends ParseBase {
  46 
  47     /**
  48      * SyntaxError is the generic error thrown for parsing problems.
  49      */
  50     protected static class SyntaxError extends Error {
  51         boolean fatalError = false;
  52         SyntaxError Fatal() { fatalError = true; return this; }
  53         boolean isFatal() {return fatalError;}
  54     }
  55 
  56     /**
  57      * Input stream
  58      */
  59     protected Environment in;
  60 
  61     /**
  62      * The current character
  63      */
  64     protected int ch;
  65 
  66     /**
  67      * Current token
  68      */
  69 //    protected int token;
  70     protected Token token;
  71 
  72     /**
  73      * The position of the current token
  74      */
  75     protected int pos;
  76 
  77     /*
  78      * Token values.
  79      */
  80     protected char charValue;
  81     protected int intValue;
  82     protected long longValue;
  83     protected float floatValue;
  84     protected double doubleValue;
  85     protected String stringValue;
  86     protected String idValue;
  87     protected int radix;        // Radix, when reading int or long
  88 
  89     /*   doc comment preceding the most recent token  */
  90     protected String docComment;
  91 
  92     /* A growable character buffer. */
  93     private int count;
  94     private char buffer[] = new char[32];
  95     //
  96     private Predicate<Integer> escapingAllowed;
  97     /**
  98      * The position of the previous token
  99      */
 100     protected int prevPos;
 101     protected int sign;              // sign, when reading number
 102     protected boolean inBits;        // inBits prefix, when reading number
 103 
 104     /**
 105      * main constructor.
 106      *
 107      * Create a scanner to scan an input stream.
 108      */
 109     protected Scanner(Environment env) throws IOException {
 110         super.init(this, null, env);
 111         escapingAllowed = noFunc;
 112         this.in = env;
 113         ch = env.read();
 114         xscan();
 115     }
 116 
 117     protected void scanModuleStatement() throws IOException {
 118         try {
 119             escapingAllowed = yesAndProcessFunc;
 120             scan();
 121         } finally {
 122             escapingAllowed = noFunc;
 123         }
 124     }
 125 
 126     /**
 127      * scan
 128      *
 129      * Scan the next token.
 130      *
 131      * @throws IOException
 132      */
 133     protected void scan() throws IOException {
 134         int signloc = 1, cnt = 0;
 135         prevPos = pos;
 136 prefix:
 137         for (;;) {
 138             xscan();
 139             switch (token) {
 140                 case SIGN:
 141                     signloc = signloc * intValue;
 142                     break;
 143                 default:
 144                     break prefix;
 145             }
 146             cnt++;
 147         }
 148         switch (token) {
 149             case INTVAL:
 150             case LONGVAL:
 151             case FLOATVAL:
 152             case DOUBLEVAL:
 153             case INF:
 154             case NAN:
 155                 sign = signloc;
 156                 break;
 157             default:
 158         }
 159     }
 160 
 161     /**
 162      * Check the token may be identifier
 163      */
 164     protected final boolean checkTokenIdent() {
 165         return token.possibleJasmIdentifier();
 166     }
 167 
 168     static String readableConstant(int t) {
 169         return "<" + Tables.tag(t) + "> [" + t + "]";
 170     }
 171 
 172     /**
 173      * Expects a token, scans the next token or throws an exception.
 174      */
 175     protected final void expect(Token t) throws SyntaxError, IOException {
 176         check(t);
 177         scan();
 178     }
 179 
 180     /**
 181      * Checks a token, throws an exception if not the same
 182      */
 183     protected final void check(Token t) throws SyntaxError, IOException {
 184         if (token != t) {
 185             if ((t != Token.IDENT) || !checkTokenIdent()) {
 186                 env.traceln("expect: " + t + " instead of " + token);
 187                 switch (t) {
 188                     case IDENT:
 189                         env.error(pos, "identifier.expected");
 190                         break;
 191                     default:
 192                         env.error(pos, "token.expected", "<" + t.printValue() + ">");
 193                         break;
 194                 }
 195 
 196                 if (debugFlag) {
 197                     debugStr("<<<<<PROBLEM>>>>>>>: ");
 198                     throw new Error("<<<<<PROBLEM>>>>>>>");
 199                 } else {
 200                     throw new SyntaxError();
 201                 }
 202             }
 203         }
 204     }
 205 
 206     private void putCh(int ch) {
 207         if (count == buffer.length) {
 208             char newBuffer[] = new char[buffer.length * 2];
 209             System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
 210             buffer = newBuffer;
 211         }
 212         buffer[count++] = (char) ch;
 213     }
 214 
 215     private String bufferString() {
 216         char buf[] = new char[count];
 217         System.arraycopy(buffer, 0, buf, 0, count);
 218         return new String(buf);
 219     }
 220 
 221     /**
 222      * Returns true if the character is a unicode digit.
 223      *
 224      * @param ch the character to be checked
 225      */
 226     public static boolean isUCDigit(int ch) {
 227         if ((ch >= '0') && (ch <= '9')) {
 228             return true;
 229         }
 230         switch (ch >> 8) {
 231             case 0x06:
 232                 return ((ch >= 0x0660) && (ch <= 0x0669)) ||        // Arabic-Indic
 233                         ((ch >= 0x06f0) && (ch <= 0x06f9));         // Eastern Arabic-Indic
 234             case 0x07:
 235             case 0x08:
 236             default:
 237                 return false;
 238             case 0x09:
 239                 return ((ch >= 0x0966) && (ch <= 0x096f)) ||        // Devanagari
 240                         ((ch >= 0x09e6) && (ch <= 0x09ef));         // Bengali
 241             case 0x0a:
 242                 return ((ch >= 0x0a66) && (ch <= 0x0a6f)) ||        // Gurmukhi
 243                         ((ch >= 0x0ae6) && (ch <= 0x0aef));         // Gujarati
 244             case 0x0b:
 245                 return ((ch >= 0x0b66) && (ch <= 0x0b6f)) ||        // Oriya
 246                         ((ch >= 0x0be7) && (ch <= 0x0bef));         // Tamil
 247             case 0x0c:
 248                 return ((ch >= 0x0c66) && (ch <= 0x0c6f)) ||        // Telugu
 249                         ((ch >= 0x0ce6) && (ch <= 0x0cef));         // Kannada
 250             case 0x0d:
 251                 return ((ch >= 0x0d66) && (ch <= 0x0d6f));          // Malayalam
 252             case 0x0e:
 253                 return ((ch >= 0x0e50) && (ch <= 0x0e59)) ||        // Thai
 254                         ((ch >= 0x0ed0) && (ch <= 0x0ed9));         // Lao
 255             case 0x0f:
 256                 return false;
 257             case 0x10:
 258                 return ((ch >= 0x1040) && (ch <= 0x1049));         // Tibetan
 259         }
 260     }
 261 
 262     /**
 263      * Returns true if the character is a Unicode letter.
 264      *
 265      * @param ch the character to be checked
 266      */
 267     public static boolean isUCLetter(int ch) {
 268         // fast check for Latin capitals and small letters
 269         if (((ch >= 'A') && (ch <= 'Z'))
 270                 || ((ch >= 'a') && (ch <= 'z'))) {
 271             return true;
 272         }
 273         // rest of ISO-LATIN-1
 274         if (ch < 0x0100) {
 275             // fast check
 276             if (ch < 0x00c0) {
 277                 return (ch == '_') || (ch == '$');
 278             }
 279             // various latin letters and diacritics,
 280             // but *not* the multiplication and division symbols
 281             return ((ch >= 0x00c0) && (ch <= 0x00d6))
 282                     || ((ch >= 0x00d8) && (ch <= 0x00f6))
 283                     || ((ch >= 0x00f8) && (ch <= 0x00ff));
 284         }
 285         // other non CJK alphabets and symbols, but not digits
 286         if (ch <= 0x1fff) {
 287             return !isUCDigit(ch);
 288         }
 289         // rest are letters only in five ranges:
 290         //        Hiragana, Katakana, Bopomofo and Hangul
 291         //        CJK Squared Words
 292         //        Korean Hangul Symbols
 293         //        Han (Chinese, Japanese, Korean)
 294         //        Han compatibility
 295         return ((ch >= 0x3040) && (ch <= 0x318f))
 296                 || ((ch >= 0x3300) && (ch <= 0x337f))
 297                 || ((ch >= 0x3400) && (ch <= 0x3d2d))
 298                 || ((ch >= 0x4e00) && (ch <= 0x9fff))
 299                 || ((ch >= 0xf900) && (ch <= 0xfaff));
 300     }
 301 
 302     /**
 303      * Scan a comment. This method should be called once the initial /, * and the next
 304      * character have been read.
 305      */
 306     private void skipComment() throws IOException {
 307         while (true) {
 308             switch (ch) {
 309                 case EOF:
 310                     env.error(pos, "eof.in.comment");
 311                     return;
 312                 case '*':
 313                     if ((ch = in.read()) == '/') {
 314                         ch = in.read();
 315                         return;
 316                     }
 317                     break;
 318                 default:
 319                     ch = in.read();
 320                     break;
 321             }
 322         }
 323     }
 324 
 325     /**
 326      * Scan a doc comment. This method should be called once the initial /, * and * have
 327      * been read. It gathers the content of the comment (without leading spaces and '*'s)
 328      * in the string buffer.
 329      */
 330     @SuppressWarnings("empty-statement")
 331     private String scanDocComment() throws IOException {
 332         count = 0;
 333 
 334         if (ch == '*') {
 335             do {
 336                 ch = in.read();
 337             } while (ch == '*');
 338             if (ch == '/') {
 339                 ch = in.read();
 340                 return "";
 341             }
 342         }
 343         switch (ch) {
 344             case '\n':
 345             case ' ':
 346                 ch = in.read();
 347                 break;
 348         }
 349 
 350         boolean seenstar = false;
 351         int c = count;
 352         while (true) {
 353             switch (ch) {
 354                 case EOF:
 355                     env.error(pos, "eof.in.comment");
 356                     return bufferString();
 357                 case '\n':
 358                     putCh('\n');
 359                     ch = in.read();
 360                     seenstar = false;
 361                     c = count;
 362                     break;
 363                 case ' ':
 364                 case '\t':
 365                     putCh(ch);
 366                     ch = in.read();
 367                     break;
 368                 case '*':
 369                     if (seenstar) {
 370                         if ((ch = in.read()) == '/') {
 371                             ch = in.read();
 372                             count = c;
 373                             return bufferString();
 374                         }
 375                         putCh('*');
 376                     } else {
 377                         seenstar = true;
 378                         count = c;
 379                         while ((ch = in.read()) == '*');
 380                         switch (ch) {
 381                             case ' ':
 382                                 ch = in.read();
 383                                 break;
 384                             case '/':
 385                                 ch = in.read();
 386                                 count = c;
 387                                 return bufferString();
 388                         }
 389                     }
 390                     break;
 391                 default:
 392                     if (!seenstar) {
 393                         seenstar = true;
 394                     }
 395                     putCh(ch);
 396                     ch = in.read();
 397                     c = count;
 398                     break;
 399             }
 400         }
 401     }
 402 
 403     /**
 404      * Scan a decimal at this point
 405      */
 406     private void scanCPRef() throws IOException {
 407         switch (ch = in.read()) {
 408             case '0':
 409             case '1':
 410             case '2':
 411             case '3':
 412             case '4':
 413             case '5':
 414             case '6':
 415             case '7':
 416             case '8':
 417             case '9': {
 418                 boolean overflow = false;
 419                 long value = ch - '0';
 420                 count = 0;
 421                 putCh(ch);                // save character in buffer
 422 numberLoop:
 423                 for (;;) {
 424                     switch (ch = in.read()) {
 425                         case '0':
 426                         case '1':
 427                         case '2':
 428                         case '3':
 429                         case '4':
 430                         case '5':
 431                         case '6':
 432                         case '7':
 433                         case '8':
 434                         case '9':
 435                             putCh(ch);
 436                             if (overflow) {
 437                                 break;
 438                             }
 439                             value = (value * 10) + (ch - '0');
 440                             overflow = (value > 0xFFFF);
 441                             break;
 442                         default:
 443                             break numberLoop;
 444                     }
 445                 } // while true
 446                 intValue = (int) value;
 447                 stringValue = bufferString();
 448                 token = Token.CPINDEX;
 449                 if (overflow) {
 450                     env.error(pos, "overflow");
 451                 }
 452                 break;
 453             }
 454             default:
 455                 stringValue = Character.toString((char)ch);
 456                 env.error(in.pos, "invalid.number", stringValue);
 457                 intValue = 0;
 458                 token = Token.CPINDEX;
 459                 ch = in.read();
 460         }
 461     } // scanCPRef()
 462 
 463     /**
 464      * Scan a number. The first digit of the number should be the current character. We
 465      * may be scanning hex, decimal, or octal at this point
 466      */
 467     private void scanNumber() throws IOException {
 468         boolean seenNonOctal = false;
 469         boolean overflow = false;
 470         radix = (ch == '0' ? 8 : 10);
 471         long value = ch - '0';
 472         count = 0;
 473         putCh(ch);                // save character in buffer
 474 numberLoop:
 475         for (;;) {
 476             switch (ch = in.read()) {
 477                 case '.':
 478                     if (radix == 16) {
 479                         break numberLoop; // an illegal character
 480                     }
 481                     scanReal();
 482                     return;
 483 
 484                 case '8':
 485                 case '9':
 486                     // We can't yet throw an error if reading an octal.  We might
 487                     // discover we're really reading a real.
 488                     seenNonOctal = true;
 489                 case '0':
 490                 case '1':
 491                 case '2':
 492                 case '3':
 493                 case '4':
 494                 case '5':
 495                 case '6':
 496                 case '7':
 497                     putCh(ch);
 498                     if (radix == 10) {
 499                         overflow = overflow || (value * 10) / 10 != value;
 500                         value = (value * 10) + (ch - '0');
 501                         overflow = overflow || (value - 1 < -1);
 502                     } else if (radix == 8) {
 503                         overflow = overflow || (value >>> 61) != 0;
 504                         value = (value << 3) + (ch - '0');
 505                     } else {
 506                         overflow = overflow || (value >>> 60) != 0;
 507                         value = (value << 4) + (ch - '0');
 508                     }
 509                     break;
 510                 case 'd':
 511                 case 'D':
 512                 case 'e':
 513                 case 'E':
 514                 case 'f':
 515                 case 'F':
 516                     if (radix != 16) {
 517                         scanReal();
 518                         return;
 519                     }
 520                 // fall through
 521                 case 'a':
 522                 case 'A':
 523                 case 'b':
 524                 case 'B':
 525                 case 'c':
 526                 case 'C':
 527                     putCh(ch);
 528                     if (radix != 16) {
 529                         break numberLoop; // an illegal character
 530                     }
 531                     overflow = overflow || (value >>> 60) != 0;
 532                     value = (value << 4) + 10
 533                             + Character.toLowerCase((char) ch) - 'a';
 534                     break;
 535                 case 'l':
 536                 case 'L':
 537                     ch = in.read();        // skip over 'l'
 538                     longValue = value;
 539                     token = Token.LONGVAL;
 540                     break numberLoop;
 541                 case 'x':
 542                 case 'X':
 543                    // if the first character is a '0' and this is the second
 544                     // letter, then read in a hexadecimal number.  Otherwise, error.
 545                     if (count == 1 && radix == 8) {
 546                         radix = 16;
 547                         break;
 548                     } else {
 549                         // we'll get an illegal character error
 550                         break numberLoop;
 551                     }
 552                 default:
 553                     intValue = (int) value;
 554                     token = Token.INTVAL;
 555                     break numberLoop;
 556             }
 557         } // while true
 558         // we have just finished reading the number.  The next thing better
 559         // not be a letter or digit.
 560         if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') {
 561             env.error(in.pos, "invalid.number", Character.toString((char)ch));
 562             do {
 563                 ch = in.read();
 564             } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.');
 565             intValue = 0;
 566             token = Token.INTVAL;
 567         } else if (radix == 8 && seenNonOctal) {
 568             intValue = 0;
 569             token = Token.INTVAL;
 570             env.error(in.pos, "invalid.octal.number");
 571         } else if (overflow
 572                 || (token == Token.INTVAL
 573                 && ((radix == 10) ? (intValue - 1 < -1)
 574                         : ((value & 0xFFFFFFFF00000000L) != 0)))) {
 575             intValue = 0;        // so we don't get second overflow in Parser
 576             longValue = 0;
 577             env.error(pos, "overflow");
 578         }
 579     } // scanNumber()
 580 
 581     /**
 582      * Scan a float. We are either looking at the decimal, or we have already seen it and
 583      * put it into the buffer. We haven't seen an exponent. Scan a float. Should be called
 584      * with the current character is either the 'e', 'E' or '.'
 585      */
 586     private void scanReal() throws IOException {
 587         boolean seenExponent = false;
 588         boolean isSingleFloat = false;
 589         char lastChar;
 590         if (ch == '.') {
 591             putCh(ch);
 592             ch = in.read();
 593         }
 594 
 595 numberLoop:
 596         for (;; ch = in.read()) {
 597             switch (ch) {
 598                 case '0':
 599                 case '1':
 600                 case '2':
 601                 case '3':
 602                 case '4':
 603                 case '5':
 604                 case '6':
 605                 case '7':
 606                 case '8':
 607                 case '9':
 608                     putCh(ch);
 609                     break;
 610                 case 'e':
 611                 case 'E':
 612                     if (seenExponent) {
 613                         break numberLoop; // we'll get a format error
 614                     }
 615                     putCh(ch);
 616                     seenExponent = true;
 617                     break;
 618                 case '+':
 619                 case '-':
 620                     lastChar = buffer[count - 1];
 621                     if (lastChar != 'e' && lastChar != 'E') {
 622                         break numberLoop; // this isn't an error, though!
 623                     }
 624                     putCh(ch);
 625                     break;
 626                 case 'f':
 627                 case 'F':
 628                     ch = in.read(); // skip over 'f'
 629                     isSingleFloat = true;
 630                     break numberLoop;
 631                 case 'd':
 632                 case 'D':
 633                     ch = in.read(); // skip over 'd'
 634                 // fall through
 635                 default:
 636                     break numberLoop;
 637             } // sswitch
 638         } // loop
 639 
 640         // we have just finished reading the number.  The next thing better
 641         // not be a letter or digit.
 642         if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') {
 643             env.error(in.pos, "invalid.number", Character.toString((char)ch));
 644             do {
 645                 ch = in.read();
 646             } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.');
 647             doubleValue = 0;
 648             token = Token.DOUBLEVAL;
 649         } else {
 650             token = isSingleFloat ? Token.FLOATVAL : Token.DOUBLEVAL;
 651             try {
 652                 lastChar = buffer[count - 1];
 653                 if (lastChar == 'e' || lastChar == 'E'
 654                         || lastChar == '+' || lastChar == '-') {
 655                     env.error(in.pos - 1, "float.format");
 656                 } else if (isSingleFloat) {
 657                     floatValue = Float.valueOf(bufferString());
 658                     if (Float.isInfinite(floatValue)) {
 659                         env.error(pos, "overflow");
 660                     }
 661                 } else {
 662                     doubleValue = Double.valueOf(bufferString());
 663                     if (Double.isInfinite(doubleValue)) {
 664                         env.error(pos, "overflow");
 665                         env.error(pos, "overflow");
 666                     }
 667                 }
 668             } catch (NumberFormatException ee) {
 669                 env.error(pos, "float.format");
 670                 doubleValue = 0;
 671                 floatValue = 0;
 672             }
 673         }
 674     } // scanReal
 675 
 676     /**
 677      * Scan an escape character.
 678      *
 679      * @return the character or '\\'
 680      */
 681     private int scanEscapeChar() throws IOException {
 682         int p = in.pos;
 683 
 684         switch (ch = in.read()) {
 685             case '0':
 686             case '1':
 687             case '2':
 688             case '3':
 689             case '4':
 690             case '5':
 691             case '6':
 692             case '7': {
 693                 int n = ch - '0';
 694                 for (int i = 2; i > 0; i--) {
 695                     switch (ch = in.read()) {
 696                         case '0':
 697                         case '1':
 698                         case '2':
 699                         case '3':
 700                         case '4':
 701                         case '5':
 702                         case '6':
 703                         case '7':
 704                             n = (n << 3) + ch - '0';
 705                             break;
 706                         default:
 707                             if (n > 0xFF) {
 708                                 env.error(p, "invalid.escape.char");
 709                             }
 710                             return n;
 711                     }
 712                 }
 713                 ch = in.read();
 714                 if (n > 0xFF) {
 715                     env.error(p, "invalid.escape.char");
 716                 }
 717                 return n;
 718             }
 719             case 'r':
 720                 ch = in.read();
 721                 return '\r';
 722             case 'n':
 723                 ch = in.read();
 724                 return '\n';
 725             case 'f':
 726                 ch = in.read();
 727                 return '\f';
 728             case 'b':
 729                 ch = in.read();
 730                 return '\b';
 731             case 't':
 732                 ch = in.read();
 733                 return '\t';
 734             case '\\':
 735                 ch = in.read();
 736                 return '\\';
 737             case '\"':
 738                 ch = in.read();
 739                 return '\"';
 740             case '\'':
 741                 ch = in.read();
 742                 return '\'';
 743             case 'u':
 744                 int unich = in.convertUnicode();
 745                 ch = in.read();
 746                 return unich;
 747         }
 748         return '\\';
 749     }
 750 
 751     /**
 752      * Scan a string. The current character should be the opening " of the string.
 753      */
 754     private void scanString() throws IOException {
 755         token = Token.STRINGVAL;
 756         count = 0;
 757         ch = in.read();
 758 
 759         // Scan a String
 760         while (true) {
 761             switch (ch) {
 762                 case EOF:
 763                     env.error(pos, "eof.in.string");
 764                     stringValue = bufferString();
 765                     return;
 766                 case '\n':
 767                     ch = in.read();
 768                     env.error(pos, "newline.in.string");
 769                     stringValue = bufferString();
 770                     return;
 771                 case '"':
 772                     ch = in.read();
 773                     stringValue = bufferString();
 774                     return;
 775                 case '\\': {
 776                     int c = scanEscapeChar();
 777                     if (c >= 0) {
 778                         putCh((char) c);
 779                     }
 780                     break;
 781                 }
 782                 default:
 783                     putCh(ch);
 784                     ch = in.read();
 785                     break;
 786             }
 787         }
 788     }
 789 
 790 
 791     /**
 792      * Scan an Identifier. The current character should be the first character of the
 793      * identifier.
 794      */
 795     private void scanIdentifier(char[] prefix) throws IOException {
 796         int firstChar;
 797         count = 0;
 798         if(prefix != null) {
 799             for(;;) {
 800                 for (int i = 0; i < prefix.length; i++)
 801                     putCh(prefix[i]);
 802                 ch = in.read();
 803                 if (ch == '\\') {
 804                     ch = in.read();
 805                     if (ch == 'u') {
 806                         ch = in.convertUnicode();
 807                         if (!isUCLetter(ch) && !isUCDigit(ch)) {
 808                             prefix = new char[]{(char)ch};
 809                             continue;
 810                         }
 811                     } else if (escapingAllowed.test(ch)) {
 812                         prefix = new char[]{(char)ch};
 813                         continue;
 814                     }
 815                     int p = in.pos;
 816                     env.error(p, "invalid.escape.char");
 817                 }
 818                 break;
 819             }
 820         }
 821         firstChar = ch;
 822         boolean firstIteration = true;
 823 scanloop:
 824         while (true) {
 825             putCh(ch);
 826             ch = in.read();
 827 
 828             // Check to see if the annotation marker is at
 829             // the front of the identifier.
 830             if (firstIteration && firstChar == '@') {
 831                 // May be a type annotation
 832                 if (ch == 'T') {  // type annotation
 833                     putCh(ch);
 834                     ch = in.read();
 835                 }
 836 
 837                 // is either a runtime visible or invisible annotation
 838                 if (ch == '+' || ch == '-') {  // regular annotation
 839                     // possible annotation -
 840                     // need to eat up the '@+' or '@-'
 841                     putCh(ch);
 842                     ch = in.read();
 843                 }
 844                 idValue = bufferString();
 845                 stringValue = idValue;
 846                 token = Token.ANNOTATION;
 847                 return;
 848             }
 849 
 850             firstIteration = false;
 851             switch (ch) {
 852                 case 'a':
 853                 case 'b':
 854                 case 'c':
 855                 case 'd':
 856                 case 'e':
 857                 case 'f':
 858                 case 'g':
 859                 case 'h':
 860                 case 'i':
 861                 case 'j':
 862                 case 'k':
 863                 case 'l':
 864                 case 'm':
 865                 case 'n':
 866                 case 'o':
 867                 case 'p':
 868                 case 'q':
 869                 case 'r':
 870                 case 's':
 871                 case 't':
 872                 case 'u':
 873                 case 'v':
 874                 case 'w':
 875                 case 'x':
 876                 case 'y':
 877                 case 'z':
 878                 case 'A':
 879                 case 'B':
 880                 case 'C':
 881                 case 'D':
 882                 case 'E':
 883                 case 'F':
 884                 case 'G':
 885                 case 'H':
 886                 case 'I':
 887                 case 'J':
 888                 case 'K':
 889                 case 'L':
 890                 case 'M':
 891                 case 'N':
 892                 case 'O':
 893                 case 'P':
 894                 case 'Q':
 895                 case 'R':
 896                 case 'S':
 897                 case 'T':
 898                 case 'U':
 899                 case 'V':
 900                 case 'W':
 901                 case 'X':
 902                 case 'Y':
 903                 case 'Z':
 904                 case '0':
 905                 case '1':
 906                 case '2':
 907                 case '3':
 908                 case '4':
 909                 case '5':
 910                 case '6':
 911                 case '7':
 912                 case '8':
 913                 case '9':
 914                 case '$':
 915                 case '_':
 916                 case '-':
 917                 case '[':
 918                 case ']':
 919                 case '(':
 920                 case ')':
 921                 case '<':
 922                 case '>':
 923                     break;
 924                 case '/': {// may be comment right after identifier
 925                     int c = in.lookForward();
 926                     if ((c == '*') || (c == '/')) {
 927                         break scanloop; // yes, comment
 928                     }
 929                     break; // no, continue to parse identifier
 930                 }
 931                 case '\\':
 932                     ch = in.read();
 933                     if ( ch == 'u') {
 934                         ch = in.convertUnicode();
 935                         if (isUCLetter(ch) || isUCDigit(ch)) {
 936                             break;
 937                         }
 938                     } else if( escapingAllowed.test(ch)) {
 939                         break;
 940                     }
 941                     int p = in.pos;
 942                     env.error(p, "invalid.escape.char");
 943                 default:
 944 //                    if ((!isUCDigit(ch)) && (!isUCLetter(ch))) {
 945                     break scanloop;
 946 //                    }
 947             } // end switch
 948         } // end scanloop
 949         idValue = bufferString();
 950         stringValue = idValue;
 951         token = keyword_token_ident(idValue);
 952         debugStr(format("##### SCANNER (scanIdent) ######## token = %s value = \"%s\"\n", token, idValue));
 953     } // end scanIdentifier
 954 
 955 //==============================
 956     @SuppressWarnings("empty-statement")
 957     protected final void xscan() throws IOException {
 958         docComment = null;
 959 loop:
 960         for (;;) {
 961             pos = in.pos;
 962             switch (ch) {
 963                 case EOF:
 964                     token = Token.EOF;
 965                     break loop;
 966                 case '\n':
 967                 case ' ':
 968                 case '\t':
 969                 case '\f':
 970                     ch = in.read();
 971                     break;
 972                 case '/':
 973                     switch (ch = in.read()) {
 974                         case '/':
 975                             // Parse a // comment
 976                             while (((ch = in.read()) != EOF) && (ch != '\n'));
 977                             break;
 978                         case '*':
 979                             ch = in.read();
 980                             if (ch == '*') {
 981                                 docComment = scanDocComment();
 982                             } else {
 983                                 skipComment();
 984                             }
 985                             break;
 986                         default:
 987                             token = Token.DIV;
 988                             break loop;
 989                     }
 990                     break;
 991                 case '"':
 992                     scanString();
 993                     break loop;
 994                 case '-':
 995                     intValue = -1;
 996                     token = Token.SIGN;
 997                     ch = in.read();
 998                     break loop;
 999                 case '+':
1000                     intValue = +1;
1001                     ch = in.read();
1002                     token = Token.SIGN;
1003                     break loop;
1004                 case '0':
1005                 case '1':
1006                 case '2':
1007                 case '3':
1008                 case '4':
1009                 case '5':
1010                 case '6':
1011                 case '7':
1012                 case '8':
1013                 case '9':
1014                     scanNumber();
1015                     break loop;
1016                 case '.':
1017                     switch (ch = in.read()) {
1018                         case '0':
1019                         case '1':
1020                         case '2':
1021                         case '3':
1022                         case '4':
1023                         case '5':
1024                         case '6':
1025                         case '7':
1026                         case '8':
1027                         case '9':
1028                             count = 0;
1029                             putCh('.');
1030                             scanReal();
1031                             break;
1032                         default:
1033                             token = Token.FIELD;
1034                     }
1035                     break loop;
1036                 case '{':
1037                     ch = in.read();
1038                     token = Token.LBRACE;
1039                     break loop;
1040                 case '}':
1041                     ch = in.read();
1042                     token = Token.RBRACE;
1043                     break loop;
1044                 case ',':
1045                     ch = in.read();
1046                     token = Token.COMMA;
1047                     break loop;
1048                 case ';':
1049                     ch = in.read();
1050                     token = Token.SEMICOLON;
1051                     break loop;
1052                 case ':':
1053                     ch = in.read();
1054                     token = Token.COLON;
1055                     break loop;
1056                 case '=':
1057                     if ((ch = in.read()) == '=') {
1058                         ch = in.read();
1059                         token = Token.EQ;
1060                         break loop;
1061                     }
1062                     token = Token.ASSIGN;
1063                     break loop;
1064                 case 'a':
1065                 case 'b':
1066                 case 'c':
1067                 case 'd':
1068                 case 'e':
1069                 case 'f':
1070                 case 'g':
1071                 case 'h':
1072                 case 'i':
1073                 case 'j':
1074                 case 'k':
1075                 case 'l':
1076                 case 'm':
1077                 case 'n':
1078                 case 'o':
1079                 case 'p':
1080                 case 'q':
1081                 case 'r':
1082                 case 's':
1083                 case 't':
1084                 case 'u':
1085                 case 'v':
1086                 case 'w':
1087                 case 'x':
1088                 case 'y':
1089                 case 'z':
1090                 case 'A':
1091                 case 'B':
1092                 case 'C':
1093                 case 'D':
1094                 case 'E':
1095                 case 'F':
1096                 case 'G':
1097                 case 'H':
1098                 case 'I':
1099                 case 'J':
1100                 case 'K':
1101                 case 'L':
1102                 case 'M':
1103                 case 'N':
1104                 case 'O':
1105                 case 'P':
1106                 case 'Q':
1107                 case 'R':
1108                 case 'S':
1109                 case 'T':
1110                 case 'U':
1111                 case 'V':
1112                 case 'W':
1113                 case 'X':
1114                 case 'Y':
1115                 case 'Z':
1116                 case '$':
1117                 case '_':
1118                 case '@':
1119                 case '[':
1120                 case ']':
1121                 case '(':
1122                 case ')':
1123                 case '<':
1124                 case '>':
1125                     scanIdentifier(null);
1126                     break loop;
1127                 case '\u001a':
1128                     // Our one concession to DOS.
1129                     if ((ch = in.read()) == EOF) {
1130                         token = Token.EOF;
1131                         break loop;
1132                     }
1133                     env.error(pos, "funny.char");
1134                     ch = in.read();
1135                     break;
1136                 case '#':
1137                     int c = in.lookForward();
1138                     if (c == '{') {
1139                         // '#' char denotes a "paramMethod name" token
1140                         ch = in.read();
1141                         token = Token.PARAM_NAME;
1142                         break loop;
1143                     }
1144                     // otherwise, it is a normal cpref
1145                     scanCPRef();
1146                     break loop;
1147                 case '\\':
1148                     ch = in.read();
1149                     if ( ch == 'u') {
1150                         ch = in.convertUnicode();
1151                         if (isUCLetter(ch)) {
1152                             scanIdentifier(null);
1153                             break loop;
1154                         }
1155                     } else if( escapingAllowed.test(ch)) {
1156                         scanIdentifier(new char[]{'\\', (char)ch});
1157                         break loop;
1158                     }
1159 //                    if ((ch = in.read()) == 'u') {
1160 //                        ch = in.convertUnicode();
1161 //                        if (isUCLetter(ch)) {
1162 //                            scanIdentifier();
1163 //                            break loop;
1164 //                        }
1165 //                    }
1166                 default:
1167                     env.out.println("funny.char:" + env.lineNumber(pos) + "/" + (pos & ((1 << OFFSETBITS) - 1)));
1168                     env.error(pos, "funny.char");
1169                     ch = in.read();
1170             }
1171         }
1172     }
1173 
1174     @Override
1175     protected void debugScan(String dbstr) {
1176         if (token == null) {
1177             env.traceln(dbstr + "<<<NULL TOKEN>>>");
1178             return;
1179         }
1180         env.trace(dbstr + token);
1181         switch (token) {
1182             case IDENT:
1183                 env.traceln(" = '" + stringValue + "' {idValue = '" + idValue + "'}");
1184                 break;
1185             case STRINGVAL:
1186                 env.traceln(" = {stringValue}: \"" + stringValue + "\"");
1187                 break;
1188             case INTVAL:
1189                 env.traceln(" = {intValue}: " + intValue + "}");
1190                 break;
1191             case FLOATVAL:
1192                 env.traceln(" = {floatValue}: " + floatValue);
1193                 break;
1194             case DOUBLEVAL:
1195                 env.traceln(" = {doubleValue}: " + doubleValue);
1196                 break;
1197             default:
1198                 env.traceln("");
1199         }
1200     }
1201 
1202     private Predicate<Integer> noFunc = (ch)-> false;
1203     private Predicate<Integer> yesAndProcessFunc = (ch) -> {
1204         boolean res = ((ch == '\\') || (ch == ':') || (ch == '@'));
1205         if (res)
1206             putCh('\\');
1207         return res;
1208     };
1209 }