1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;
  33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  36 import com.sun.tools.javac.util.*;
  37 import com.sun.tools.javac.util.JCDiagnostic.*;
  38 
  39 import java.lang.reflect.InvocationTargetException;
  40 import java.lang.reflect.Method;
  41 import java.nio.CharBuffer;
  42 import java.util.HashSet;
  43 import java.util.Set;
  44 
  45 import static com.sun.tools.javac.parser.Tokens.*;
  46 import static com.sun.tools.javac.util.LayoutCharacters.*;
  47 
  48 /** The lexical analyzer maps an input stream consisting of
  49  *  ASCII characters and Unicode escapes into a token sequence.
  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer {
  57 
    /** Debug switch for the scanner; a constant {@code false} in this file.
     */
    private static final boolean scannerDebug = false;

    /** The source language setting.
     */
    private Source source;

    /** The preview language setting. */
    private Preview preview;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The token factory. */
    private final Tokens tokens;

    /** The token kind, set by nextToken().
     */
    protected TokenKind tk;

    /** The token's radix, set by nextToken().
     */
    protected int radix;

    /** The token's name, set by nextToken().
     */
    protected Name name;

    /** The position where a lexical error occurred;
     *  {@code Position.NOPOS} until an error has been reported.
     */
    protected int errPos = Position.NOPOS;

    /** The Unicode reader (low-level stream reader).
     */
    protected UnicodeReader reader;

    /** Should the string be stripped of indentation?
     */
    protected boolean shouldStripIndent;

    /** Should the string's escapes be translated?
     */
    protected boolean shouldTranslateEscapes;

    /** The scanner factory that created this tokenizer.
     */
    protected ScannerFactory fac;

    // The set of lint options currently in effect. It is initialized
    // from the context, and then is set/reset as needed by Attr as it
    // visits all the various parts of the trees during attribution.
    protected Lint lint;
 108 
 109     private static final boolean hexFloatsWork = hexFloatsWork();
 110     private static boolean hexFloatsWork() {
 111         try {
 112             Float.valueOf("0x1.0p1");
 113             return true;
 114         } catch (NumberFormatException ex) {
 115             return false;
 116         }
 117     }
 118 
    /**
     * Create a scanner from a character buffer by wrapping the buffer
     * in a new {@link UnicodeReader}.  This method might modify the input.
     *
     * NOTE(review): the earlier text of this comment spoke of an input
     * array and of ensuring {@code inputLength < input.length} or that
     * {@code input[input.length -1]} is a white space character to avoid
     * copying; that advice appears to describe the {@code char[]}
     * overload rather than this one -- confirm against UnicodeReader.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input, might be modified
     */
    protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
        this(fac, new UnicodeReader(fac, buf));
    }
 132 
    /**
     * Create a scanner from a character array by wrapping the array
     * in a new {@link UnicodeReader} built from the same arguments.
     *
     * NOTE(review): whether the reader copies or modifies the array is
     * not visible here -- confirm against UnicodeReader.
     *
     * @param fac the factory which created this Scanner
     * @param buf the input characters
     * @param inputLength presumably the number of characters of
     *        {@code buf} that hold input; verify against UnicodeReader
     */
    protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
        this(fac, new UnicodeReader(fac, buf, inputLength));
    }
 136 
 137     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 138         this.fac = fac;
 139         this.log = fac.log;
 140         this.tokens = fac.tokens;
 141         this.source = fac.source;
 142         this.preview = fac.preview;
 143         this.reader = reader;
 144         this.lint = fac.lint;
 145     }
 146 
 147     protected void checkSourceLevel(int pos, Feature feature) {
 148         if (preview.isPreview(feature) && !preview.isEnabled()) {
 149             //preview feature without --preview flag, error
 150             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 151         } else if (!feature.allowedInSource(source)) {
 152             //incompatible source level, error
 153             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 154         } else if (preview.isPreview(feature)) {
 155             //use of preview feature, warn
 156             preview.warnPreview(pos, feature);
 157         }
 158     }
 159 
 160     /** Report an error at the given position using the provided arguments.
 161      */
 162     protected void lexError(int pos, JCDiagnostic.Error key) {
 163         log.error(pos, key);
 164         tk = TokenKind.ERROR;
 165         errPos = pos;
 166     }
 167 
 168     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 169         log.error(flags, pos, key);
 170         tk = TokenKind.ERROR;
 171         errPos = pos;
 172     }
 173 
 174     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 175         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 176         log.warning(lc, dp, key);
 177     }
 178 
 179     /** Read next character in character or string literal and copy into sbuf.
 180      */
 181     private void scanLitChar(int pos) {
 182         if (reader.ch == '\\') {
 183             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 184                 reader.skipChar();
 185                 reader.putChar('\\', true);
 186             } else {
 187                 reader.scanChar();
 188                 switch (reader.ch) {
 189                 case '0': case '1': case '2': case '3':
 190                 case '4': case '5': case '6': case '7':
 191                     char leadch = reader.ch;
 192                     int oct = reader.digit(pos, 8);
 193                     reader.scanChar();
 194                     if ('0' <= reader.ch && reader.ch <= '7') {
 195                         oct = oct * 8 + reader.digit(pos, 8);
 196                         reader.scanChar();
 197                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 198                             oct = oct * 8 + reader.digit(pos, 8);
 199                             reader.scanChar();
 200                         }
 201                     }
 202                     reader.putChar((char)oct);
 203                     break;
 204                 case 'b':
 205                     reader.putChar('\b', true); break;
 206                 case 't':
 207                     reader.putChar('\t', true); break;
 208                 case 'n':
 209                     reader.putChar('\n', true); break;
 210                 case 'f':
 211                     reader.putChar('\f', true); break;
 212                 case 'r':
 213                     reader.putChar('\r', true); break;
 214                 case '\'':
 215                     reader.putChar('\'', true); break;
 216                 case '\"':
 217                     reader.putChar('\"', true); break;
 218                 case '\\':
 219                     reader.putChar('\\', true); break;
 220                 default:
 221                     lexError(reader.bp, Errors.IllegalEscChar);
 222                 }
 223             }
 224         } else if (reader.bp != reader.buflen) {
 225             reader.putChar(true);
 226         }
 227     }
 228 
 229     /** Read next character in character or string literal and copy into sbuf
 230      *  without translating escapes. Used by text blocks to preflight verify
 231      *  escapes sequences.
 232      */
 233     private void scanLitCharRaw(int pos) {
 234         if (reader.ch == '\\') {
 235             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 236                 reader.skipChar();
 237                 reader.putChar('\\', false);
 238                 reader.putChar('\\', true);
 239             } else {
 240                 reader.putChar('\\', true);
 241                 switch (reader.ch) {
 242                 case '0': case '1': case '2': case '3':
 243                 case '4': case '5': case '6': case '7':
 244                     char leadch = reader.ch;
 245                     reader.putChar(true);
 246                     if ('0' <= reader.ch && reader.ch <= '7') {
 247                         reader.putChar(true);
 248                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 249                             reader.putChar(true);
 250                         }
 251                     }
 252                     break;
 253                 // Effectively list of valid escape sequences.
 254                 case 'b':
 255                 case 't':
 256                 case 'n':
 257                 case 'f':
 258                 case 'r':
 259                 case '\'':
 260                 case '\"':
 261                 case '\\':
 262                     reader.putChar(true); break;
 263                 default:
 264                     lexError(reader.bp, Errors.IllegalEscChar);
 265                 }
 266             }
 267         } else if (reader.bp != reader.buflen) {
 268             reader.putChar(true);
 269         }
 270     }
 271 
 272     /** Interim access to String methods used to support text blocks.
 273      *  Required to handle bootstrapping with pre-text block jdks.
 274      *  Could be reworked in the 'next' jdk.
 275      */
 276     static class TextBlockSupport {
 277         /** Reflection method to remove incidental indentation.
 278          */
 279         private static final Method stripIndent;
 280 
 281         /** Reflection method to translate escape sequences.
 282          */
 283         private static final Method translateEscapes;
 284 
 285         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
 286          */
 287         private static final boolean hasSupport;
 288 
 289         /** Get a string method via refection or null if not available.
 290          */
 291         private static Method getStringMethodOrNull(String name) {
 292             try {
 293                 return String.class.getMethod(name);
 294             } catch (Exception ex) {
 295                 // Method not available, return null.
 296             }
 297             return null;
 298         }
 299 
 300         static {
 301             // Get text block string methods.
 302             stripIndent = getStringMethodOrNull("stripIndent");
 303             translateEscapes = getStringMethodOrNull("translateEscapes");
 304             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
 305             hasSupport = stripIndent != null && translateEscapes != null;
 306         }
 307 
 308         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
 309          */
 310         static boolean hasSupport() {
 311             return hasSupport;
 312         }
 313 
 314         /** Return the leading whitespace count (indentation) of the line.
 315          */
 316         private static int indent(String line) {
 317             return line.length() - line.stripLeading().length();
 318         }
 319 
 320         enum WhitespaceChecks {
 321             INCONSISTENT,
 322             TRAILING
 323         };
 324 
 325         /** Check that the use of white space in content is not problematic.
 326          */
 327         static Set<WhitespaceChecks> checkWhitespace(String string) {
 328             // Start with empty result set.
 329             Set<WhitespaceChecks> checks = new HashSet<>();
 330             // No need to check empty strings.
 331             if (string.isEmpty()) {
 332                 return checks;
 333             }
 334             // Maximum common indentation.
 335             int outdent = 0;
 336             // No need to check indentation if opting out (last line is empty.)
 337             char lastChar = string.charAt(string.length() - 1);
 338             boolean optOut = lastChar == '\n' || lastChar == '\r';
 339             // Split string based at line terminators.
 340             String[] lines = string.split("\\R");
 341             int length = lines.length;
 342             // Extract last line.
 343             String lastLine = lines[length - 1];
 344             if (!optOut) {
 345                 // Prime with the last line indentation (may be blank.)
 346                 outdent = indent(lastLine);
 347                 for (String line : lines) {
 348                     // Blanks lines have no influence (last line accounted for.)
 349                     if (!line.isBlank()) {
 350                         outdent = Integer.min(outdent, indent(line));
 351                         if (outdent == 0) {
 352                             break;
 353                         }
 354                     }
 355                 }
 356             }
 357             // Last line is representative.
 358             String start = lastLine.substring(0, outdent);
 359             for (String line : lines) {
 360                 // Fail if a line does not have the same indentation.
 361                 if (!line.isBlank() && !line.startsWith(start)) {
 362                     // Mix of different white space
 363                     checks.add(WhitespaceChecks.INCONSISTENT);
 364                 }
 365                 // Line has content even after indent is removed.
 366                 if (outdent < line.length()) {
 367                     // Is the last character a white space.
 368                     lastChar = line.charAt(line.length() - 1);
 369                     if (Character.isWhitespace(lastChar)) {
 370                         // Has trailing white space.
 371                         checks.add(WhitespaceChecks.TRAILING);
 372                     }
 373                 }
 374             }
 375             return checks;
 376         }
 377 
 378         /** Invoke String::stripIndent through reflection.
 379          */
 380         static String stripIndent(String string) {
 381             try {
 382                 string = (String)stripIndent.invoke(string);
 383             } catch (InvocationTargetException | IllegalAccessException ex) {
 384                 throw new RuntimeException(ex);
 385             }
 386             return string;
 387         }
 388 
 389         /** Invoke String::translateEscapes through reflection.
 390          */
 391         static String translateEscapes(String string) {
 392             try {
 393                 string = (String)translateEscapes.invoke(string);
 394             } catch (InvocationTargetException | IllegalAccessException ex) {
 395                 throw new RuntimeException(ex);
 396             }
 397             return string;
 398         }
 399     }
 400 
 401     /** Test for EOLN.
 402      */
 403     private boolean isEOLN() {
 404         return reader.ch == LF || reader.ch == CR;
 405     }
 406 
 407     /** Test for CRLF.
 408      */
 409     private boolean isCRLF() {
 410         return reader.ch == CR && reader.peekChar() == LF;
 411     }
 412 
 413     /** Count and skip repeated occurances of the specified character.
 414      */
 415     private int countChar(char ch, int max) {
 416         int count = 0;
 417         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
 418             reader.scanChar();
 419         }
 420         return count;
 421     }
 422 
 423     /** Scan a string literal or text block.
 424      */
 425     private void scanString(int pos) {
 426         // Clear flags.
 427         shouldStripIndent = false;
 428         shouldTranslateEscapes = false;
 429         // Check if text block string methods are present.
 430         boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
 431         // Track the end of first line for error recovery.
 432         int firstEOLN = -1;
 433         // Attempt to scan for up to 3 double quotes.
 434         int openCount = countChar('\"', 3);
 435         switch (openCount) {
 436         case 1: // Starting a string literal.
 437             break;
 438         case 2: // Starting an empty string literal.
 439             // Start again but only consume one quote.
 440             reader.reset(pos);
 441             openCount = countChar('\"', 1);
 442             break;
 443         case 3: // Starting a text block.
 444             // Check if preview feature is enabled for text blocks.
 445             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
 446             // Only proceed if text block string methods are present.
 447             if (hasTextBlockSupport) {
 448                 // Indicate that the final string should have incidental indentation removed.
 449                 shouldStripIndent = true;
 450                 // Verify the open delimiter sequence.
 451                 boolean hasOpenEOLN = false;
 452                 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
 453                     hasOpenEOLN = isEOLN();
 454                     if (hasOpenEOLN) {
 455                         break;
 456                     }
 457                     reader.scanChar();
 458                 }
 459                 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>.
 460                 if (!hasOpenEOLN) {
 461                     lexError(reader.bp, Errors.IllegalTextBlockOpen);
 462                     return;
 463                 }
 464                 // Skip line terminator.
 465                 int start = reader.bp;
 466                 if (isCRLF()) {
 467                     reader.scanChar();
 468                 }
 469                 reader.scanChar();
 470                 processLineTerminator(start, reader.bp);
 471             } else {
 472                 // No text block string methods are present, so reset and treat like string literal.
 473                 reader.reset(pos);
 474                 openCount = countChar('\"', 1);
 475             }
 476             break;
 477         }
 478         // While characters are available.
 479         while (reader.bp < reader.buflen) {
 480             // If possible close delimiter sequence.
 481             if (reader.ch == '\"') {
 482                 // Check to see if enough double quotes are present.
 483                 int closeCount = countChar('\"', openCount);
 484                 if (openCount == closeCount) {
 485                     // Good result.
 486                     tk = Tokens.TokenKind.STRINGLITERAL;
 487                     return;
 488                 }
 489                 // False alarm, add double quotes to string buffer.
 490                 reader.repeat('\"', closeCount);
 491             } else if (isEOLN()) {
 492                 // Line terminator in string literal is an error.
 493                 // Fall out to unclosed string literal error.
 494                 if (openCount == 1) {
 495                     break;
 496                 }
 497                  // Add line terminator to string buffer.
 498                 int start = reader.bp;
 499                 if (isCRLF()) {
 500                     reader.scanChar();
 501                 }
 502                 reader.putChar('\n', true);
 503                 processLineTerminator(start, reader.bp);
 504                 // Record first line terminator for error recovery.
 505                 if (firstEOLN == -1) {
 506                     firstEOLN = reader.bp;
 507                 }
 508             } else if (reader.ch == '\\') {
 509                 // Handle escape sequences.
 510                 if (hasTextBlockSupport) {
 511                     // Indicate that the final string should have escapes translated.
 512                     shouldTranslateEscapes = true;
 513                     // Validate escape sequence and add to string buffer.
 514                     scanLitCharRaw(pos);
 515                 } else {
 516                     // Translate escape sequence and add result to string buffer.
 517                     scanLitChar(pos);
 518                 }
 519             } else {
 520                 // Add character to string buffer.
 521                 reader.putChar(true);
 522             }
 523         }
 524         // String ended without close delimiter sequence.
 525         lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
 526         if (firstEOLN  != -1) {
 527             // Reset recovery position to point after open delimiter sequence.
 528             reader.reset(firstEOLN);
 529         }
 530     }
 531 
 532     private void scanDigits(int pos, int digitRadix) {
 533         char saveCh;
 534         int savePos;
 535         do {
 536             if (reader.ch != '_') {
 537                 reader.putChar(false);
 538             }
 539             saveCh = reader.ch;
 540             savePos = reader.bp;
 541             reader.scanChar();
 542         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 543         if (saveCh == '_')
 544             lexError(savePos, Errors.IllegalUnderscore);
 545     }
 546 
 547     /** Read fractional part of hexadecimal floating point number.
 548      */
 549     private void scanHexExponentAndSuffix(int pos) {
 550         if (reader.ch == 'p' || reader.ch == 'P') {
 551             reader.putChar(true);
 552             skipIllegalUnderscores();
 553             if (reader.ch == '+' || reader.ch == '-') {
 554                 reader.putChar(true);
 555             }
 556             skipIllegalUnderscores();
 557             if (reader.digit(pos, 10) >= 0) {
 558                 scanDigits(pos, 10);
 559                 if (!hexFloatsWork)
 560                     lexError(pos, Errors.UnsupportedCrossFpLit);
 561             } else
 562                 lexError(pos, Errors.MalformedFpLit);
 563         } else {
 564             lexError(pos, Errors.MalformedFpLit);
 565         }
 566         if (reader.ch == 'f' || reader.ch == 'F') {
 567             reader.putChar(true);
 568             tk = TokenKind.FLOATLITERAL;
 569             radix = 16;
 570         } else {
 571             if (reader.ch == 'd' || reader.ch == 'D') {
 572                 reader.putChar(true);
 573             }
 574             tk = TokenKind.DOUBLELITERAL;
 575             radix = 16;
 576         }
 577     }
 578 
 579     /** Read fractional part of floating point number.
 580      */
 581     private void scanFraction(int pos) {
 582         skipIllegalUnderscores();
 583         if (reader.digit(pos, 10) >= 0) {
 584             scanDigits(pos, 10);
 585         }
 586         int sp1 = reader.sp;
 587         if (reader.ch == 'e' || reader.ch == 'E') {
 588             reader.putChar(true);
 589             skipIllegalUnderscores();
 590             if (reader.ch == '+' || reader.ch == '-') {
 591                 reader.putChar(true);
 592             }
 593             skipIllegalUnderscores();
 594             if (reader.digit(pos, 10) >= 0) {
 595                 scanDigits(pos, 10);
 596                 return;
 597             }
 598             lexError(pos, Errors.MalformedFpLit);
 599             reader.sp = sp1;
 600         }
 601     }
 602 
 603     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 604      */
 605     private void scanFractionAndSuffix(int pos) {
 606         radix = 10;
 607         scanFraction(pos);
 608         if (reader.ch == 'f' || reader.ch == 'F') {
 609             reader.putChar(true);
 610             tk = TokenKind.FLOATLITERAL;
 611         } else {
 612             if (reader.ch == 'd' || reader.ch == 'D') {
 613                 reader.putChar(true);
 614             }
 615             tk = TokenKind.DOUBLELITERAL;
 616         }
 617     }
 618 
 619     /** Read fractional part and 'd' or 'f' suffix of floating point number.
 620      */
 621     private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
 622         radix = 16;
 623         Assert.check(reader.ch == '.');
 624         reader.putChar(true);
 625         skipIllegalUnderscores();
 626         if (reader.digit(pos, 16) >= 0) {
 627             seendigit = true;
 628             scanDigits(pos, 16);
 629         }
 630         if (!seendigit)
 631             lexError(pos, Errors.InvalidHexNumber);
 632         else
 633             scanHexExponentAndSuffix(pos);
 634     }
 635 
 636     private void skipIllegalUnderscores() {
 637         if (reader.ch == '_') {
 638             lexError(reader.bp, Errors.IllegalUnderscore);
 639             while (reader.ch == '_')
 640                 reader.scanChar();
 641         }
 642     }
 643 
 644     /** Read a number.
 645      *  @param radix  The radix of the number; one of 2, 8, 10, 16.
 646      */
 647     private void scanNumber(int pos, int radix) {
 648         // for octal, allow base-10 digit in case it's a float literal
 649         this.radix = radix;
 650         int digitRadix = (radix == 8 ? 10 : radix);
 651         int firstDigit = reader.digit(pos, Math.max(10, digitRadix));
 652         boolean seendigit = firstDigit >= 0;
 653         boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
 654         if (seendigit) {
 655             scanDigits(pos, digitRadix);
 656         }
 657         if (radix == 16 && reader.ch == '.') {
 658             scanHexFractionAndSuffix(pos, seendigit);
 659         } else if (seendigit && radix == 16 && (reader.ch == 'p' || reader.ch == 'P')) {
 660             scanHexExponentAndSuffix(pos);
 661         } else if (digitRadix == 10 && reader.ch == '.') {
 662             reader.putChar(true);
 663             scanFractionAndSuffix(pos);
 664         } else if (digitRadix == 10 &&
 665                    (reader.ch == 'e' || reader.ch == 'E' ||
 666                     reader.ch == 'f' || reader.ch == 'F' ||
 667                     reader.ch == 'd' || reader.ch == 'D')) {
 668             scanFractionAndSuffix(pos);
 669         } else {
 670             if (!seenValidDigit) {
 671                 switch (radix) {
 672                 case 2:
 673                     lexError(pos, Errors.InvalidBinaryNumber);
 674                     break;
 675                 case 16:
 676                     lexError(pos, Errors.InvalidHexNumber);
 677                     break;
 678                 }
 679             }
 680             if (reader.ch == 'l' || reader.ch == 'L') {
 681                 reader.scanChar();
 682                 tk = TokenKind.LONGLITERAL;
 683             } else {
 684                 tk = TokenKind.INTLITERAL;
 685             }
 686         }
 687     }
 688 
 689     /** Read an identifier.
 690      */
 691     private void scanIdent() {
 692         boolean isJavaIdentifierPart;
 693         char high;
 694         reader.putChar(true);
 695         do {
 696             switch (reader.ch) {
 697             case 'A': case 'B': case 'C': case 'D': case 'E':
 698             case 'F': case 'G': case 'H': case 'I': case 'J':
 699             case 'K': case 'L': case 'M': case 'N': case 'O':
 700             case 'P': case 'Q': case 'R': case 'S': case 'T':
 701             case 'U': case 'V': case 'W': case 'X': case 'Y':
 702             case 'Z':
 703             case 'a': case 'b': case 'c': case 'd': case 'e':
 704             case 'f': case 'g': case 'h': case 'i': case 'j':
 705             case 'k': case 'l': case 'm': case 'n': case 'o':
 706             case 'p': case 'q': case 'r': case 's': case 't':
 707             case 'u': case 'v': case 'w': case 'x': case 'y':
 708             case 'z':
 709             case '$': case '_':
 710             case '0': case '1': case '2': case '3': case '4':
 711             case '5': case '6': case '7': case '8': case '9':
 712                 break;
 713             case '\u0000': case '\u0001': case '\u0002': case '\u0003':
 714             case '\u0004': case '\u0005': case '\u0006': case '\u0007':
 715             case '\u0008': case '\u000E': case '\u000F': case '\u0010':
 716             case '\u0011': case '\u0012': case '\u0013': case '\u0014':
 717             case '\u0015': case '\u0016': case '\u0017':
 718             case '\u0018': case '\u0019': case '\u001B':
 719             case '\u007F':
 720                 reader.scanChar();
 721                 continue;
 722             case '\u001A': // EOI is also a legal identifier part
 723                 if (reader.bp >= reader.buflen) {
 724                     name = reader.name();
 725                     tk = tokens.lookupKind(name);
 726                     return;
 727                 }
 728                 reader.scanChar();
 729                 continue;
 730             default:
 731                 if (reader.ch < '\u0080') {
 732                     // all ASCII range chars already handled, above
 733                     isJavaIdentifierPart = false;
 734                 } else {
 735                     if (Character.isIdentifierIgnorable(reader.ch)) {
 736                         reader.scanChar();
 737                         continue;
 738                     } else {
 739                         int codePoint = reader.peekSurrogates();
 740                         if (codePoint >= 0) {
 741                             if (isJavaIdentifierPart = Character.isJavaIdentifierPart(codePoint)) {
 742                                 reader.putChar(true);
 743                             }
 744                         } else {
 745                             isJavaIdentifierPart = Character.isJavaIdentifierPart(reader.ch);
 746                         }
 747                     }
 748                 }
 749                 if (!isJavaIdentifierPart) {
 750                     name = reader.name();
 751                     tk = tokens.lookupKind(name);
 752                     return;
 753                 }
 754             }
 755             reader.putChar(true);
 756         } while (true);
 757     }
 758 
 759     /** Return true if reader.ch can be part of an operator.
 760      */
 761     private boolean isSpecial(char ch) {
 762         switch (ch) {
 763         case '!': case '%': case '&': case '*': case '?':
 764         case '+': case '-': case ':': case '<': case '=':
 765         case '>': case '^': case '|': case '~':
 766         case '@':
 767             return true;
 768         default:
 769             return false;
 770         }
 771     }
 772 
 773     /** Read longest possible sequence of special characters and convert
 774      *  to token.
 775      */
 776     private void scanOperator() {
 777         while (true) {
 778             reader.putChar(false);
 779             Name newname = reader.name();
 780             TokenKind tk1 = tokens.lookupKind(newname);
 781             if (tk1 == TokenKind.IDENTIFIER) {
 782                 reader.sp--;
 783                 break;
 784             }
 785             tk = tk1;
 786             reader.scanChar();
 787             if (!isSpecial(reader.ch)) break;
 788         }
 789     }
 790 
    /** Read the next token from the input.
     *
     *  Whitespace, line terminators and comments are consumed first; every
     *  comment that gets recorded is handed to the token that follows it.
     *  The scanned kind is left in {@code tk}, and the concrete Token
     *  subclass that is returned is selected by {@code tk.tag}.
     *
     *  @return the token that was scanned
     *  @throws AssertionError if {@code tk.tag} is not one of the tags
     *          handled by the final switch
     */
    public Token readToken() {

        // Reset per-token state.
        // NOTE(review): sp looks like the fill index of the reader's
        // character buffer, so this empties it -- confirm in UnicodeReader.
        reader.sp = 0;
        name = null;
        radix = 0;

        int pos = 0;
        int endPos = 0;
        List<Comment> comments = null;

        try {
            // Each iteration starts a new lexical element at pos. Cases that
            // only skip input (whitespace, line terminators, comments) use a
            // plain 'break' to go around again; cases that produce a token
            // (or report an error) leave via 'break loop'.
            loop: while (true) {
                pos = reader.bp;
                switch (reader.ch) {
                case ' ': // (Spec 3.6)
                case '\t': // (Spec 3.6)
                case FF: // (Spec 3.6)
                    // Swallow the whole run of blanks in one go.
                    do {
                        reader.scanChar();
                    } while (reader.ch == ' ' || reader.ch == '\t' || reader.ch == FF);
                    processWhiteSpace(pos, reader.bp);
                    break;
                case LF: // (Spec 3.4)
                    reader.scanChar();
                    processLineTerminator(pos, reader.bp);
                    break;
                case CR: // (Spec 3.4)
                    // CR followed by LF is reported as a single terminator.
                    reader.scanChar();
                    if (reader.ch == LF) {
                        reader.scanChar();
                    }
                    processLineTerminator(pos, reader.bp);
                    break;
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                case '$': case '_':
                    // ASCII identifier start.
                    scanIdent();
                    break loop;
                case '0':
                    // A leading zero selects the radix: 0x/0X -> 16,
                    // 0b/0B -> 2, anything else is scanned with radix 8.
                    reader.scanChar();
                    if (reader.ch == 'x' || reader.ch == 'X') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 16);
                    } else if (reader.ch == 'b' || reader.ch == 'B') {
                        reader.scanChar();
                        skipIllegalUnderscores();
                        scanNumber(pos, 2);
                    } else {
                        reader.putChar('0');
                        if (reader.ch == '_') {
                            // Skip the underscores after the zero; they are
                            // only legal when a digit follows them.
                            int savePos = reader.bp;
                            do {
                                reader.scanChar();
                            } while (reader.ch == '_');
                            if (reader.digit(pos, 10) < 0) {
                                lexError(savePos, Errors.IllegalUnderscore);
                            }
                        }
                        scanNumber(pos, 8);
                    }
                    break loop;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    scanNumber(pos, 10);
                    break loop;
                case '.':
                    // '.' may start a fraction (".5"), an ellipsis ("..."),
                    // or be a plain DOT. Exactly two dots is an error.
                    reader.scanChar();
                    if (reader.digit(pos, 10) >= 0) {
                        reader.putChar('.');
                        scanFractionAndSuffix(pos);
                    } else if (reader.ch == '.') {
                        int savePos = reader.bp;
                        reader.putChar('.'); reader.putChar('.', true);
                        if (reader.ch == '.') {
                            reader.scanChar();
                            reader.putChar('.');
                            tk = TokenKind.ELLIPSIS;
                        } else {
                            lexError(savePos, Errors.IllegalDot);
                        }
                    } else {
                        tk = TokenKind.DOT;
                    }
                    break loop;
                // Single-character separators.
                case ',':
                    reader.scanChar(); tk = TokenKind.COMMA; break loop;
                case ';':
                    reader.scanChar(); tk = TokenKind.SEMI; break loop;
                case '(':
                    reader.scanChar(); tk = TokenKind.LPAREN; break loop;
                case ')':
                    reader.scanChar(); tk = TokenKind.RPAREN; break loop;
                case '[':
                    reader.scanChar(); tk = TokenKind.LBRACKET; break loop;
                case ']':
                    reader.scanChar(); tk = TokenKind.RBRACKET; break loop;
                case '{':
                    reader.scanChar(); tk = TokenKind.LBRACE; break loop;
                case '}':
                    reader.scanChar(); tk = TokenKind.RBRACE; break loop;
                case '/':
                    // '/' starts a line comment, a block/javadoc comment,
                    // the '/=' operator, or is the '/' operator itself.
                    reader.scanChar();
                    if (reader.ch == '/') {
                        // Line comment: runs up to (not including) CR/LF or
                        // the end of the buffer.
                        do {
                            reader.scanCommentChar();
                        } while (reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen);
                        // The comment is only recorded when it stopped before
                        // the end of the buffer.
                        if (reader.bp < reader.buflen) {
                            comments = addComment(comments, processComment(pos, reader.bp, CommentStyle.LINE));
                        }
                        break;
                    } else if (reader.ch == '*') {
                        // isEmpty is set for the four-character comment
                        // "/**/", whose closing '/' is already current.
                        boolean isEmpty = false;
                        reader.scanChar();
                        CommentStyle style;
                        if (reader.ch == '*') {
                            style = CommentStyle.JAVADOC;
                            reader.scanCommentChar();
                            if (reader.ch == '/') {
                                isEmpty = true;
                            }
                        } else {
                            style = CommentStyle.BLOCK;
                        }
                        // Advance until positioned on the '/' of a closing
                        // "*/", or until the buffer is exhausted.
                        while (!isEmpty && reader.bp < reader.buflen) {
                            if (reader.ch == '*') {
                                reader.scanChar();
                                if (reader.ch == '/') break;
                            } else {
                                reader.scanCommentChar();
                            }
                        }
                        if (reader.ch == '/') {
                            // Properly closed: consume the '/' and record it.
                            reader.scanChar();
                            comments = addComment(comments, processComment(pos, reader.bp, style));
                            break;
                        } else {
                            lexError(pos, Errors.UnclosedComment);
                            break loop;
                        }
                    } else if (reader.ch == '=') {
                        tk = TokenKind.SLASHEQ;
                        reader.scanChar();
                    } else {
                        tk = TokenKind.SLASH;
                    }
                    break loop;
                case '\'':
                    // Character literal: exactly one literal character
                    // between single quotes.
                    reader.scanChar();
                    if (reader.ch == '\'') {
                        lexError(pos, Errors.EmptyCharLit);
                        reader.scanChar();
                    } else {
                        if (isEOLN())
                            lexError(pos, Errors.IllegalLineEndInCharLit);
                        scanLitChar(pos);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
                        } else {
                            lexError(pos, Errors.UnclosedCharLit);
                        }
                    }
                    break loop;
                case '\"':
                    scanString(pos);
                    break loop;
                default:
                    // Everything else: operators, non-ASCII identifiers and
                    // digits, end of input, or an illegal character.
                    if (isSpecial(reader.ch)) {
                        scanOperator();
                    } else {
                        boolean isJavaIdentifierStart;
                        // Stays negative unless peekSurrogates() reports a
                        // code point for the current position.
                        int codePoint = -1;
                        if (reader.ch < '\u0080') {
                            // all ASCII range chars already handled, above
                            isJavaIdentifierStart = false;
                        } else {
                            codePoint = reader.peekSurrogates();
                            if (codePoint >= 0) {
                                if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
                                    reader.putChar(true);
                                }
                            } else {
                                isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
                            }
                        }
                        if (isJavaIdentifierStart) {
                            scanIdent();
                        } else if (reader.digit(pos, 10) >= 0) {
                            scanNumber(pos, 10);
                        // End of input: either the buffer is exhausted, or the
                        // current char is EOI and it is the last one in the
                        // buffer ('&&' binds tighter than '||' here).
                        } else if (reader.bp == reader.buflen || reader.ch == EOI && reader.bp + 1 == reader.buflen) { // JLS 3.5
                            tk = TokenKind.EOF;
                            pos = reader.realLength;
                        } else {
                            // Illegal character: build a printable form for
                            // the diagnostic, then skip past it.
                            String arg;

                            if (codePoint >= 0) {
                                // Surrogate pair: report both code units.
                                char high = reader.ch;
                                reader.scanChar();
                                arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
                            } else {
                                // Printable ASCII is shown as-is, anything
                                // else as a \\uXXXX escape.
                                arg = (32 < reader.ch && reader.ch < 127) ?
                                                String.format("%s", reader.ch) :
                                                String.format("\\u%04x", (int)reader.ch);
                            }
                            lexError(pos, Errors.IllegalChar(arg));
                            reader.scanChar();
                        }
                    }
                    break loop;
                }
            }
            // The token spans [pos, endPos); build the Token subclass that
            // matches the tag of the scanned kind.
            endPos = reader.bp;
            switch (tk.tag) {
                case DEFAULT: return new Token(tk, pos, endPos, comments);
                case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
                case STRING: {
                    // Get characters from string buffer.
                    String string = reader.chars();
                    // If a text block.
                    if (shouldStripIndent) {
                        // Verify that the incidental indentation is consistent.
                        if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                            Set<TextBlockSupport.WhitespaceChecks> checks =
                                    TextBlockSupport.checkWhitespace(string);
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.InconsistentWhiteSpaceIndentation);
                            }
                            if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
                                lexWarning(LintCategory.TEXT_BLOCKS, pos,
                                        Warnings.TrailingWhiteSpaceWillBeRemoved);
                            }
                        }
                        // Remove incidental indentation.
                        string = TextBlockSupport.stripIndent(string);
                    }
                    // Translate escape sequences if present.
                    if (shouldTranslateEscapes) {
                        string = TextBlockSupport.translateEscapes(string);
                    }
                    // Build string token.
                    return new StringToken(tk, pos, endPos, string, comments);
                }
                case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
                default: throw new AssertionError();
            }
        }
        finally {
            // Debug trace of the raw text between pos and endPos; runs on
            // every exit path, including exceptional ones.
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
                                       new String(reader.getRawCharacters(pos, endPos))
                                       + "|");
            }
        }
    }
1060     //where
1061         List<Comment> addComment(List<Comment> comments, Comment comment) {
1062             return comments == null ?
1063                     List.of(comment) :
1064                     comments.prepend(comment);
1065         }
1066 
1067     /** Return the position where a lexical error occurred;
1068      */
1069     public int errPos() {
1070         return errPos;
1071     }
1072 
1073     /** Set the position where a lexical error occurred;
1074      */
1075     public void errPos(int pos) {
1076         errPos = pos;
1077     }
1078 
1079     /**
1080      * Called when a complete comment has been scanned. pos and endPos
1081      * will mark the comment boundary.
1082      */
1083     protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) {
1084         if (scannerDebug)
1085             System.out.println("processComment(" + pos
1086                                + "," + endPos + "," + style + ")=|"
1087                                + new String(reader.getRawCharacters(pos, endPos))
1088                                + "|");
1089         char[] buf = reader.getRawCharacters(pos, endPos);
1090         return new BasicComment<>(new UnicodeReader(fac, buf, buf.length), style);
1091     }
1092 
1093     /**
1094      * Called when a complete whitespace run has been scanned. pos and endPos
1095      * will mark the whitespace boundary.
1096      */
1097     protected void processWhiteSpace(int pos, int endPos) {
1098         if (scannerDebug)
1099             System.out.println("processWhitespace(" + pos
1100                                + "," + endPos + ")=|" +
1101                                new String(reader.getRawCharacters(pos, endPos))
1102                                + "|");
1103     }
1104 
1105     /**
1106      * Called when a line terminator has been processed.
1107      */
1108     protected void processLineTerminator(int pos, int endPos) {
1109         if (scannerDebug)
1110             System.out.println("processTerminator(" + pos
1111                                + "," + endPos + ")=|" +
1112                                new String(reader.getRawCharacters(pos, endPos))
1113                                + "|");
1114     }
1115 
1116     /** Build a map for translating between line numbers and
1117      * positions in the input.
1118      *
1119      * @return a LineMap */
1120     public Position.LineMap getLineMap() {
1121         return Position.makeLineMap(reader.getRawCharacters(), reader.buflen, false);
1122     }
1123 
1124 
1125     /**
1126     * Scan a documentation comment; determine if a deprecated tag is present.
1127     * Called once the initial /, * have been skipped, positioned at the second *
1128     * (which is treated as the beginning of the first line).
1129     * Stops positioned at the closing '/'.
1130     */
1131     protected static class BasicComment<U extends UnicodeReader> implements Comment {
1132 
1133         CommentStyle cs;
1134         U comment_reader;
1135 
1136         protected boolean deprecatedFlag = false;
1137         protected boolean scanned = false;
1138 
1139         protected BasicComment(U comment_reader, CommentStyle cs) {
1140             this.comment_reader = comment_reader;
1141             this.cs = cs;
1142         }
1143 
1144         public String getText() {
1145             return null;
1146         }
1147 
1148         public int getSourcePos(int pos) {
1149             return -1;
1150         }
1151 
1152         public CommentStyle getStyle() {
1153             return cs;
1154         }
1155 
1156         public boolean isDeprecated() {
1157             if (!scanned && cs == CommentStyle.JAVADOC) {
1158                 scanDocComment();
1159             }
1160             return deprecatedFlag;
1161         }
1162 
1163         @SuppressWarnings("fallthrough")
1164         protected void scanDocComment() {
1165             try {
1166                 boolean deprecatedPrefix = false;
1167 
1168                 comment_reader.bp += 3; // '/**'
1169                 comment_reader.ch = comment_reader.buf[comment_reader.bp];
1170 
1171                 forEachLine:
1172                 while (comment_reader.bp < comment_reader.buflen) {
1173 
1174                     // Skip optional WhiteSpace at beginning of line
1175                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1176                         comment_reader.scanCommentChar();
1177                     }
1178 
1179                     // Skip optional consecutive Stars
1180                     while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
1181                         comment_reader.scanCommentChar();
1182                         if (comment_reader.ch == '/') {
1183                             return;
1184                         }
1185                     }
1186 
1187                     // Skip optional WhiteSpace after Stars
1188                     while (comment_reader.bp < comment_reader.buflen && (comment_reader.ch == ' ' || comment_reader.ch == '\t' || comment_reader.ch == FF)) {
1189                         comment_reader.scanCommentChar();
1190                     }
1191 
1192                     deprecatedPrefix = false;
1193                     // At beginning of line in the JavaDoc sense.
1194                     if (!deprecatedFlag) {
1195                         String deprecated = "@deprecated";
1196                         int i = 0;
1197                         while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == deprecated.charAt(i)) {
1198                             comment_reader.scanCommentChar();
1199                             i++;
1200                             if (i == deprecated.length()) {
1201                                 deprecatedPrefix = true;
1202                                 break;
1203                             }
1204                         }
1205                     }
1206 
1207                     if (deprecatedPrefix && comment_reader.bp < comment_reader.buflen) {
1208                         if (Character.isWhitespace(comment_reader.ch)) {
1209                             deprecatedFlag = true;
1210                         } else if (comment_reader.ch == '*') {
1211                             comment_reader.scanCommentChar();
1212                             if (comment_reader.ch == '/') {
1213                                 deprecatedFlag = true;
1214                                 return;
1215                             }
1216                         }
1217                     }
1218 
1219                     // Skip rest of line
1220                     while (comment_reader.bp < comment_reader.buflen) {
1221                         switch (comment_reader.ch) {
1222                             case '*':
1223                                 comment_reader.scanCommentChar();
1224                                 if (comment_reader.ch == '/') {
1225                                     return;
1226                                 }
1227                                 break;
1228                             case CR: // (Spec 3.4)
1229                                 comment_reader.scanCommentChar();
1230                                 if (comment_reader.ch != LF) {
1231                                     continue forEachLine;
1232                                 }
1233                             /* fall through to LF case */
1234                             case LF: // (Spec 3.4)
1235                                 comment_reader.scanCommentChar();
1236                                 continue forEachLine;
1237                             default:
1238                                 comment_reader.scanCommentChar();
1239                         }
1240                     } // rest of line
1241                 } // forEachLine
1242                 return;
1243             } finally {
1244                 scanned = true;
1245             }
1246         }
1247     }
1248 }