< prev index next >

src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java

Print this page




   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 
  28 import com.sun.tools.javac.code.Lint;
  29 import com.sun.tools.javac.code.Lint.LintCategory;
  30 import com.sun.tools.javac.code.Preview;
  31 import com.sun.tools.javac.code.Source;
  32 import com.sun.tools.javac.code.Source.Feature;
  33 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  34 import com.sun.tools.javac.resources.CompilerProperties.Errors;
  35 import com.sun.tools.javac.resources.CompilerProperties.Warnings;
  36 import com.sun.tools.javac.util.*;
  37 import com.sun.tools.javac.util.JCDiagnostic.*;
  38 
  39 import java.lang.reflect.InvocationTargetException;
  40 import java.lang.reflect.Method;
  41 import java.nio.CharBuffer;
  42 import java.util.HashSet;
  43 import java.util.Set;
  44 
  45 import static com.sun.tools.javac.parser.Tokens.*;
  46 import static com.sun.tools.javac.util.LayoutCharacters.*;
  47 
  48 /** The lexical analyzer maps an input stream consisting of
  49  *  ASCII characters and Unicode escapes into a token sequence.
  50  *
  51  *  <p><b>This is NOT part of any supported API.
  52  *  If you write code that depends on this, you do so at your own risk.
  53  *  This code and its internal interfaces are subject to change or
  54  *  deletion without notice.</b>
  55  */
  56 public class JavaTokenizer {
  57 
  58     private static final boolean scannerDebug = false;
  59 
  60     /** The source language setting.
  61      */
  62     private Source source;
  63 


  74     /** The token kind, set by nextToken().
  75      */
  76     protected TokenKind tk;
  77 
  78     /** The token's radix, set by nextToken().
  79      */
  80     protected int radix;
  81 
  82     /** The token's name, set by nextToken().
  83      */
  84     protected Name name;
  85 
  86     /** The position where a lexical error occurred.
  87      */
  88     protected int errPos = Position.NOPOS;
  89 
  90     /** The Unicode reader (low-level stream reader).
  91      */
  92     protected UnicodeReader reader;
  93 
  94     /** Should the string be stripped of indentation?
  95      */
  96     protected boolean shouldStripIndent;
  97 
  98     /** Should the string's escapes be translated?
  99      */
 100     protected boolean shouldTranslateEscapes;
 101 
 102     protected ScannerFactory fac;
 103 
 104     // The set of lint options currently in effect. It is initialized
 105     // from the context, and then is set/reset as needed by Attr as it
 106     // visits all the various parts of the trees during attribution.
 107     protected Lint lint;
 108 
 109     private static final boolean hexFloatsWork = hexFloatsWork();
 110     private static boolean hexFloatsWork() {
 111         try {
 112             Float.valueOf("0x1.0p1");
 113             return true;
 114         } catch (NumberFormatException ex) {
 115             return false;
 116         }
 117     }
 118 
 119     /**
 120      * Create a scanner from the input array.  This method might
 121      * modify the array.  To avoid copying the input array, ensure
 122      * that {@code inputLength < input.length} or
 123      * {@code input[input.length -1]} is a white space character.
 124      *
 125      * @param fac the factory which created this Scanner
 126      * @param buf the input, might be modified
 127      * Must be positive and less than or equal to input.length.
 128      */
 129     protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
 130         this(fac, new UnicodeReader(fac, buf));
 131     }
 132 
 133     protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
 134         this(fac, new UnicodeReader(fac, buf, inputLength));
 135     }
 136 
 137     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 138         this.fac = fac;
 139         this.log = fac.log;
 140         this.tokens = fac.tokens;
 141         this.source = fac.source;
 142         this.preview = fac.preview;
 143         this.reader = reader;
 144         this.lint = fac.lint;
 145     }
 146 
 147     protected void checkSourceLevel(int pos, Feature feature) {
 148         if (preview.isPreview(feature) && !preview.isEnabled()) {
 149             //preview feature without --preview flag, error
 150             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 151         } else if (!feature.allowedInSource(source)) {
 152             //incompatible source level, error
 153             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 154         } else if (preview.isPreview(feature)) {
 155             //use of preview feature, warn
 156             preview.warnPreview(pos, feature);
 157         }
 158     }
 159 
 160     /** Report an error at the given position using the provided arguments.
 161      */
 162     protected void lexError(int pos, JCDiagnostic.Error key) {
 163         log.error(pos, key);
 164         tk = TokenKind.ERROR;
 165         errPos = pos;
 166     }
 167 
 168     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 169         log.error(flags, pos, key);
 170         tk = TokenKind.ERROR;
 171         errPos = pos;
 172     }
 173 
 174     protected void lexWarning(LintCategory lc, int pos, JCDiagnostic.Warning key) {
 175         DiagnosticPosition dp = new SimpleDiagnosticPosition(pos) ;
 176         log.warning(lc, dp, key);
 177     }
 178 
 179     /** Read next character in character or string literal and copy into sbuf.
 180      */
 181     private void scanLitChar(int pos) {
 182         if (reader.ch == '\\') {
 183             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 184                 reader.skipChar();
 185                 reader.putChar('\\', true);
 186             } else {
 187                 reader.scanChar();
 188                 switch (reader.ch) {
 189                 case '0': case '1': case '2': case '3':
 190                 case '4': case '5': case '6': case '7':
 191                     char leadch = reader.ch;
 192                     int oct = reader.digit(pos, 8);
 193                     reader.scanChar();
 194                     if ('0' <= reader.ch && reader.ch <= '7') {
 195                         oct = oct * 8 + reader.digit(pos, 8);
 196                         reader.scanChar();
 197                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 198                             oct = oct * 8 + reader.digit(pos, 8);


 209                     reader.putChar('\n', true); break;
 210                 case 'f':
 211                     reader.putChar('\f', true); break;
 212                 case 'r':
 213                     reader.putChar('\r', true); break;
 214                 case '\'':
 215                     reader.putChar('\'', true); break;
 216                 case '\"':
 217                     reader.putChar('\"', true); break;
 218                 case '\\':
 219                     reader.putChar('\\', true); break;
 220                 default:
 221                     lexError(reader.bp, Errors.IllegalEscChar);
 222                 }
 223             }
 224         } else if (reader.bp != reader.buflen) {
 225             reader.putChar(true);
 226         }
 227     }
 228 
 229     /** Read next character in character or string literal and copy into sbuf
 230      *  without translating escapes. Used by text blocks to preflight verify
 231      *  escapes sequences.
 232      */
 233     private void scanLitCharRaw(int pos) {
 234         if (reader.ch == '\\') {
 235             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 236                 reader.skipChar();
 237                 reader.putChar('\\', false);
 238                 reader.putChar('\\', true);
 239             } else {
 240                 reader.putChar('\\', true);
 241                 switch (reader.ch) {
 242                 case '0': case '1': case '2': case '3':
 243                 case '4': case '5': case '6': case '7':
 244                     char leadch = reader.ch;
 245                     reader.putChar(true);
 246                     if ('0' <= reader.ch && reader.ch <= '7') {
 247                         reader.putChar(true);
 248                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 249                             reader.putChar(true);
 250                         }
 251                     }
 252                     break;
 253                 // Effectively list of valid escape sequences.
 254                 case 'b':
 255                 case 't':
 256                 case 'n':
 257                 case 'f':
 258                 case 'r':
 259                 case '\'':
 260                 case '\"':
 261                 case '\\':
 262                     reader.putChar(true); break;
 263                 default:
 264                     lexError(reader.bp, Errors.IllegalEscChar);
 265                 }
 266             }
 267         } else if (reader.bp != reader.buflen) {
 268             reader.putChar(true);
 269         }
 270     }
 271 
 272     /** Interim access to String methods used to support text blocks.
 273      *  Required to handle bootstrapping with pre-text block jdks.
 274      *  Could be reworked in the 'next' jdk.
 275      */
 276     static class TextBlockSupport {
 277         /** Reflection method to remove incidental indentation.
 278          */
 279         private static final Method stripIndent;
 280 
 281         /** Reflection method to translate escape sequences.
 282          */
 283         private static final Method translateEscapes;
 284 
 285         /** true if stripIndent and translateEscapes are available in the bootstrap jdk.
 286          */
 287         private static final boolean hasSupport;
 288 
 289         /** Get a string method via refection or null if not available.
 290          */
 291         private static Method getStringMethodOrNull(String name) {
 292             try {
 293                 return String.class.getMethod(name);
 294             } catch (Exception ex) {
 295                 // Method not available, return null.
 296             }
 297             return null;
 298         }
 299 
 300         static {
 301             // Get text block string methods.
 302             stripIndent = getStringMethodOrNull("stripIndent");
 303             translateEscapes = getStringMethodOrNull("translateEscapes");
 304             // true if stripIndent and translateEscapes are available in the bootstrap jdk.
 305             hasSupport = stripIndent != null && translateEscapes != null;
 306         }
 307 
 308         /** Return true if stripIndent and translateEscapes are available in the bootstrap jdk.
 309          */
 310         static boolean hasSupport() {
 311             return hasSupport;
 312         }
 313 
 314         /** Return the leading whitespace count (indentation) of the line.
 315          */
 316         private static int indent(String line) {
 317             return line.length() - line.stripLeading().length();
 318         }
 319 
 320         enum WhitespaceChecks {
 321             INCONSISTENT,
 322             TRAILING
 323         };
 324 
 325         /** Check that the use of white space in content is not problematic.
 326          */
 327         static Set<WhitespaceChecks> checkWhitespace(String string) {
 328             // Start with empty result set.
 329             Set<WhitespaceChecks> checks = new HashSet<>();
 330             // No need to check empty strings.
 331             if (string.isEmpty()) {
 332                 return checks;
 333             }
 334             // Maximum common indentation.
 335             int outdent = 0;
 336             // No need to check indentation if opting out (last line is empty.)
 337             char lastChar = string.charAt(string.length() - 1);
 338             boolean optOut = lastChar == '\n' || lastChar == '\r';
 339             // Split string based at line terminators.
 340             String[] lines = string.split("\\R");
 341             int length = lines.length;
 342             // Extract last line.
 343             String lastLine = lines[length - 1];
 344             if (!optOut) {
 345                 // Prime with the last line indentation (may be blank.)
 346                 outdent = indent(lastLine);
 347                 for (String line : lines) {
 348                     // Blanks lines have no influence (last line accounted for.)
 349                     if (!line.isBlank()) {
 350                         outdent = Integer.min(outdent, indent(line));
 351                         if (outdent == 0) {
 352                             break;
 353                         }
 354                     }
 355                 }
 356             }
 357             // Last line is representative.
 358             String start = lastLine.substring(0, outdent);
 359             for (String line : lines) {
 360                 // Fail if a line does not have the same indentation.
 361                 if (!line.isBlank() && !line.startsWith(start)) {
 362                     // Mix of different white space
 363                     checks.add(WhitespaceChecks.INCONSISTENT);
 364                 }
 365                 // Line has content even after indent is removed.
 366                 if (outdent < line.length()) {
 367                     // Is the last character a white space.
 368                     lastChar = line.charAt(line.length() - 1);
 369                     if (Character.isWhitespace(lastChar)) {
 370                         // Has trailing white space.
 371                         checks.add(WhitespaceChecks.TRAILING);
 372                     }
 373                 }
 374             }
 375             return checks;
 376         }
 377 
 378         /** Invoke String::stripIndent through reflection.
 379          */
 380         static String stripIndent(String string) {
 381             try {
 382                 string = (String)stripIndent.invoke(string);
 383             } catch (InvocationTargetException | IllegalAccessException ex) {
 384                 throw new RuntimeException(ex);
 385             }
 386             return string;
 387         }
 388 
 389         /** Invoke String::translateEscapes through reflection.
 390          */
 391         static String translateEscapes(String string) {
 392             try {
 393                 string = (String)translateEscapes.invoke(string);
 394             } catch (InvocationTargetException | IllegalAccessException ex) {
 395                 throw new RuntimeException(ex);
 396             }
 397             return string;
 398         }
 399     }
 400 
 401     /** Test for EOLN.
 402      */
 403     private boolean isEOLN() {
 404         return reader.ch == LF || reader.ch == CR;
 405     }
 406 
 407     /** Test for CRLF.
 408      */
 409     private boolean isCRLF() {
 410         return reader.ch == CR && reader.peekChar() == LF;
 411     }
 412 
 413     /** Count and skip repeated occurances of the specified character.
 414      */
 415     private int countChar(char ch, int max) {
 416         int count = 0;
 417         for ( ; count < max && reader.bp < reader.buflen && reader.ch == ch; count++) {
 418             reader.scanChar();
 419         }
 420         return count;
 421     }
 422 
 423     /** Scan a string literal or text block.
 424      */
 425     private void scanString(int pos) {
 426         // Clear flags.
 427         shouldStripIndent = false;
 428         shouldTranslateEscapes = false;
 429         // Check if text block string methods are present.
 430         boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
 431         // Track the end of first line for error recovery.
 432         int firstEOLN = -1;
 433         // Attempt to scan for up to 3 double quotes.
 434         int openCount = countChar('\"', 3);
 435         switch (openCount) {
 436         case 1: // Starting a string literal.
 437             break;
 438         case 2: // Starting an empty string literal.
 439             // Start again but only consume one quote.
 440             reader.reset(pos);
 441             openCount = countChar('\"', 1);
 442             break;
 443         case 3: // Starting a text block.
 444             // Check if preview feature is enabled for text blocks.
 445             checkSourceLevel(pos, Feature.TEXT_BLOCKS);
 446             // Only proceed if text block string methods are present.
 447             if (hasTextBlockSupport) {
 448                 // Indicate that the final string should have incidental indentation removed.
 449                 shouldStripIndent = true;
 450                 // Verify the open delimiter sequence.
 451                 boolean hasOpenEOLN = false;
 452                 while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
 453                     hasOpenEOLN = isEOLN();
 454                     if (hasOpenEOLN) {
 455                         break;
 456                     }
 457                     reader.scanChar();
 458                 }
 459                 // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>.
 460                 if (!hasOpenEOLN) {
 461                     lexError(reader.bp, Errors.IllegalTextBlockOpen);
 462                     return;
 463                 }
 464                 // Skip line terminator.
 465                 int start = reader.bp;
 466                 if (isCRLF()) {
 467                     reader.scanChar();
 468                 }
 469                 reader.scanChar();
 470                 processLineTerminator(start, reader.bp);
 471             } else {
 472                 // No text block string methods are present, so reset and treat like string literal.
 473                 reader.reset(pos);
 474                 openCount = countChar('\"', 1);
 475             }
 476             break;
 477         }
 478         // While characters are available.
 479         while (reader.bp < reader.buflen) {
 480             // If possible close delimiter sequence.
 481             if (reader.ch == '\"') {
 482                 // Check to see if enough double quotes are present.
 483                 int closeCount = countChar('\"', openCount);
 484                 if (openCount == closeCount) {
 485                     // Good result.
 486                     tk = Tokens.TokenKind.STRINGLITERAL;
 487                     return;
 488                 }
 489                 // False alarm, add double quotes to string buffer.
 490                 reader.repeat('\"', closeCount);
 491             } else if (isEOLN()) {
 492                 // Line terminator in string literal is an error.
 493                 // Fall out to unclosed string literal error.
 494                 if (openCount == 1) {
 495                     break;
 496                 }
 497                  // Add line terminator to string buffer.
 498                 int start = reader.bp;
 499                 if (isCRLF()) {
 500                     reader.scanChar();
 501                 }
 502                 reader.putChar('\n', true);
 503                 processLineTerminator(start, reader.bp);
 504                 // Record first line terminator for error recovery.
 505                 if (firstEOLN == -1) {
 506                     firstEOLN = reader.bp;
 507                 }
 508             } else if (reader.ch == '\\') {
 509                 // Handle escape sequences.
 510                 if (hasTextBlockSupport) {
 511                     // Indicate that the final string should have escapes translated.
 512                     shouldTranslateEscapes = true;
 513                     // Validate escape sequence and add to string buffer.
 514                     scanLitCharRaw(pos);
 515                 } else {
 516                     // Translate escape sequence and add result to string buffer.
 517                     scanLitChar(pos);
 518                 }
 519             } else {
 520                 // Add character to string buffer.
 521                 reader.putChar(true);
 522             }
 523         }
 524         // String ended without close delimiter sequence.
 525         lexError(pos, openCount == 1 ? Errors.UnclosedStrLit : Errors.UnclosedTextBlock);
 526         if (firstEOLN  != -1) {
 527             // Reset recovery position to point after open delimiter sequence.
 528             reader.reset(firstEOLN);
 529         }
 530     }
 531 
 532     private void scanDigits(int pos, int digitRadix) {
 533         char saveCh;
 534         int savePos;
 535         do {
 536             if (reader.ch != '_') {
 537                 reader.putChar(false);
 538             }
 539             saveCh = reader.ch;
 540             savePos = reader.bp;
 541             reader.scanChar();
 542         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 543         if (saveCh == '_')
 544             lexError(savePos, Errors.IllegalUnderscore);
 545     }
 546 
 547     /** Read fractional part of hexadecimal floating point number.
 548      */
 549     private void scanHexExponentAndSuffix(int pos) {
 550         if (reader.ch == 'p' || reader.ch == 'P') {
 551             reader.putChar(true);


 936                             reader.scanChar();
 937                             comments = addComment(comments, processComment(pos, reader.bp, style));
 938                             break;
 939                         } else {
 940                             lexError(pos, Errors.UnclosedComment);
 941                             break loop;
 942                         }
 943                     } else if (reader.ch == '=') {
 944                         tk = TokenKind.SLASHEQ;
 945                         reader.scanChar();
 946                     } else {
 947                         tk = TokenKind.SLASH;
 948                     }
 949                     break loop;
 950                 case '\'':
 951                     reader.scanChar();
 952                     if (reader.ch == '\'') {
 953                         lexError(pos, Errors.EmptyCharLit);
 954                         reader.scanChar();
 955                     } else {
 956                         if (isEOLN())
 957                             lexError(pos, Errors.IllegalLineEndInCharLit);
 958                         scanLitChar(pos);
 959                         if (reader.ch == '\'') {
 960                             reader.scanChar();
 961                             tk = TokenKind.CHARLITERAL;
 962                         } else {
 963                             lexError(pos, Errors.UnclosedCharLit);
 964                         }
 965                     }
 966                     break loop;
 967                 case '\"':
 968                     scanString(pos);








 969                     break loop;
 970                 default:
 971                     if (isSpecial(reader.ch)) {
 972                         scanOperator();
 973                     } else {
 974                         boolean isJavaIdentifierStart;
 975                         int codePoint = -1;
 976                         if (reader.ch < '\u0080') {
 977                             // all ASCII range chars already handled, above
 978                             isJavaIdentifierStart = false;
 979                         } else {
 980                             codePoint = reader.peekSurrogates();
 981                             if (codePoint >= 0) {
 982                                 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
 983                                     reader.putChar(true);
 984                                 }
 985                             } else {
 986                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
 987                             }
 988                         }
 989                         if (isJavaIdentifierStart) {
 990                             scanIdent();


 999                             if (codePoint >= 0) {
1000                                 char high = reader.ch;
1001                                 reader.scanChar();
1002                                 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
1003                             } else {
1004                                 arg = (32 < reader.ch && reader.ch < 127) ?
1005                                                 String.format("%s", reader.ch) :
1006                                                 String.format("\\u%04x", (int)reader.ch);
1007                             }
1008                             lexError(pos, Errors.IllegalChar(arg));
1009                             reader.scanChar();
1010                         }
1011                     }
1012                     break loop;
1013                 }
1014             }
1015             endPos = reader.bp;
1016             switch (tk.tag) {
1017                 case DEFAULT: return new Token(tk, pos, endPos, comments);
1018                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
1019                 case STRING: {
1020                     // Get characters from string buffer.
1021                     String string = reader.chars();
1022                     // If a text block.
1023                     if (shouldStripIndent) {
1024                         // Verify that the incidental indentation is consistent.
1025                         if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
1026                             Set<TextBlockSupport.WhitespaceChecks> checks =
1027                                     TextBlockSupport.checkWhitespace(string);
1028                             if (checks.contains(TextBlockSupport.WhitespaceChecks.INCONSISTENT)) {
1029                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
1030                                         Warnings.InconsistentWhiteSpaceIndentation);
1031                             }
1032                             if (checks.contains(TextBlockSupport.WhitespaceChecks.TRAILING)) {
1033                                 lexWarning(LintCategory.TEXT_BLOCKS, pos,
1034                                         Warnings.TrailingWhiteSpaceWillBeRemoved);
1035                             }
1036                         }
1037                         // Remove incidental indentation.
1038                         string = TextBlockSupport.stripIndent(string);
1039                     }
1040                     // Translate escape sequences if present.
1041                     if (shouldTranslateEscapes) {
1042                         string = TextBlockSupport.translateEscapes(string);
1043                     }
1044                     // Build string token.
1045                     return new StringToken(tk, pos, endPos, string, comments);
1046                 }
1047                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
1048                 default: throw new AssertionError();
1049             }
1050         }
1051         finally {
1052             if (scannerDebug) {
1053                     System.out.println("nextToken(" + pos
1054                                        + "," + endPos + ")=|" +
1055                                        new String(reader.getRawCharacters(pos, endPos))
1056                                        + "|");
1057             }
1058         }
1059     }
1060     //where
1061         List<Comment> addComment(List<Comment> comments, Comment comment) {
1062             return comments == null ?
1063                     List.of(comment) :
1064                     comments.prepend(comment);
1065         }
1066 




   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.tools.javac.parser;
  27 


  28 import com.sun.tools.javac.code.Preview;
  29 import com.sun.tools.javac.code.Source;
  30 import com.sun.tools.javac.code.Source.Feature;
  31 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
  32 import com.sun.tools.javac.resources.CompilerProperties.Errors;

  33 import com.sun.tools.javac.util.*;
  34 import com.sun.tools.javac.util.JCDiagnostic.DiagnosticFlag;
  35 


  36 import java.nio.CharBuffer;


  37 
  38 import static com.sun.tools.javac.parser.Tokens.*;
  39 import static com.sun.tools.javac.util.LayoutCharacters.*;
  40 
  41 /** The lexical analyzer maps an input stream consisting of
  42  *  ASCII characters and Unicode escapes into a token sequence.
  43  *
  44  *  <p><b>This is NOT part of any supported API.
  45  *  If you write code that depends on this, you do so at your own risk.
  46  *  This code and its internal interfaces are subject to change or
  47  *  deletion without notice.</b>
  48  */
  49 public class JavaTokenizer {
  50 
  51     private static final boolean scannerDebug = false;
  52 
  53     /** The source language setting.
  54      */
  55     private Source source;
  56 


  67     /** The token kind, set by nextToken().
  68      */
  69     protected TokenKind tk;
  70 
  71     /** The token's radix, set by nextToken().
  72      */
  73     protected int radix;
  74 
  75     /** The token's name, set by nextToken().
  76      */
  77     protected Name name;
  78 
  79     /** The position where a lexical error occurred.
  80      */
  81     protected int errPos = Position.NOPOS;
  82 
  83     /** The Unicode reader (low-level stream reader).
  84      */
  85     protected UnicodeReader reader;
  86 








  87     protected ScannerFactory fac;
  88 





  89     private static final boolean hexFloatsWork = hexFloatsWork();
  90     private static boolean hexFloatsWork() {
  91         try {
  92             Float.valueOf("0x1.0p1");
  93             return true;
  94         } catch (NumberFormatException ex) {
  95             return false;
  96         }
  97     }
  98 
  99     /**
 100      * Create a scanner from the input array.  This method might
 101      * modify the array.  To avoid copying the input array, ensure
 102      * that {@code inputLength < input.length} or
 103      * {@code input[input.length -1]} is a white space character.
 104      *
 105      * @param fac the factory which created this Scanner
 106      * @param buf the input, might be modified
 107      * Must be positive and less than or equal to input.length.
 108      */
 109     protected JavaTokenizer(ScannerFactory fac, CharBuffer buf) {
 110         this(fac, new UnicodeReader(fac, buf));
 111     }
 112 
 113     protected JavaTokenizer(ScannerFactory fac, char[] buf, int inputLength) {
 114         this(fac, new UnicodeReader(fac, buf, inputLength));
 115     }
 116 
 117     protected JavaTokenizer(ScannerFactory fac, UnicodeReader reader) {
 118         this.fac = fac;
 119         this.log = fac.log;
 120         this.tokens = fac.tokens;
 121         this.source = fac.source;
 122         this.preview = fac.preview;
 123         this.reader = reader;

 124     }
 125 
 126     protected void checkSourceLevel(int pos, Feature feature) {
 127         if (preview.isPreview(feature) && !preview.isEnabled()) {
 128             //preview feature without --preview flag, error
 129             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, preview.disabledError(feature));
 130         } else if (!feature.allowedInSource(source)) {
 131             //incompatible source level, error
 132             lexError(DiagnosticFlag.SOURCE_LEVEL, pos, feature.error(source.name));
 133         } else if (preview.isPreview(feature)) {
 134             //use of preview feature, warn
 135             preview.warnPreview(pos, feature);
 136         }
 137     }
 138 
 139     /** Report an error at the given position using the provided arguments.
 140      */
 141     protected void lexError(int pos, JCDiagnostic.Error key) {
 142         log.error(pos, key);
 143         tk = TokenKind.ERROR;
 144         errPos = pos;
 145     }
 146 
 147     protected void lexError(DiagnosticFlag flags, int pos, JCDiagnostic.Error key) {
 148         log.error(flags, pos, key);
 149         tk = TokenKind.ERROR;
 150         errPos = pos;
 151     }
 152 





 153     /** Read next character in character or string literal and copy into sbuf.
 154      */
 155     private void scanLitChar(int pos) {
 156         if (reader.ch == '\\') {
 157             if (reader.peekChar() == '\\' && !reader.isUnicode()) {
 158                 reader.skipChar();
 159                 reader.putChar('\\', true);
 160             } else {
 161                 reader.scanChar();
 162                 switch (reader.ch) {
 163                 case '0': case '1': case '2': case '3':
 164                 case '4': case '5': case '6': case '7':
 165                     char leadch = reader.ch;
 166                     int oct = reader.digit(pos, 8);
 167                     reader.scanChar();
 168                     if ('0' <= reader.ch && reader.ch <= '7') {
 169                         oct = oct * 8 + reader.digit(pos, 8);
 170                         reader.scanChar();
 171                         if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
 172                             oct = oct * 8 + reader.digit(pos, 8);


 183                     reader.putChar('\n', true); break;
 184                 case 'f':
 185                     reader.putChar('\f', true); break;
 186                 case 'r':
 187                     reader.putChar('\r', true); break;
 188                 case '\'':
 189                     reader.putChar('\'', true); break;
 190                 case '\"':
 191                     reader.putChar('\"', true); break;
 192                 case '\\':
 193                     reader.putChar('\\', true); break;
 194                 default:
 195                     lexError(reader.bp, Errors.IllegalEscChar);
 196                 }
 197             }
 198         } else if (reader.bp != reader.buflen) {
 199             reader.putChar(true);
 200         }
 201     }
 202 















































































































































































































































































































 203     private void scanDigits(int pos, int digitRadix) {
 204         char saveCh;
 205         int savePos;
 206         do {
 207             if (reader.ch != '_') {
 208                 reader.putChar(false);
 209             }
 210             saveCh = reader.ch;
 211             savePos = reader.bp;
 212             reader.scanChar();
 213         } while (reader.digit(pos, digitRadix) >= 0 || reader.ch == '_');
 214         if (saveCh == '_')
 215             lexError(savePos, Errors.IllegalUnderscore);
 216     }
 217 
    /** Read exponent and suffix of hexadecimal floating point number.
 219      */
 220     private void scanHexExponentAndSuffix(int pos) {
 221         if (reader.ch == 'p' || reader.ch == 'P') {
 222             reader.putChar(true);


 607                             reader.scanChar();
 608                             comments = addComment(comments, processComment(pos, reader.bp, style));
 609                             break;
 610                         } else {
 611                             lexError(pos, Errors.UnclosedComment);
 612                             break loop;
 613                         }
 614                     } else if (reader.ch == '=') {
 615                         tk = TokenKind.SLASHEQ;
 616                         reader.scanChar();
 617                     } else {
 618                         tk = TokenKind.SLASH;
 619                     }
 620                     break loop;
 621                 case '\'':
 622                     reader.scanChar();
 623                     if (reader.ch == '\'') {
 624                         lexError(pos, Errors.EmptyCharLit);
 625                         reader.scanChar();
 626                     } else {
 627                         if (reader.ch == CR || reader.ch == LF)
 628                             lexError(pos, Errors.IllegalLineEndInCharLit);
 629                         scanLitChar(pos);
 630                         if (reader.ch == '\'') {
 631                             reader.scanChar();
 632                             tk = TokenKind.CHARLITERAL;
 633                         } else {
 634                             lexError(pos, Errors.UnclosedCharLit);
 635                         }
 636                     }
 637                     break loop;
 638                 case '\"':
 639                     reader.scanChar();
 640                     while (reader.ch != '\"' && reader.ch != CR && reader.ch != LF && reader.bp < reader.buflen)
 641                         scanLitChar(pos);
 642                     if (reader.ch == '\"') {
 643                         tk = TokenKind.STRINGLITERAL;
 644                         reader.scanChar();
 645                     } else {
 646                         lexError(pos, Errors.UnclosedStrLit);
 647                     }
 648                     break loop;
 649                default:
 650                     if (isSpecial(reader.ch)) {
 651                         scanOperator();
 652                     } else {
 653                         boolean isJavaIdentifierStart;
 654                         int codePoint = -1;
 655                         if (reader.ch < '\u0080') {
 656                             // all ASCII range chars already handled, above
 657                             isJavaIdentifierStart = false;
 658                         } else {
 659                             codePoint = reader.peekSurrogates();
 660                             if (codePoint >= 0) {
 661                                 if (isJavaIdentifierStart = Character.isJavaIdentifierStart(codePoint)) {
 662                                     reader.putChar(true);
 663                                 }
 664                             } else {
 665                                 isJavaIdentifierStart = Character.isJavaIdentifierStart(reader.ch);
 666                             }
 667                         }
 668                         if (isJavaIdentifierStart) {
 669                             scanIdent();


 678                             if (codePoint >= 0) {
 679                                 char high = reader.ch;
 680                                 reader.scanChar();
 681                                 arg = String.format("\\u%04x\\u%04x", (int) high, (int)reader.ch);
 682                             } else {
 683                                 arg = (32 < reader.ch && reader.ch < 127) ?
 684                                                 String.format("%s", reader.ch) :
 685                                                 String.format("\\u%04x", (int)reader.ch);
 686                             }
 687                             lexError(pos, Errors.IllegalChar(arg));
 688                             reader.scanChar();
 689                         }
 690                     }
 691                     break loop;
 692                 }
 693             }
 694             endPos = reader.bp;
 695             switch (tk.tag) {
 696                 case DEFAULT: return new Token(tk, pos, endPos, comments);
 697                 case NAMED: return new NamedToken(tk, pos, endPos, name, comments);
 698                 case STRING: return new StringToken(tk, pos, endPos, reader.chars(), comments);



























 699                 case NUMERIC: return new NumericToken(tk, pos, endPos, reader.chars(), radix, comments);
 700                 default: throw new AssertionError();
 701             }
 702         }
 703         finally {
 704             if (scannerDebug) {
 705                     System.out.println("nextToken(" + pos
 706                                        + "," + endPos + ")=|" +
 707                                        new String(reader.getRawCharacters(pos, endPos))
 708                                        + "|");
 709             }
 710         }
 711     }
 712     //where
 713         List<Comment> addComment(List<Comment> comments, Comment comment) {
 714             return comments == null ?
 715                     List.of(comment) :
 716                     comments.prepend(comment);
 717         }
 718 


< prev index next >