1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package jdk.incubator.code.extern.impl;
27
28 import jdk.incubator.code.extern.impl.Tokens.TokenKind;
29 import java.util.ArrayList;
30 import java.util.List;
31
32 /**
 * The lexical analyzer maps an input character array, which may contain unicode
 * escape sequences, into a sequence of tokens.
35 */
36 final class JavaBasedTokenizer extends UnicodeReader {
/**
 * Debug switch. When true, readToken() and the process* callbacks print what
 * they scanned to System.out.
 */
private static final boolean scannerDebug = false;

/**
 * Sentinel for non-value; scanDigits() uses it to mean "no underscore position recorded".
 */
private static final int NOT_FOUND = -1;

/**
 * The log to be used for error reporting. Copied from the scanner factory.
 */
private final Log log;

/**
 * The token factory, used to look up token kinds by name. Copied from the scanner factory.
 */
private final Tokens tokens;

/**
 * The kind of the token being scanned, set during readToken().
 * lexError() overwrites it with ERROR.
 */
Tokens.TokenKind tk;

/**
 * The radix of the numeric token being scanned. Reset to 0 at the start of
 * readToken() and assigned by the number scanning methods.
 */
int radix;

/**
 * The name of the token being scanned. Reset to null at the start of
 * readToken() and assigned by checkIdent().
 */
String name;

/**
 * The position where a lexical error occurred. Initially Position.NOPOS;
 * updated by lexError() and errPos(int).
 */
int errPos = Position.NOPOS;

/**
 * True if the literal being scanned contains a backslash escape. Reset at the
 * start of readToken() and set by scanLitChar().
 */
boolean hasEscapeSequences;

/**
 * Buffer for building the text of the current token. Cleared at the start of readToken().
 */
StringBuilder sb;
86
/**
 * Creates a tokenizer that reads from the given character array.
 *
 * @param fac    the factory which created this scanner; supplies the log and the token table.
 * @param array  the input character array.
 * @param length the length of the meaningful content in the array.
 */
JavaBasedTokenizer(Scanner.Factory fac, char[] array, int length) {
    super(fac, array, length);
    this.sb = new StringBuilder(256);
    this.tokens = fac.tokens;
    this.log = fac.log;
}
100
101 /**
102 * Report an error at the given position using the provided arguments.
103 *
104 * @param pos position in input buffer.
105 * @param key error key to report.
106 */
107 void lexError(int pos, Errors.Error key) {
108 tk = Tokens.TokenKind.ERROR;
109 errPos = pos;
110 log.error(pos, key);
111 }
112
113 /**
114 * Add a character to the literal buffer.
115 *
116 * @param ch character to add.
117 */
118 void put(char ch) {
119 sb.append(ch);
120 }
121
122 /**
123 * Add a codepoint to the literal buffer.
124 *
125 * @param codePoint codepoint to add.
126 */
127 void putCodePoint(int codePoint) {
128 sb.appendCodePoint(codePoint);
129 }
130
131 /**
132 * Add current character or codepoint to the literal buffer.
133 */
134 void put() {
135 if (isSurrogate()) {
136 putCodePoint(getCodepoint());
137 } else {
138 put(get());
139 }
140 }
141
142 /**
143 * Add a string to the literal buffer.
144 */
145 void put(String string) {
146 sb.append(string);
147 }
148
149 /**
150 * Add current character or codepoint to the literal buffer then return next character.
151 */
152 char putThenNext() {
153 put();
154
155 return next();
156 }
157
158 /**
159 * If the specified character ch matches the current character then add current character
160 * to the literal buffer and then advance.
161 *
162 * @param ch character to match.
163 * @return true if ch matches current character.
164 */
165 boolean acceptThenPut(char ch) {
166 if (is(ch)) {
167 put(get());
168 next();
169
170 return true;
171 }
172
173 return false;
174 }
175
176 /**
177 * If either ch1 or ch2 matches the current character then add current character
178 * to the literal buffer and then advance.
179 *
180 * @param ch1 first character to match.
181 * @param ch2 second character to match.
182 * @return true if either ch1 or ch2 matches current character.
183 */
184 boolean acceptOneOfThenPut(char ch1, char ch2) {
185 if (isOneOf(ch1, ch2)) {
186 put(get());
187 next();
188
189 return true;
190 }
191
192 return false;
193 }
194
195 /**
196 * Test if the current character is a line terminator.
197 *
198 * @return true if current character is a line terminator.
199 */
200 private boolean isEOLN() {
201 return isOneOf('\n', '\r');
202 }
203
204 /**
205 * Processes the current character and places in the literal buffer. If the current
206 * character is a backslash then the next character is validated as a proper
207 * escape character. Conversion of escape sequences takes place at end of nextToken().
208 *
209 * @param pos position of the first character in literal.
210 */
211 private void scanLitChar(int pos) {
212 if (acceptThenPut('\\')) {
213 hasEscapeSequences = true;
214
215 switch (get()) {
216 case '0':
217 case '1':
218 case '2':
219 case '3':
220 case '4':
221 case '5':
222 case '6':
223 case '7':
224 char leadch = get();
225 putThenNext();
226
227 if (inRange('0', '7')) {
228 putThenNext();
229
230 if (leadch <= '3' && inRange('0', '7')) {
231 putThenNext();
232 }
233 }
234 break;
235
236 case 'b':
237 case 't':
238 case 'n':
239 case 'f':
240 case 'r':
241 case '\'':
242 case '\"':
243 case '\\':
244 putThenNext();
245 break;
246
247 case '\n':
248 case '\r':
249 lexError(position(), Errors.IllegalEscChar);
250 break;
251
252 default:
253 lexError(position(), Errors.IllegalEscChar);
254 break;
255 }
256 } else {
257 putThenNext();
258 }
259 }
260
261 /**
262 * Scan a string literal.
263 *
264 * @param pos position of the first character in literal.
265 */
266 private void scanString(int pos) {
267 // Assume the best.
268 tk = Tokens.TokenKind.STRINGLITERAL;
269 // Skip first quote.
270 next();
271
272 // While characters are available.
273 while (isAvailable()) {
274 if (accept('\"')) {
275 return;
276 }
277
278 if (isEOLN()) {
279 // Line terminator in string literal is an error.
280 // Fall out to unclosed string literal error.
281 break;
282 } else {
283 // Add character to string buffer.
284 scanLitChar(pos);
285 }
286 }
287
288 lexError(pos, Errors.UnclosedStrLit);
289 }
290
291 /**
292 * Scan sequence of digits.
293 *
294 * @param pos position of the first character in literal.
295 * @param digitRadix radix of numeric literal.
296 */
297 private void scanDigits(int pos, int digitRadix) {
298 int leadingUnderscorePos = is('_') ? position() : NOT_FOUND;
299 int trailingUnderscorePos;
300
301 do {
302 if (!is('_')) {
303 put();
304 trailingUnderscorePos = NOT_FOUND;
305 } else {
306 trailingUnderscorePos = position();
307 }
308
309 next();
310 } while (digit(pos, digitRadix) >= 0 || is('_'));
311
312 if (leadingUnderscorePos != NOT_FOUND) {
313 lexError(leadingUnderscorePos, Errors.IllegalUnderscore);
314 } else if (trailingUnderscorePos != NOT_FOUND) {
315 lexError(trailingUnderscorePos, Errors.IllegalUnderscore);
316 }
317 }
318
319 /**
320 * Read fractional part of hexadecimal floating point number.
321 *
322 * @param pos position of the first character in literal.
323 */
324 private void scanHexExponentAndSuffix(int pos) {
325 if (acceptOneOfThenPut('p', 'P')) {
326 skipIllegalUnderscores();
327 acceptOneOfThenPut('+', '-');
328 skipIllegalUnderscores();
329
330 if (digit(pos, 10) >= 0) {
331 scanDigits(pos, 10);
332 } else {
333 lexError(pos, Errors.MalformedFpLit);
334 }
335 } else {
336 lexError(pos, Errors.MalformedFpLit);
337 }
338
339 if (acceptOneOfThenPut('f', 'F')) {
340 tk = Tokens.TokenKind.FLOATLITERAL;
341 radix = 16;
342 } else {
343 acceptOneOfThenPut('d', 'D');
344 tk = Tokens.TokenKind.DOUBLELITERAL;
345 radix = 16;
346 }
347 }
348
349 /**
350 * Read fractional part of floating point number.
351 *
352 * @param pos position of the first character in literal.
353 */
354 private void scanFraction(int pos) {
355 skipIllegalUnderscores();
356
357 if (digit(pos, 10) >= 0) {
358 scanDigits(pos, 10);
359 }
360
361 int index = sb.length();
362
363 if (acceptOneOfThenPut('e', 'E')) {
364 skipIllegalUnderscores();
365 acceptOneOfThenPut('+', '-');
366 skipIllegalUnderscores();
367
368 if (digit(pos, 10) >= 0) {
369 scanDigits(pos, 10);
370 return;
371 }
372
373 lexError(pos, Errors.MalformedFpLit);
374 sb.setLength(index);
375 }
376 }
377
378 /**
379 * Read fractional part and 'd' or 'f' suffix of floating point number.
380 *
381 * @param pos position of the first character in literal.
382 */
383 private void scanFractionAndSuffix(int pos) {
384 radix = 10;
385 scanFraction(pos);
386
387 if (acceptOneOfThenPut('f', 'F')) {
388 tk = Tokens.TokenKind.FLOATLITERAL;
389 } else {
390 acceptOneOfThenPut('d', 'D');
391 tk = Tokens.TokenKind.DOUBLELITERAL;
392 }
393 }
394
395 /**
396 * Read fractional part and 'd' or 'f' suffix of hexadecimal floating point number.
397 *
398 * @param pos position of the first character in literal.
399 */
400 private void scanHexFractionAndSuffix(int pos, boolean seendigit) {
401 radix = 16;
402 assert is('.');
403 putThenNext();
404 skipIllegalUnderscores();
405
406 if (digit(pos, 16) >= 0) {
407 seendigit = true;
408 scanDigits(pos, 16);
409 }
410
411 if (!seendigit)
412 lexError(pos, Errors.InvalidHexNumber);
413 else
414 scanHexExponentAndSuffix(pos);
415 }
416
417 /**
418 * Skip over underscores and report as a error if found.
419 */
420 private void skipIllegalUnderscores() {
421 if (is('_')) {
422 lexError(position(), Errors.IllegalUnderscore);
423 skip('_');
424 }
425 }
426
427 /**
428 * Read a number. (Spec. 3.10)
429 *
430 * @param pos position of the first character in literal.
431 * @param radix the radix of the number; one of 2, 8, 10, 16.
432 */
433 private void scanNumber(int pos, int radix) {
434 // for octal, allow base-10 digit in case it's a float literal
435 this.radix = radix;
436 int digitRadix = (radix == 8 ? 10 : radix);
437 int firstDigit = digit(pos, Math.max(10, digitRadix));
438 boolean seendigit = firstDigit >= 0;
439 boolean seenValidDigit = firstDigit >= 0 && firstDigit < digitRadix;
440
441 if (seendigit) {
442 scanDigits(pos, digitRadix);
443 }
444
445 if (radix == 16 && is('.')) {
446 scanHexFractionAndSuffix(pos, seendigit);
447 } else if (seendigit && radix == 16 && isOneOf('p', 'P')) {
448 scanHexExponentAndSuffix(pos);
449 } else if (digitRadix == 10 && is('.')) {
450 putThenNext();
451 scanFractionAndSuffix(pos);
452 } else if (digitRadix == 10 && isOneOf('e', 'E', 'f', 'F', 'd', 'D')) {
453 scanFractionAndSuffix(pos);
454 } else {
455 if (!seenValidDigit) {
456 switch (radix) {
457 case 2:
458 lexError(pos, Errors.InvalidBinaryNumber);
459 break;
460 case 16:
461 lexError(pos, Errors.InvalidHexNumber);
462 break;
463 }
464 }
465 // If it is not a floating point literal,
466 // the octal number should be rescanned correctly.
467 if (radix == 8) {
468 sb.setLength(0);
469 reset(pos);
470 scanDigits(pos, 8);
471 }
472
473 if (acceptOneOf('l', 'L')) {
474 tk = Tokens.TokenKind.LONGLITERAL;
475 } else {
476 tk = Tokens.TokenKind.INTLITERAL;
477 }
478 }
479 }
480
481 /**
482 * Determines if the sequence in the literal buffer is a token (keyword, operator.)
483 */
484 private void checkIdent(Tokens.TokenKind identifier) {
485 name = sb.toString();
486 tk = tokens.lookupKind(name, identifier);
487 }
488
489 /**
490 * Read an identifier. (Spec. 3.8)
491 */
492 private void scanIdent() {
493 scanIdent(Tokens.TokenKind.IDENTIFIER);
494 }
495
496 /**
497 * Read an identifier. (Spec. 3.8)
498 */
499 private void scanIdent(Tokens.TokenKind identifier) {
500 putThenNext();
501
502 do {
503 switch (get()) {
504 case 'A':
505 case 'B':
506 case 'C':
507 case 'D':
508 case 'E':
509 case 'F':
510 case 'G':
511 case 'H':
512 case 'I':
513 case 'J':
514 case 'K':
515 case 'L':
516 case 'M':
517 case 'N':
518 case 'O':
519 case 'P':
520 case 'Q':
521 case 'R':
522 case 'S':
523 case 'T':
524 case 'U':
525 case 'V':
526 case 'W':
527 case 'X':
528 case 'Y':
529 case 'Z':
530 case 'a':
531 case 'b':
532 case 'c':
533 case 'd':
534 case 'e':
535 case 'f':
536 case 'g':
537 case 'h':
538 case 'i':
539 case 'j':
540 case 'k':
541 case 'l':
542 case 'm':
543 case 'n':
544 case 'o':
545 case 'p':
546 case 'q':
547 case 'r':
548 case 's':
549 case 't':
550 case 'u':
551 case 'v':
552 case 'w':
553 case 'x':
554 case 'y':
555 case 'z':
556 case '$':
557 case '_':
558 case '0':
559 case '1':
560 case '2':
561 case '3':
562 case '4':
563 case '5':
564 case '6':
565 case '7':
566 case '8':
567 case '9':
568 break;
569
570 case '\u0000':
571 case '\u0001':
572 case '\u0002':
573 case '\u0003':
574 case '\u0004':
575 case '\u0005':
576 case '\u0006':
577 case '\u0007':
578 case '\u0008':
579 case '\u000E':
580 case '\u000F':
581 case '\u0010':
582 case '\u0011':
583 case '\u0012':
584 case '\u0013':
585 case '\u0014':
586 case '\u0015':
587 case '\u0016':
588 case '\u0017':
589 case '\u0018':
590 case '\u0019':
591 case '\u001B':
592 case '\u007F':
593 next();
594 continue;
595
596 case '\u001A': // EOI is also a legal identifier part
597 if (isAvailable()) {
598 next();
599 continue;
600 }
601
602 checkIdent(identifier);
603 return;
604
605 default:
606 boolean isJavaIdentifierPart;
607
608 if (isASCII()) {
609 // all ASCII range chars already handled, above
610 isJavaIdentifierPart = false;
611 } else {
612 if (Character.isIdentifierIgnorable(get())) {
613 next();
614 continue;
615 }
616
617 isJavaIdentifierPart = isSurrogate()
618 ? Character.isJavaIdentifierPart(getCodepoint())
619 : Character.isJavaIdentifierPart(get());
620 }
621
622 if (!isJavaIdentifierPart) {
623 checkIdent(identifier);
624 return;
625 }
626 }
627
628 putThenNext();
629 } while (true);
630 }
631
632 /**
633 * Read token (main entrypoint.)
634 */
635 public Tokens.Token readToken() {
636 sb.setLength(0);
637 name = null;
638 radix = 0;
639 hasEscapeSequences = false;
640
641 int pos = 0;
642 List<Tokens.Comment> comments = null;
643
644 try {
645 loop:
646 while (true) {
647 pos = position();
648
649 switch (get()) {
650 case ' ': // (Spec 3.6)
651 case '\t': // (Spec 3.6)
652 case '\f': // (Spec 3.6)
653 skipWhitespace();
654 processWhiteSpace(pos, position());
655 break;
656
657 case '\n': // (Spec 3.4)
658 next();
659 processLineTerminator(pos, position());
660 break;
661
662 case '\r': // (Spec 3.4)
663 next();
664 accept('\n');
665 processLineTerminator(pos, position());
666 break;
667
668 case 'A':
669 case 'B':
670 case 'C':
671 case 'D':
672 case 'E':
673 case 'F':
674 case 'G':
675 case 'H':
676 case 'I':
677 case 'J':
678 case 'K':
679 case 'L':
680 case 'M':
681 case 'N':
682 case 'O':
683 case 'P':
684 case 'Q':
685 case 'R':
686 case 'S':
687 case 'T':
688 case 'U':
689 case 'V':
690 case 'W':
691 case 'X':
692 case 'Y':
693 case 'Z':
694 case 'a':
695 case 'b':
696 case 'c':
697 case 'd':
698 case 'e':
699 case 'f':
700 case 'g':
701 case 'h':
702 case 'i':
703 case 'j':
704 case 'k':
705 case 'l':
706 case 'm':
707 case 'n':
708 case 'o':
709 case 'p':
710 case 'q':
711 case 'r':
712 case 's':
713 case 't':
714 case 'u':
715 case 'v':
716 case 'w':
717 case 'x':
718 case 'y':
719 case 'z':
720 case '$':
721 case '_': // (Spec. 3.8)
722 scanIdent();
723 break loop;
724
725 case '%':
726 scanIdent(Tokens.TokenKind.VALUE_IDENTIFIER);
727 break loop;
728
729 case '0': // (Spec. 3.10)
730 next();
731
732 if (acceptOneOf('x', 'X')) {
733 skipIllegalUnderscores();
734 scanNumber(pos, 16);
735 } else if (acceptOneOf('b', 'B')) {
736 skipIllegalUnderscores();
737 scanNumber(pos, 2);
738 } else {
739 put('0');
740
741 if (is('_')) {
742 int savePos = position();
743 skip('_');
744
745 if (digit(pos, 10) < 0) {
746 lexError(savePos, Errors.IllegalUnderscore);
747 }
748 }
749
750 scanNumber(pos, 8);
751 }
752 break loop;
753
754 case '1':
755 case '2':
756 case '3':
757 case '4':
758 case '5':
759 case '6':
760 case '7':
761 case '8':
762 case '9': // (Spec. 3.10)
763 scanNumber(pos, 10);
764 break loop;
765
766 case '.': // (Spec. 3.12)
767 next();
768 int savePos = position();
769
770 if (accept('.')) {
771 lexError(savePos, Errors.IllegalDot);
772 } else if (digit(pos, 10) >= 0) {
773 put('.');
774 scanFractionAndSuffix(pos); // (Spec. 3.10)
775 } else {
776 tk = Tokens.TokenKind.DOT;
777 }
778 break loop;
779
780 case ',': // (Spec. 3.12)
781 next();
782 tk = Tokens.TokenKind.COMMA;
783 break loop;
784
785 case '(': // (Spec. 3.12)
786 next();
787 tk = Tokens.TokenKind.LPAREN;
788 break loop;
789
790 case ')': // (Spec. 3.12)
791 next();
792 tk = Tokens.TokenKind.RPAREN;
793 break loop;
794
795 case '[': // (Spec. 3.12)
796 next();
797 tk = Tokens.TokenKind.LBRACKET;
798 break loop;
799
800 case ']': // (Spec. 3.12)
801 next();
802 tk = Tokens.TokenKind.RBRACKET;
803 break loop;
804
805 case '{': // (Spec. 3.12)
806 next();
807 tk = Tokens.TokenKind.LBRACE;
808 break loop;
809
810 case '}': // (Spec. 3.12)
811 next();
812 tk = Tokens.TokenKind.RBRACE;
813 break loop;
814
815 case '?':
816 next();
817 tk = Tokens.TokenKind.QUES;
818 break loop;
819
820 case ';':
821 next();
822 tk = Tokens.TokenKind.SEMI;
823 break loop;
824
825 case ':':
826 next();
827 if (accept(':')) {
828 tk = Tokens.TokenKind.COLCOL;
829 } else {
830 tk = Tokens.TokenKind.COLON;
831 }
832 break loop;
833
834 case '&':
835 next();
836 tk = Tokens.TokenKind.AMP;
837 break loop;
838
839 case '@':
840 next();
841 tk = Tokens.TokenKind.MONKEYS_AT;
842 break loop;
843
844 case '^':
845 next();
846 tk = Tokens.TokenKind.CARET;
847 break loop;
848
849 case '=':
850 next();
851 tk = Tokens.TokenKind.EQ;
852 break loop;
853
854 case '<':
855 next();
856 tk = Tokens.TokenKind.LT;
857 break loop;
858
859 case '>':
860 next();
861 tk = Tokens.TokenKind.GT;
862 break loop;
863
864 case '#':
865 next();
866 tk = TokenKind.HASH;
867 break loop;
868
869 case '+':
870 next();
871 tk = Tokens.TokenKind.PLUS;
872 break loop;
873
874 case '-':
875 next();
876 if (accept('>')) {
877 tk = Tokens.TokenKind.ARROW;
878 } else {
879 tk = Tokens.TokenKind.SUB;
880 }
881 break loop;
882
883 case '/':
884 next();
885
886 if (accept('/')) { // (Spec. 3.7)
887 skipToEOLN();
888
889 if (isAvailable()) {
890 comments = appendComment(comments, processComment(pos, position(), Tokens.Comment.CommentStyle.LINE));
891 }
892 break;
893 } else if (accept('*')) { // (Spec. 3.7)
894 while (isAvailable()) {
895 if (accept('*')) {
896 if (is('/')) {
897 break;
898 }
899 } else {
900 next();
901 }
902 }
903
904 if (accept('/')) {
905 comments = appendComment(comments, processComment(pos, position(), Tokens.Comment.CommentStyle.BLOCK));
906
907 break;
908 } else {
909 lexError(pos, Errors.UnclosedComment);
910
911 break loop;
912 }
913 } else {
914 lexError(pos, Errors.UnclosedComment);
915 }
916 break loop;
917
918 case '\'': // (Spec. 3.10)
919 next();
920
921 if (accept('\'')) {
922 lexError(pos, Errors.EmptyCharLit);
923 } else {
924 if (isEOLN()) {
925 lexError(pos, Errors.IllegalLineEndInCharLit);
926 }
927
928 scanLitChar(pos);
929
930 if (accept('\'')) {
931 tk = Tokens.TokenKind.CHARLITERAL;
932 } else {
933 lexError(pos, Errors.UnclosedCharLit);
934 }
935 }
936 break loop;
937
938 case '\"': // (Spec. 3.10)
939 scanString(pos);
940 break loop;
941
942 default:
943 boolean isJavaIdentifierStart;
944
945 if (isASCII()) {
946 // all ASCII range chars already handled, above
947 isJavaIdentifierStart = false;
948 } else {
949 isJavaIdentifierStart = isSurrogate()
950 ? Character.isJavaIdentifierStart(getCodepoint())
951 : Character.isJavaIdentifierStart(get());
952 }
953
954 if (isJavaIdentifierStart) {
955 scanIdent();
956 } else if (digit(pos, 10) >= 0) {
957 scanNumber(pos, 10);
958 } else if (is((char) EOI) || !isAvailable()) {
959 tk = Tokens.TokenKind.EOF;
960 pos = position();
961 } else {
962 String arg;
963
964 if (isSurrogate()) {
965 int codePoint = getCodepoint();
966 char hi = Character.highSurrogate(codePoint);
967 char lo = Character.lowSurrogate(codePoint);
968 arg = String.format("\\u%04x\\u%04x", (int) hi, (int) lo);
969 } else {
970 char ch = get();
971 arg = (32 < ch && ch < 127) ? String.format("%s", ch) :
972 String.format("\\u%04x", (int) ch);
973 }
974
975 lexError(pos, Errors.IllegalChar(arg));
976 next();
977 }
978 break loop;
979 }
980 }
981
982 int endPos = position();
983
984 if (tk.tag == Tokens.Token.Tag.DEFAULT) {
985 return new Tokens.Token(tk, pos, endPos, comments);
986 } else if (tk.tag == Tokens.Token.Tag.NAMED) {
987 return new Tokens.NamedToken(tk, pos, endPos, name, comments);
988 } else {
989 // Get characters from string buffer.
990 String string = sb.toString();
991
992 // Translate escape sequences if present.
993 if (hasEscapeSequences) {
994 try {
995 string = string.translateEscapes();
996 } catch (Exception ex) {
997 // Error already reported, just use untranslated string.
998 }
999 }
1000
1001 if (tk.tag == Tokens.Token.Tag.STRING) {
1002 // Build string token.
1003 return new Tokens.StringToken(tk, pos, endPos, string, comments);
1004 } else {
1005 // Build numeric token.
1006 return new Tokens.NumericToken(tk, pos, endPos, string, radix, comments);
1007 }
1008 }
1009 } finally {
1010 int endPos = position();
1011
1012 if (scannerDebug) {
1013 System.out.println("nextToken(" + pos
1014 + "," + endPos + ")=|" +
1015 new String(getRawCharacters(pos, endPos))
1016 + "| " + tk.name());
1017 }
1018 }
1019 }
1020
1021 /**
1022 * Appends a comment to the list of comments preceding the current token.
1023 *
1024 * @param comments existing list of comments.
1025 * @param comment comment to append.
1026 * @return new list with comment prepended to the existing list.
1027 */
1028 List<Tokens.Comment> appendComment(List<Tokens.Comment> comments, Tokens.Comment comment) {
1029 if (comments == null) {
1030 comments = new ArrayList<>();
1031 }
1032 // prepend
1033 comments.add(0, comment);
1034 return comments;
1035 }
1036
1037 /**
1038 * Return the position where a lexical error occurred.
1039 *
1040 * @return position in the input buffer of where the error occurred.
1041 */
1042 public int errPos() {
1043 return errPos;
1044 }
1045
1046 /**
1047 * Set the position where a lexical error occurred.
1048 *
1049 * @param pos position in the input buffer of where the error occurred.
1050 */
1051 public void errPos(int pos) {
1052 errPos = pos;
1053 }
1054
1055 /**
1056 * Called when a complete comment has been scanned. pos and endPos
1057 * will mark the comment boundary.
1058 *
1059 * @param pos position of the opening / in the input buffer.
1060 * @param endPos position + 1 of the closing / in the input buffer.
1061 * @param style style of comment.
1062 * @return the constructed BasicComment.
1063 */
1064 Tokens.Comment processComment(int pos, int endPos, Tokens.Comment.CommentStyle style) {
1065 if (scannerDebug) {
1066 System.out.println("processComment(" + pos
1067 + "," + endPos + "," + style + ")=|"
1068 + new String(getRawCharacters(pos, endPos))
1069 + "|");
1070 }
1071
1072 char[] buf = getRawCharacters(pos, endPos);
1073 return new BasicComment(style, new String(buf));
1074 }
1075
1076 /**
1077 * Called when a complete whitespace run has been scanned. pos and endPos
1078 * will mark the whitespace boundary.
1079 * <p>
1080 * (Spec 3.6)
1081 *
1082 * @param pos position in input buffer of first whitespace character.
1083 * @param endPos position + 1 in input buffer of last whitespace character.
1084 */
1085 void processWhiteSpace(int pos, int endPos) {
1086 if (scannerDebug) {
1087 System.out.println("processWhitespace(" + pos
1088 + "," + endPos + ")=|" +
1089 new String(getRawCharacters(pos, endPos))
1090 + "|");
1091 }
1092 }
1093
1094 /**
1095 * Called when a line terminator has been processed.
1096 *
1097 * @param pos position in input buffer of first character in sequence.
1098 * @param endPos position + 1 in input buffer of last character in sequence.
1099 */
1100 void processLineTerminator(int pos, int endPos) {
1101 if (scannerDebug) {
1102 System.out.println("processTerminator(" + pos
1103 + "," + endPos + ")=|" +
1104 new String(getRawCharacters(pos, endPos))
1105 + "|");
1106 }
1107 }
1108
1109 /**
1110 * Build a map for translating between line numbers and positions in the input.
1111 *
1112 * @return a LineMap
1113 */
1114 public Position.LineMap getLineMap() {
1115 return Position.makeLineMap(getRawCharacters(), length(), false);
1116 }
1117
/**
 * A comment found in the input, as created by processComment().
 *
 * @param style style of the comment, as passed to processComment(); readToken()
 *              uses LINE for comments starting with // and BLOCK for
 *              comments starting with a slash followed by a star.
 * @param text  the raw input characters of the comment.
 */
record BasicComment(Tokens.Comment.CommentStyle style, String text) implements Tokens.Comment {
}
1130 }