1 /*
2 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.openjdk.asmtools.jasm;
24
25 import static java.lang.String.format;
26 import static org.openjdk.asmtools.jasm.JasmTokens.*;
27 import static org.openjdk.asmtools.jasm.Constants.EOF;
28 import static org.openjdk.asmtools.jasm.Constants.OFFSETBITS;
29 import java.io.IOException;
30 import java.util.function.Predicate;
31
32 /**
33 * A Scanner for Jasm tokens. Errors are reported to the environment object.<p>
34 *
35 * The scanner keeps track of the current token, the value of the current token (if any),
36 * and the start position of the current token.<p>
37 *
38 * The scan() method advances the scanner to the next token in the input.<p>
39 *
 * The match() method is used to quickly match opening brackets (i.e. '(', '{', or '[')
 * with their closing counterpart. This is useful during error recovery.<p>
42 *
43 * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p>
44 */
45 public class Scanner extends ParseBase {
46
47 /**
48 * SyntaxError is the generic error thrown for parsing problems.
49 */
50 protected static class SyntaxError extends Error {
51 boolean fatalError = false;
52 SyntaxError Fatal() { fatalError = true; return this; }
53 boolean isFatal() {return fatalError;}
54 }
55
56 /**
57 * Input stream
58 */
59 protected Environment in;
60
61 /**
62 * The current character
63 */
64 protected int ch;
65
66 /**
67 * Current token
68 */
69 // protected int token;
70 protected Token token;
71
72 /**
73 * The position of the current token
74 */
75 protected int pos;
76
77 /*
78 * Token values.
79 */
80 protected char charValue;
81 protected int intValue;
82 protected long longValue;
83 protected float floatValue;
84 protected double doubleValue;
85 protected String stringValue;
86 protected String idValue;
87 protected int radix; // Radix, when reading int or long
88
89 /* doc comment preceding the most recent token */
90 protected String docComment;
91
92 /* A growable character buffer. */
93 private int count;
94 private char buffer[] = new char[32];
95 //
96 private Predicate<Integer> escapingAllowed;
97 /**
98 * The position of the previous token
99 */
100 protected int prevPos;
101 protected int sign; // sign, when reading number
102 protected boolean inBits; // inBits prefix, when reading number
103
/**
 * Main constructor: creates a scanner over the given environment and reads
 * ahead one character and one token, so that {@code ch} and {@code token}
 * are populated when construction completes.
 *
 * @param env the environment that supplies characters and receives errors
 * @throws IOException if reading the first character or token fails
 */
protected Scanner(Environment env) throws IOException {
    super.init(this, null, env);
    this.in = env;
    this.escapingAllowed = noFunc; // escapes are rejected unless a caller enables them
    this.ch = in.read();
    xscan();
}
116
117 protected void scanModuleStatement() throws IOException {
118 try {
119 escapingAllowed = yesAndProcessFunc;
120 scan();
121 } finally {
122 escapingAllowed = noFunc;
123 }
124 }
125
126 /**
127 * scan
128 *
129 * Scan the next token.
130 *
131 * @throws IOException
132 */
133 protected void scan() throws IOException {
134 int signloc = 1, cnt = 0;
135 prevPos = pos;
136 prefix:
137 for (;;) {
138 xscan();
139 switch (token) {
140 case SIGN:
141 signloc = signloc * intValue;
142 break;
143 default:
144 break prefix;
145 }
146 cnt++;
147 }
148 switch (token) {
149 case INTVAL:
150 case LONGVAL:
151 case FLOATVAL:
152 case DOUBLEVAL:
153 case INF:
154 case NAN:
155 sign = signloc;
156 break;
157 default:
158 }
159 }
160
161 /**
162 * Check the token may be identifier
163 */
164 protected final boolean checkTokenIdent() {
165 return token.possibleJasmIdentifier();
166 }
167
168 static String readableConstant(int t) {
169 return "<" + Tables.tag(t) + "> [" + t + "]";
170 }
171
172 /**
173 * Expects a token, scans the next token or throws an exception.
174 */
175 protected final void expect(Token t) throws SyntaxError, IOException {
176 check(t);
177 scan();
178 }
179
180 /**
181 * Checks a token, throws an exception if not the same
182 */
183 protected final void check(Token t) throws SyntaxError, IOException {
184 if (token != t) {
185 if ((t != Token.IDENT) || !checkTokenIdent()) {
186 env.traceln("expect: " + t + " instead of " + token);
187 switch (t) {
188 case IDENT:
189 env.error(pos, "identifier.expected");
190 break;
191 default:
192 env.error(pos, "token.expected", "<" + t.printValue() + ">");
193 break;
194 }
195
196 if (debugFlag) {
197 debugStr("<<<<<PROBLEM>>>>>>>: ");
198 throw new Error("<<<<<PROBLEM>>>>>>>");
199 } else {
200 throw new SyntaxError();
201 }
202 }
203 }
204 }
205
206 private void putCh(int ch) {
207 if (count == buffer.length) {
208 char newBuffer[] = new char[buffer.length * 2];
209 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
210 buffer = newBuffer;
211 }
212 buffer[count++] = (char) ch;
213 }
214
215 private String bufferString() {
216 char buf[] = new char[count];
217 System.arraycopy(buffer, 0, buf, 0, count);
218 return new String(buf);
219 }
220
221 /**
222 * Returns true if the character is a unicode digit.
223 *
224 * @param ch the character to be checked
225 */
226 public static boolean isUCDigit(int ch) {
227 if ((ch >= '0') && (ch <= '9')) {
228 return true;
229 }
230 switch (ch >> 8) {
231 case 0x06:
232 return ((ch >= 0x0660) && (ch <= 0x0669)) || // Arabic-Indic
233 ((ch >= 0x06f0) && (ch <= 0x06f9)); // Eastern Arabic-Indic
234 case 0x07:
235 case 0x08:
236 default:
237 return false;
238 case 0x09:
239 return ((ch >= 0x0966) && (ch <= 0x096f)) || // Devanagari
240 ((ch >= 0x09e6) && (ch <= 0x09ef)); // Bengali
241 case 0x0a:
242 return ((ch >= 0x0a66) && (ch <= 0x0a6f)) || // Gurmukhi
243 ((ch >= 0x0ae6) && (ch <= 0x0aef)); // Gujarati
244 case 0x0b:
245 return ((ch >= 0x0b66) && (ch <= 0x0b6f)) || // Oriya
246 ((ch >= 0x0be7) && (ch <= 0x0bef)); // Tamil
247 case 0x0c:
248 return ((ch >= 0x0c66) && (ch <= 0x0c6f)) || // Telugu
249 ((ch >= 0x0ce6) && (ch <= 0x0cef)); // Kannada
250 case 0x0d:
251 return ((ch >= 0x0d66) && (ch <= 0x0d6f)); // Malayalam
252 case 0x0e:
253 return ((ch >= 0x0e50) && (ch <= 0x0e59)) || // Thai
254 ((ch >= 0x0ed0) && (ch <= 0x0ed9)); // Lao
255 case 0x0f:
256 return false;
257 case 0x10:
258 return ((ch >= 0x1040) && (ch <= 0x1049)); // Tibetan
259 }
260 }
261
262 /**
263 * Returns true if the character is a Unicode letter.
264 *
265 * @param ch the character to be checked
266 */
267 public static boolean isUCLetter(int ch) {
268 // fast check for Latin capitals and small letters
269 if (((ch >= 'A') && (ch <= 'Z'))
270 || ((ch >= 'a') && (ch <= 'z'))) {
271 return true;
272 }
273 // rest of ISO-LATIN-1
274 if (ch < 0x0100) {
275 // fast check
276 if (ch < 0x00c0) {
277 return (ch == '_') || (ch == '$');
278 }
279 // various latin letters and diacritics,
280 // but *not* the multiplication and division symbols
281 return ((ch >= 0x00c0) && (ch <= 0x00d6))
282 || ((ch >= 0x00d8) && (ch <= 0x00f6))
283 || ((ch >= 0x00f8) && (ch <= 0x00ff));
284 }
285 // other non CJK alphabets and symbols, but not digits
286 if (ch <= 0x1fff) {
287 return !isUCDigit(ch);
288 }
289 // rest are letters only in five ranges:
290 // Hiragana, Katakana, Bopomofo and Hangul
291 // CJK Squared Words
292 // Korean Hangul Symbols
293 // Han (Chinese, Japanese, Korean)
294 // Han compatibility
295 return ((ch >= 0x3040) && (ch <= 0x318f))
296 || ((ch >= 0x3300) && (ch <= 0x337f))
297 || ((ch >= 0x3400) && (ch <= 0x3d2d))
298 || ((ch >= 0x4e00) && (ch <= 0x9fff))
299 || ((ch >= 0xf900) && (ch <= 0xfaff));
300 }
301
302 /**
303 * Scan a comment. This method should be called once the initial /, * and the next
304 * character have been read.
305 */
306 private void skipComment() throws IOException {
307 while (true) {
308 switch (ch) {
309 case EOF:
310 env.error(pos, "eof.in.comment");
311 return;
312 case '*':
313 if ((ch = in.read()) == '/') {
314 ch = in.read();
315 return;
316 }
317 break;
318 default:
319 ch = in.read();
320 break;
321 }
322 }
323 }
324
325 /**
326 * Scan a doc comment. This method should be called once the initial /, * and * have
327 * been read. It gathers the content of the comment (without leading spaces and '*'s)
328 * in the string buffer.
329 */
330 @SuppressWarnings("empty-statement")
331 private String scanDocComment() throws IOException {
332 count = 0;
333
334 if (ch == '*') {
335 do {
336 ch = in.read();
337 } while (ch == '*');
338 if (ch == '/') {
339 ch = in.read();
340 return "";
341 }
342 }
343 switch (ch) {
344 case '\n':
345 case ' ':
346 ch = in.read();
347 break;
348 }
349
350 boolean seenstar = false;
351 int c = count;
352 while (true) {
353 switch (ch) {
354 case EOF:
355 env.error(pos, "eof.in.comment");
356 return bufferString();
357 case '\n':
358 putCh('\n');
359 ch = in.read();
360 seenstar = false;
361 c = count;
362 break;
363 case ' ':
364 case '\t':
365 putCh(ch);
366 ch = in.read();
367 break;
368 case '*':
369 if (seenstar) {
370 if ((ch = in.read()) == '/') {
371 ch = in.read();
372 count = c;
373 return bufferString();
374 }
375 putCh('*');
376 } else {
377 seenstar = true;
378 count = c;
379 while ((ch = in.read()) == '*');
380 switch (ch) {
381 case ' ':
382 ch = in.read();
383 break;
384 case '/':
385 ch = in.read();
386 count = c;
387 return bufferString();
388 }
389 }
390 break;
391 default:
392 if (!seenstar) {
393 seenstar = true;
394 }
395 putCh(ch);
396 ch = in.read();
397 c = count;
398 break;
399 }
400 }
401 }
402
403 /**
404 * Scan a decimal at this point
405 */
406 private void scanCPRef() throws IOException {
407 switch (ch = in.read()) {
408 case '0':
409 case '1':
410 case '2':
411 case '3':
412 case '4':
413 case '5':
414 case '6':
415 case '7':
416 case '8':
417 case '9': {
418 boolean overflow = false;
419 long value = ch - '0';
420 count = 0;
421 putCh(ch); // save character in buffer
422 numberLoop:
423 for (;;) {
424 switch (ch = in.read()) {
425 case '0':
426 case '1':
427 case '2':
428 case '3':
429 case '4':
430 case '5':
431 case '6':
432 case '7':
433 case '8':
434 case '9':
435 putCh(ch);
436 if (overflow) {
437 break;
438 }
439 value = (value * 10) + (ch - '0');
440 overflow = (value > 0xFFFF);
441 break;
442 default:
443 break numberLoop;
444 }
445 } // while true
446 intValue = (int) value;
447 stringValue = bufferString();
448 token = Token.CPINDEX;
449 if (overflow) {
450 env.error(pos, "overflow");
451 }
452 break;
453 }
454 default:
455 stringValue = Character.toString((char)ch);
456 env.error(in.pos, "invalid.number", stringValue);
457 intValue = 0;
458 token = Token.CPINDEX;
459 ch = in.read();
460 }
461 } // scanCPRef()
462
463 /**
464 * Scan a number. The first digit of the number should be the current character. We
465 * may be scanning hex, decimal, or octal at this point
466 */
467 private void scanNumber() throws IOException {
468 boolean seenNonOctal = false;
469 boolean overflow = false;
470 radix = (ch == '0' ? 8 : 10);
471 long value = ch - '0';
472 count = 0;
473 putCh(ch); // save character in buffer
474 numberLoop:
475 for (;;) {
476 switch (ch = in.read()) {
477 case '.':
478 if (radix == 16) {
479 break numberLoop; // an illegal character
480 }
481 scanReal();
482 return;
483
484 case '8':
485 case '9':
486 // We can't yet throw an error if reading an octal. We might
487 // discover we're really reading a real.
488 seenNonOctal = true;
489 case '0':
490 case '1':
491 case '2':
492 case '3':
493 case '4':
494 case '5':
495 case '6':
496 case '7':
497 putCh(ch);
498 if (radix == 10) {
499 overflow = overflow || (value * 10) / 10 != value;
500 value = (value * 10) + (ch - '0');
501 overflow = overflow || (value - 1 < -1);
502 } else if (radix == 8) {
503 overflow = overflow || (value >>> 61) != 0;
504 value = (value << 3) + (ch - '0');
505 } else {
506 overflow = overflow || (value >>> 60) != 0;
507 value = (value << 4) + (ch - '0');
508 }
509 break;
510 case 'd':
511 case 'D':
512 case 'e':
513 case 'E':
514 case 'f':
515 case 'F':
516 if (radix != 16) {
517 scanReal();
518 return;
519 }
520 // fall through
521 case 'a':
522 case 'A':
523 case 'b':
524 case 'B':
525 case 'c':
526 case 'C':
527 putCh(ch);
528 if (radix != 16) {
529 break numberLoop; // an illegal character
530 }
531 overflow = overflow || (value >>> 60) != 0;
532 value = (value << 4) + 10
533 + Character.toLowerCase((char) ch) - 'a';
534 break;
535 case 'l':
536 case 'L':
537 ch = in.read(); // skip over 'l'
538 longValue = value;
539 token = Token.LONGVAL;
540 break numberLoop;
541 case 'x':
542 case 'X':
543 // if the first character is a '0' and this is the second
544 // letter, then read in a hexadecimal number. Otherwise, error.
545 if (count == 1 && radix == 8) {
546 radix = 16;
547 break;
548 } else {
549 // we'll get an illegal character error
550 break numberLoop;
551 }
552 default:
553 intValue = (int) value;
554 token = Token.INTVAL;
555 break numberLoop;
556 }
557 } // while true
558 // we have just finished reading the number. The next thing better
559 // not be a letter or digit.
560 if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') {
561 env.error(in.pos, "invalid.number", Character.toString((char)ch));
562 do {
563 ch = in.read();
564 } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.');
565 intValue = 0;
566 token = Token.INTVAL;
567 } else if (radix == 8 && seenNonOctal) {
568 intValue = 0;
569 token = Token.INTVAL;
570 env.error(in.pos, "invalid.octal.number");
571 } else if (overflow
572 || (token == Token.INTVAL
573 && ((radix == 10) ? (intValue - 1 < -1)
574 : ((value & 0xFFFFFFFF00000000L) != 0)))) {
575 intValue = 0; // so we don't get second overflow in Parser
576 longValue = 0;
577 env.error(pos, "overflow");
578 }
579 } // scanNumber()
580
581 /**
582 * Scan a float. We are either looking at the decimal, or we have already seen it and
583 * put it into the buffer. We haven't seen an exponent. Scan a float. Should be called
584 * with the current character is either the 'e', 'E' or '.'
585 */
586 private void scanReal() throws IOException {
587 boolean seenExponent = false;
588 boolean isSingleFloat = false;
589 char lastChar;
590 if (ch == '.') {
591 putCh(ch);
592 ch = in.read();
593 }
594
595 numberLoop:
596 for (;; ch = in.read()) {
597 switch (ch) {
598 case '0':
599 case '1':
600 case '2':
601 case '3':
602 case '4':
603 case '5':
604 case '6':
605 case '7':
606 case '8':
607 case '9':
608 putCh(ch);
609 break;
610 case 'e':
611 case 'E':
612 if (seenExponent) {
613 break numberLoop; // we'll get a format error
614 }
615 putCh(ch);
616 seenExponent = true;
617 break;
618 case '+':
619 case '-':
620 lastChar = buffer[count - 1];
621 if (lastChar != 'e' && lastChar != 'E') {
622 break numberLoop; // this isn't an error, though!
623 }
624 putCh(ch);
625 break;
626 case 'f':
627 case 'F':
628 ch = in.read(); // skip over 'f'
629 isSingleFloat = true;
630 break numberLoop;
631 case 'd':
632 case 'D':
633 ch = in.read(); // skip over 'd'
634 // fall through
635 default:
636 break numberLoop;
637 } // sswitch
638 } // loop
639
640 // we have just finished reading the number. The next thing better
641 // not be a letter or digit.
642 if (isUCDigit(ch) || isUCLetter(ch) || ch == '.') {
643 env.error(in.pos, "invalid.number", Character.toString((char)ch));
644 do {
645 ch = in.read();
646 } while (isUCDigit(ch) || isUCLetter(ch) || ch == '.');
647 doubleValue = 0;
648 token = Token.DOUBLEVAL;
649 } else {
650 token = isSingleFloat ? Token.FLOATVAL : Token.DOUBLEVAL;
651 try {
652 lastChar = buffer[count - 1];
653 if (lastChar == 'e' || lastChar == 'E'
654 || lastChar == '+' || lastChar == '-') {
655 env.error(in.pos - 1, "float.format");
656 } else if (isSingleFloat) {
657 floatValue = Float.valueOf(bufferString());
658 if (Float.isInfinite(floatValue)) {
659 env.error(pos, "overflow");
660 }
661 } else {
662 doubleValue = Double.valueOf(bufferString());
663 if (Double.isInfinite(doubleValue)) {
664 env.error(pos, "overflow");
665 env.error(pos, "overflow");
666 }
667 }
668 } catch (NumberFormatException ee) {
669 env.error(pos, "float.format");
670 doubleValue = 0;
671 floatValue = 0;
672 }
673 }
674 } // scanReal
675
676 /**
677 * Scan an escape character.
678 *
679 * @return the character or '\\'
680 */
681 private int scanEscapeChar() throws IOException {
682 int p = in.pos;
683
684 switch (ch = in.read()) {
685 case '0':
686 case '1':
687 case '2':
688 case '3':
689 case '4':
690 case '5':
691 case '6':
692 case '7': {
693 int n = ch - '0';
694 for (int i = 2; i > 0; i--) {
695 switch (ch = in.read()) {
696 case '0':
697 case '1':
698 case '2':
699 case '3':
700 case '4':
701 case '5':
702 case '6':
703 case '7':
704 n = (n << 3) + ch - '0';
705 break;
706 default:
707 if (n > 0xFF) {
708 env.error(p, "invalid.escape.char");
709 }
710 return n;
711 }
712 }
713 ch = in.read();
714 if (n > 0xFF) {
715 env.error(p, "invalid.escape.char");
716 }
717 return n;
718 }
719 case 'r':
720 ch = in.read();
721 return '\r';
722 case 'n':
723 ch = in.read();
724 return '\n';
725 case 'f':
726 ch = in.read();
727 return '\f';
728 case 'b':
729 ch = in.read();
730 return '\b';
731 case 't':
732 ch = in.read();
733 return '\t';
734 case '\\':
735 ch = in.read();
736 return '\\';
737 case '\"':
738 ch = in.read();
739 return '\"';
740 case '\'':
741 ch = in.read();
742 return '\'';
743 case 'u':
744 int unich = in.convertUnicode();
745 ch = in.read();
746 return unich;
747 }
748 return '\\';
749 }
750
751 /**
752 * Scan a string. The current character should be the opening " of the string.
753 */
754 private void scanString() throws IOException {
755 token = Token.STRINGVAL;
756 count = 0;
757 ch = in.read();
758
759 // Scan a String
760 while (true) {
761 switch (ch) {
762 case EOF:
763 env.error(pos, "eof.in.string");
764 stringValue = bufferString();
765 return;
766 case '\n':
767 ch = in.read();
768 env.error(pos, "newline.in.string");
769 stringValue = bufferString();
770 return;
771 case '"':
772 ch = in.read();
773 stringValue = bufferString();
774 return;
775 case '\\': {
776 int c = scanEscapeChar();
777 if (c >= 0) {
778 putCh((char) c);
779 }
780 break;
781 }
782 default:
783 putCh(ch);
784 ch = in.read();
785 break;
786 }
787 }
788 }
789
790
791 /**
792 * Scan an Identifier. The current character should be the first character of the
793 * identifier.
794 */
795 private void scanIdentifier(char[] prefix) throws IOException {
796 int firstChar;
797 count = 0;
798 if(prefix != null) {
799 for(;;) {
800 for (int i = 0; i < prefix.length; i++)
801 putCh(prefix[i]);
802 ch = in.read();
803 if (ch == '\\') {
804 ch = in.read();
805 if (ch == 'u') {
806 ch = in.convertUnicode();
807 if (!isUCLetter(ch) && !isUCDigit(ch)) {
808 prefix = new char[]{(char)ch};
809 continue;
810 }
811 } else if (escapingAllowed.test(ch)) {
812 prefix = new char[]{(char)ch};
813 continue;
814 }
815 int p = in.pos;
816 env.error(p, "invalid.escape.char");
817 }
818 break;
819 }
820 }
821 firstChar = ch;
822 boolean firstIteration = true;
823 scanloop:
824 while (true) {
825 putCh(ch);
826 ch = in.read();
827
828 // Check to see if the annotation marker is at
829 // the front of the identifier.
830 if (firstIteration && firstChar == '@') {
831 // May be a type annotation
832 if (ch == 'T') { // type annotation
833 putCh(ch);
834 ch = in.read();
835 }
836
837 // is either a runtime visible or invisible annotation
838 if (ch == '+' || ch == '-') { // regular annotation
839 // possible annotation -
840 // need to eat up the '@+' or '@-'
841 putCh(ch);
842 ch = in.read();
843 }
844 idValue = bufferString();
845 stringValue = idValue;
846 token = Token.ANNOTATION;
847 return;
848 }
849
850 firstIteration = false;
851 switch (ch) {
852 case 'a':
853 case 'b':
854 case 'c':
855 case 'd':
856 case 'e':
857 case 'f':
858 case 'g':
859 case 'h':
860 case 'i':
861 case 'j':
862 case 'k':
863 case 'l':
864 case 'm':
865 case 'n':
866 case 'o':
867 case 'p':
868 case 'q':
869 case 'r':
870 case 's':
871 case 't':
872 case 'u':
873 case 'v':
874 case 'w':
875 case 'x':
876 case 'y':
877 case 'z':
878 case 'A':
879 case 'B':
880 case 'C':
881 case 'D':
882 case 'E':
883 case 'F':
884 case 'G':
885 case 'H':
886 case 'I':
887 case 'J':
888 case 'K':
889 case 'L':
890 case 'M':
891 case 'N':
892 case 'O':
893 case 'P':
894 case 'Q':
895 case 'R':
896 case 'S':
897 case 'T':
898 case 'U':
899 case 'V':
900 case 'W':
901 case 'X':
902 case 'Y':
903 case 'Z':
904 case '0':
905 case '1':
906 case '2':
907 case '3':
908 case '4':
909 case '5':
910 case '6':
911 case '7':
912 case '8':
913 case '9':
914 case '$':
915 case '_':
916 case '-':
917 case '[':
918 case ']':
919 case '(':
920 case ')':
921 case '<':
922 case '>':
923 break;
924 case '/': {// may be comment right after identifier
925 int c = in.lookForward();
926 if ((c == '*') || (c == '/')) {
927 break scanloop; // yes, comment
928 }
929 break; // no, continue to parse identifier
930 }
931 case '\\':
932 ch = in.read();
933 if ( ch == 'u') {
934 ch = in.convertUnicode();
935 if (isUCLetter(ch) || isUCDigit(ch)) {
936 break;
937 }
938 } else if( escapingAllowed.test(ch)) {
939 break;
940 }
941 int p = in.pos;
942 env.error(p, "invalid.escape.char");
943 default:
944 // if ((!isUCDigit(ch)) && (!isUCLetter(ch))) {
945 break scanloop;
946 // }
947 } // end switch
948 } // end scanloop
949 idValue = bufferString();
950 stringValue = idValue;
951 token = keyword_token_ident(idValue);
952 debugStr(format("##### SCANNER (scanIdent) ######## token = %s value = \"%s\"\n", token, idValue));
953 } // end scanIdentifier
954
955 //==============================
956 @SuppressWarnings("empty-statement")
957 protected final void xscan() throws IOException {
958 docComment = null;
959 loop:
960 for (;;) {
961 pos = in.pos;
962 switch (ch) {
963 case EOF:
964 token = Token.EOF;
965 break loop;
966 case '\n':
967 case ' ':
968 case '\t':
969 case '\f':
970 ch = in.read();
971 break;
972 case '/':
973 switch (ch = in.read()) {
974 case '/':
975 // Parse a // comment
976 while (((ch = in.read()) != EOF) && (ch != '\n'));
977 break;
978 case '*':
979 ch = in.read();
980 if (ch == '*') {
981 docComment = scanDocComment();
982 } else {
983 skipComment();
984 }
985 break;
986 default:
987 token = Token.DIV;
988 break loop;
989 }
990 break;
991 case '"':
992 scanString();
993 break loop;
994 case '-':
995 intValue = -1;
996 token = Token.SIGN;
997 ch = in.read();
998 break loop;
999 case '+':
1000 intValue = +1;
1001 ch = in.read();
1002 token = Token.SIGN;
1003 break loop;
1004 case '0':
1005 case '1':
1006 case '2':
1007 case '3':
1008 case '4':
1009 case '5':
1010 case '6':
1011 case '7':
1012 case '8':
1013 case '9':
1014 scanNumber();
1015 break loop;
1016 case '.':
1017 switch (ch = in.read()) {
1018 case '0':
1019 case '1':
1020 case '2':
1021 case '3':
1022 case '4':
1023 case '5':
1024 case '6':
1025 case '7':
1026 case '8':
1027 case '9':
1028 count = 0;
1029 putCh('.');
1030 scanReal();
1031 break;
1032 default:
1033 token = Token.FIELD;
1034 }
1035 break loop;
1036 case '{':
1037 ch = in.read();
1038 token = Token.LBRACE;
1039 break loop;
1040 case '}':
1041 ch = in.read();
1042 token = Token.RBRACE;
1043 break loop;
1044 case ',':
1045 ch = in.read();
1046 token = Token.COMMA;
1047 break loop;
1048 case ';':
1049 ch = in.read();
1050 token = Token.SEMICOLON;
1051 break loop;
1052 case ':':
1053 ch = in.read();
1054 token = Token.COLON;
1055 break loop;
1056 case '=':
1057 if ((ch = in.read()) == '=') {
1058 ch = in.read();
1059 token = Token.EQ;
1060 break loop;
1061 }
1062 token = Token.ASSIGN;
1063 break loop;
1064 case 'a':
1065 case 'b':
1066 case 'c':
1067 case 'd':
1068 case 'e':
1069 case 'f':
1070 case 'g':
1071 case 'h':
1072 case 'i':
1073 case 'j':
1074 case 'k':
1075 case 'l':
1076 case 'm':
1077 case 'n':
1078 case 'o':
1079 case 'p':
1080 case 'q':
1081 case 'r':
1082 case 's':
1083 case 't':
1084 case 'u':
1085 case 'v':
1086 case 'w':
1087 case 'x':
1088 case 'y':
1089 case 'z':
1090 case 'A':
1091 case 'B':
1092 case 'C':
1093 case 'D':
1094 case 'E':
1095 case 'F':
1096 case 'G':
1097 case 'H':
1098 case 'I':
1099 case 'J':
1100 case 'K':
1101 case 'L':
1102 case 'M':
1103 case 'N':
1104 case 'O':
1105 case 'P':
1106 case 'Q':
1107 case 'R':
1108 case 'S':
1109 case 'T':
1110 case 'U':
1111 case 'V':
1112 case 'W':
1113 case 'X':
1114 case 'Y':
1115 case 'Z':
1116 case '$':
1117 case '_':
1118 case '@':
1119 case '[':
1120 case ']':
1121 case '(':
1122 case ')':
1123 case '<':
1124 case '>':
1125 scanIdentifier(null);
1126 break loop;
1127 case '\u001a':
1128 // Our one concession to DOS.
1129 if ((ch = in.read()) == EOF) {
1130 token = Token.EOF;
1131 break loop;
1132 }
1133 env.error(pos, "funny.char");
1134 ch = in.read();
1135 break;
1136 case '#':
1137 int c = in.lookForward();
1138 if (c == '{') {
1139 // '#' char denotes a "paramMethod name" token
1140 ch = in.read();
1141 token = Token.PARAM_NAME;
1142 break loop;
1143 }
1144 // otherwise, it is a normal cpref
1145 scanCPRef();
1146 break loop;
1147 case '\\':
1148 ch = in.read();
1149 if ( ch == 'u') {
1150 ch = in.convertUnicode();
1151 if (isUCLetter(ch)) {
1152 scanIdentifier(null);
1153 break loop;
1154 }
1155 } else if( escapingAllowed.test(ch)) {
1156 scanIdentifier(new char[]{'\\', (char)ch});
1157 break loop;
1158 }
1159 // if ((ch = in.read()) == 'u') {
1160 // ch = in.convertUnicode();
1161 // if (isUCLetter(ch)) {
1162 // scanIdentifier();
1163 // break loop;
1164 // }
1165 // }
1166 default:
1167 env.out.println("funny.char:" + env.lineNumber(pos) + "/" + (pos & ((1 << OFFSETBITS) - 1)));
1168 env.error(pos, "funny.char");
1169 ch = in.read();
1170 }
1171 }
1172 }
1173
1174 @Override
1175 protected void debugScan(String dbstr) {
1176 if (token == null) {
1177 env.traceln(dbstr + "<<<NULL TOKEN>>>");
1178 return;
1179 }
1180 env.trace(dbstr + token);
1181 switch (token) {
1182 case IDENT:
1183 env.traceln(" = '" + stringValue + "' {idValue = '" + idValue + "'}");
1184 break;
1185 case STRINGVAL:
1186 env.traceln(" = {stringValue}: \"" + stringValue + "\"");
1187 break;
1188 case INTVAL:
1189 env.traceln(" = {intValue}: " + intValue + "}");
1190 break;
1191 case FLOATVAL:
1192 env.traceln(" = {floatValue}: " + floatValue);
1193 break;
1194 case DOUBLEVAL:
1195 env.traceln(" = {doubleValue}: " + doubleValue);
1196 break;
1197 default:
1198 env.traceln("");
1199 }
1200 }
1201
1202 private Predicate<Integer> noFunc = (ch)-> false;
1203 private Predicate<Integer> yesAndProcessFunc = (ch) -> {
1204 boolean res = ((ch == '\\') || (ch == ':') || (ch == '@'));
1205 if (res)
1206 putCh('\\');
1207 return res;
1208 };
1209 }