1 /*
2 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package org.openjdk.asmtools.jcoder;
24
25 import static org.openjdk.asmtools.jcoder.JcodTokens.*;
26
27 import java.io.IOException;
28 import java.util.HashMap;
29
30 /**
31 * A Scanner for Jcoder tokens. Errors are reported to the environment object.<p>
32 *
33 * The scanner keeps track of the current token, the value of the current token (if any),
34 * and the start position of the current token.<p>
35 *
36 * The scan() method advances the scanner to the next token in the input.<p>
37 *
38 * The match() method is used to quickly match opening brackets (ie: '(', '{', or '[')
39 * with their closing counter part. This is useful during error recovery.<p>
40 *
41 * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p>
42 */
43 public class Scanner {
44 /*-------------------------------------------------------- */
45 /* Scanner Fields */
46
47 /**
48 * End of input
49 */
50 public static final int EOF = -1;
51 public static final int LBRACE = 123; // "{"
52 private boolean debugCP = false;
53 private int numCPentrs = 0;
54
55 /**
56 * Where errors are reported
57 */
58 protected SourceFile env;
59
60 /**
61 * Input stream
62 */
63 protected SourceFile in;
64 HashMap<String, String> macros;
65
66 /**
67 * The current character
68 */
69 protected int ch, prevCh = -1;
70 protected String macro;
71 protected int indexMacro;
72
73 /**
74 * Current token
75 */
76 protected Token token;
77
78 /**
79 * The position of the current token
80 */
81 protected int pos;
82
83 /**
84 * The position of the previous token
85 */
86 protected int prevPos;
87
88 /* Token values. */
89 protected long longValue;
90 protected int intValue;
91 protected int intSize;
92 protected String stringValue;
93 protected ByteBuffer longStringValue;
94 protected int sign; // sign, when reading number
95
96 /* A doc comment preceding the most recent token */
97 protected String docComment;
98
99 /**
100 * A growable character buffer.
101 */
102 private int count;
103 private char[] buffer = new char[32];
104
105 /*-------------------------------------------------------- */
106 /**
107 * Create a scanner to scan an input stream.
108 */
109 protected Scanner(SourceFile sf, HashMap<String, String> macros)
110 throws IOException {
111 this.env = sf;
112 this.in = sf;
113 this.macros = macros;
114
115 ch = sf.read();
116 prevPos = sf.pos;
117
118 scan();
119 }
120
121 /**
122 * for use in jcfront.
123 */
124 protected Scanner(SourceFile sf)
125 throws IOException {
126 this.env = sf;
127 this.in = sf;
128 this.macros = new HashMap<>();
129
130 ch = sf.read();
131 prevPos = sf.pos;
132
133 scan();
134 }
135
136 /* *********************************************** */
137 void setDebugCP(boolean enable) {
138 if (enable) {
139 numCPentrs = 0;
140 }
141 debugCP = enable;
142
143 }
144
145 void addConstDebug(ConstType ct) {
146 numCPentrs += 1;
147 env.traceln("\n Const[" + numCPentrs + "] = " + ct.printval());
148 }
149
150 void setMacro(String macro) {
151 this.macro = macro;
152 indexMacro = 0;
153 prevCh = ch;
154 }
155
156 void readCh() throws IOException {
157 if (macro != null) {
158 if (indexMacro < macro.length()) {
159 ch = macro.charAt(indexMacro);
160 }
161 macro = null;
162 }
163 if (prevCh >= 0) {
164 ch = prevCh;
165 prevCh = -1;
166 } else {
167 ch = in.read();
168 }
169 }
170
171 private void putc(int ch) {
172 if (count == buffer.length) {
173 char[] newBuffer = new char[buffer.length * 2];
174 System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
175 buffer = newBuffer;
176 }
177 buffer[count++] = (char) ch;
178 }
179
180 private String bufferString() {
181 char[] buf = new char[count];
182 System.arraycopy(buffer, 0, buf, 0, count);
183 return new String(buf);
184 }
185
186 /**
187 * Scan a comment. This method should be called once the initial /, * and the next
188 * character have been read.
189 */
190 private void skipComment() throws IOException {
191 while (true) {
192 switch (ch) {
193 case EOF:
194 env.error(pos, "eof.in.comment");
195 return;
196
197 case '*':
198 readCh();
199 if (ch == '/') {
200 readCh();
201 return;
202 }
203 break;
204
205 default:
206 readCh();
207 break;
208 }
209 }
210 }
211
212 /**
213 * Scan a doc comment. This method should be called once the initial /, * and * have
214 * been read. It gathers the content of the comment (witout leading spaces and '*'s)
215 * in the string buffer.
216 */
217 private String scanDocComment() throws IOException {
218 count = 0;
219
220 if (ch == '*') {
221 do {
222 readCh();
223 } while (ch == '*');
224 if (ch == '/') {
225 readCh();
226 return "";
227 }
228 }
229 switch (ch) {
230 case '\n':
231 case ' ':
232 readCh();
233 break;
234 }
235
236 boolean seenstar = false;
237 int c = count;
238 while (true) {
239 switch (ch) {
240 case EOF:
241 env.error(pos, "eof.in.comment");
242 return bufferString();
243
244 case '\n':
245 putc('\n');
246 readCh();
247 seenstar = false;
248 c = count;
249 break;
250
251 case ' ':
252 case '\t':
253 putc(ch);
254 readCh();
255 break;
256
257 case '*':
258 if (seenstar) {
259 readCh();
260 if (ch == '/') {
261 readCh();
262 count = c;
263 return bufferString();
264 }
265 putc('*');
266 } else {
267 seenstar = true;
268 count = c;
269 do {
270 readCh();
271 } while (ch == '*');
272 switch (ch) {
273 case ' ':
274 readCh();
275 break;
276
277 case '/':
278 readCh();
279 count = c;
280 return bufferString();
281 }
282 }
283 break;
284
285 default:
286 if (!seenstar) {
287 seenstar = true;
288 }
289 putc(ch);
290 readCh();
291 c = count;
292 break;
293 }
294 }
295 }
296
297 /**
298 * Scan a decimal number
299 */
300 private void scanDecNumber() throws IOException {
301 boolean overflow = false;
302 long value = ch - '0';
303 count = 0;
304 token = Token.INTVAL;
305 intSize = 2; // default
306 putc(ch); // save character in buffer
307 numberLoop:
308 for (;;) {
309 readCh();
310 switch (ch) {
311 case '8':
312 case '9':
313 case '0':
314 case '1':
315 case '2':
316 case '3':
317 case '4':
318 case '5':
319 case '6':
320 case '7':
321 putc(ch);
322 overflow = overflow || (value * 10) / 10 != value;
323 value = (value * 10) + (ch - '0');
324 overflow = overflow || (value - 1 < -1);
325 break;
326 case 'b':
327 readCh();
328 intSize = 1;
329 break numberLoop;
330 case 's':
331 readCh();
332 intSize = 2;
333 break numberLoop;
334 case 'i':
335 readCh();
336 intSize = 4;
337 break numberLoop;
338 case 'l':
339 readCh();
340 intSize = 8;
341 break numberLoop;
342 default:
343 break numberLoop;
344 }
345 }
346 longValue = value;
347 intValue = (int) value;
348 // we have just finished reading the number. The next thing better
349 // not be a letter or digit.
350 if (Character.isJavaIdentifierPart((char) ch) || ch == '.') {
351 env.error(in.pos, "invalid.number", Character.toString((char)ch));
352 do {
353 readCh();
354 } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
355 return;
356 }
357 if (overflow) {
358 env.error(pos, "overflow");
359 }
360 } // scanNumber()
361
362 /**
363 * Scan a hex number.
364 */
365 private void scanHexNumber() throws IOException {
366 boolean overflow = false;
367 long value = 0;
368 int cypher;
369 count = 0;
370 token = Token.INTVAL;
371 intSize = 2; // default
372 putc(ch); // save character in buffer
373 numberLoop:
374 for (int k = 0;; k++) {
375 readCh();
376 switch (ch) {
377 case '8':
378 case '9':
379 case '0':
380 case '1':
381 case '2':
382 case '3':
383 case '4':
384 case '5':
385 case '6':
386 case '7':
387 cypher = (char) ch - '0';
388 break;
389 case 'd':
390 case 'D':
391 case 'e':
392 case 'E':
393 case 'f':
394 case 'F':
395 case 'a':
396 case 'A':
397 case 'b':
398 case 'B':
399 case 'c':
400 case 'C':
401 cypher = 10 + Character.toLowerCase((char) ch) - 'a';
402 break;
403
404 default:
405 break numberLoop;
406 }
407 putc(ch);
408 overflow = overflow || ((value >>> 60) != 0);
409 value = (value << 4) + cypher;
410 intSize = (k + 1) / 2;
411 }
412 longValue = value;
413 intValue = (int) value;
414 // we have just finished reading the number. The next thing better
415 // not be a letter or digit.
416 if (Character.isJavaIdentifierPart((char) ch) || ch == '.') {
417 env.error(in.pos, "invalid.number", Character.toString((char)ch));
418 do {
419 readCh();
420 } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
421 intValue = 0;
422 // } else if ( overflow || (intValue - 1 < -1) ) {
423 } else if (overflow) {
424 intValue = 0; // so we don't get second overflow in Parser
425 env.error(pos, "overflow");
426 }
427 } // scanNumber()
428
429 /**
430 * Scan an escape character.
431 *
432 * @return the character or -1 if it escaped an end-of-line.
433 */
434 private int scanEscapeChar() throws IOException {
435 int p = in.pos;
436
437 readCh();
438 switch (ch) {
439 case '0':
440 case '1':
441 case '2':
442 case '3':
443 case '4':
444 case '5':
445 case '6':
446 case '7': {
447 int n = ch - '0';
448 for (int i = 2; i > 0; i--) {
449 readCh();
450 switch (ch) {
451 case '0':
452 case '1':
453 case '2':
454 case '3':
455 case '4':
456 case '5':
457 case '6':
458 case '7':
459 n = (n << 3) + ch - '0';
460 break;
461
462 default:
463 if (n > 0xFF) {
464 env.error(p, "invalid.escape.char");
465 }
466 return n;
467 }
468 }
469 readCh();
470 if (n > 0xFF) {
471 env.error(p, "invalid.escape.char");
472 }
473 return n;
474 }
475 case 'r':
476 readCh();
477 return '\r';
478 case 'n':
479 readCh();
480 return '\n';
481 case 'f':
482 readCh();
483 return '\f';
484 case 'b':
485 readCh();
486 return '\b';
487 case 't':
488 readCh();
489 return '\t';
490 case '\\':
491 readCh();
492 return '\\';
493 case '\"':
494 readCh();
495 return '\"';
496 case '\'':
497 readCh();
498 return '\'';
499 }
500
501 env.error(p, "invalid.escape.char");
502 readCh();
503 return -1;
504 }
505
506 /**
507 * Scan a string. The current character should be the opening " of the string.
508 */
509 private void scanString() throws IOException {
510 token = Token.STRINGVAL;
511 count = 0;
512 readCh();
513
514 loop:
515 for (;;) {
516 switch (ch) {
517 case EOF:
518 env.error(pos, "eof.in.string");
519 break loop;
520
521 case '\n':
522 readCh();
523 env.error(pos, "newline.in.string");
524 break loop;
525
526 case '"':
527 readCh();
528 break loop;
529
530 case '\\': {
531 int c = scanEscapeChar();
532 if (c >= 0) {
533 putc((char)c);
534 }
535 break;
536 }
537
538 default:
539 putc(ch);
540 readCh();
541 break;
542 }
543 }
544 stringValue = bufferString();
545 }
546
547 /**
548 * Scan a character array. The current character should be the opening ' of the array.
549 */
550 private void scanCharArray() throws IOException {
551 token = Token.LONGSTRINGVAL;
552 ByteBuffer buf = new ByteBuffer();
553 count = 0;
554 readCh();
555
556 loop:
557 for (;;) {
558 int c = ch;
559 switch (ch) {
560 case EOF:
561 env.error(pos, "eof.in.string");
562 break loop;
563
564 case '\n':
565 readCh();
566 env.error(pos, "newline.in.string");
567 break loop;
568
569 case '\'':
570 readCh();
571 break loop;
572
573 case '\\':
574 c = scanEscapeChar();
575 if (c < 0) {
576 break;
577 }
578 // no break - continue
579 default:
580 // see description of java.io.DataOutput.writeUTF()
581 if ((c > 0) && (c <= 0x7F)) {
582 buf.write(c);
583 } else if ((c == 0) || ((c >= 0x80) && (c <= 0x7FF))) {
584 buf.write(0xC0 | (0x1F & (c >> 6)));
585 buf.write(0x80 | (0x3f & c));
586 } else {
587 buf.write(0xc0 | (0x0f & (c >> 12)));
588 buf.write(0x80 | (0x3f & (c >> 6)));
589 buf.write(0x80 | (0x3f & c));
590 }
591 readCh();
592 }
593 }
594 longStringValue = buf;
595 }
596
597 /**
598 * Scan an Identifier. The current character should be the first character of the
599 * identifier.
600 */
601 private void scanIdentifier() throws IOException {
602 count = 0;
603 boolean compound = false;
604 for (;;) {
605 putc(ch);
606 readCh();
607 if ((ch == '/') || (ch == '.') || (ch == '-')) {
608 compound = true;
609 } else if (!Character.isJavaIdentifierPart((char) ch)) {
610 break;
611 }
612 }
613 stringValue = bufferString();
614 if (compound) {
615 token = Token.IDENT;
616 } else {
617 token = keyword_token_ident(stringValue);
618 if (token == Token.IDENT) {
619 intValue = constValue(stringValue);
620 if (intValue != -1) {
621 // this is a constant
622 if (debugCP) {
623 ConstType ct = constType(stringValue);
624 if (ct != null) {
625 addConstDebug(ct);
626 }
627 }
628 token = Token.INTVAL;
629 intSize = 1;
630 longValue = intValue;
631 }
632 }
633 }
634 } // end scanIdentifier
635
636 // skip till symbol
637 protected void skipTill(int sym) throws IOException {
638 while (true) {
639 if( ch == EOF ) {
640 env.error(pos, "eof.in.comment");
641 return;
642 } else if (ch == sym) {
643 return;
644 }
645 readCh();
646 }
647 }
648
649 protected int xscan() throws IOException {
650 int retPos = pos;
651 prevPos = in.pos;
652 docComment = null;
653 sign = 1;
654 for (;;) {
655 pos = in.pos;
656
657 switch (ch) {
658 case EOF:
659 token = Token.EOF;
660 return retPos;
661
662 case '\n':
663 case ' ':
664 case '\t':
665 case '\f':
666 readCh();
667 break;
668
669 case '/':
670 readCh();
671 switch (ch) {
672 case '/':
673 // Parse a // comment
674 do {
675 readCh();
676 } while ((ch != EOF) && (ch != '\n'));
677 break;
678
679 case '*':
680 readCh();
681 if (ch == '*') {
682 docComment = scanDocComment();
683 } else {
684 skipComment();
685 }
686 break;
687
688 default:
689 token = Token.DIV;
690 return retPos;
691 }
692 break;
693
694 case '"':
695 scanString();
696 return retPos;
697
698 case '\'':
699 scanCharArray();
700 return retPos;
701
702 case '-':
703 sign = -sign; // hack: no check that numbers only are allowed after
704 case '+':
705 readCh();
706 break;
707
708 case '0':
709 readCh();
710 token = Token.INTVAL;
711 longValue = intValue = 0;
712 switch (ch) {
713 case 'x':
714 case 'X':
715 scanHexNumber();
716 break;
717 case '0':
718 case '1':
719 case '2':
720 case '3':
721 case '4':
722 case '5':
723 case '6':
724 case '7':
725 case '8':
726 case '9':
727 scanDecNumber();
728 break;
729 case 'b':
730 readCh();
731 intSize = 1;
732 break;
733 case 's':
734 readCh();
735 intSize = 2;
736 break;
737 case 'i':
738 readCh();
739 intSize = 4;
740 break;
741 case 'l':
742 readCh();
743 intSize = 8;
744 break;
745 default:
746 intSize = 2;
747 }
748 return retPos;
749
750 case '1':
751 case '2':
752 case '3':
753 case '4':
754 case '5':
755 case '6':
756 case '7':
757 case '8':
758 case '9':
759 scanDecNumber();
760 return retPos;
761
762 case '{':
763 readCh();
764 token = Token.LBRACE;
765 return retPos;
766
767 case '}':
768 readCh();
769 token = Token.RBRACE;
770 return retPos;
771
772 case '(':
773 readCh();
774 token = Token.LPAREN;
775 return retPos;
776
777 case ')':
778 readCh();
779 token = Token.RPAREN;
780 return retPos;
781
782 case '[':
783 readCh();
784 token = Token.LSQBRACKET;
785 return retPos;
786
787 case ']':
788 readCh();
789 token = Token.RSQBRACKET;
790 return retPos;
791
792 case ',':
793 readCh();
794 token = Token.COMMA;
795 return retPos;
796
797 case ';':
798 readCh();
799 token = Token.SEMICOLON;
800 return retPos;
801
802 case ':':
803 readCh();
804 token = Token.COLON;
805 return retPos;
806
807 case '=':
808 readCh();
809 if (ch == '=') {
810 readCh();
811 token = Token.EQ;
812 return retPos;
813 }
814 token = Token.ASSIGN;
815 return retPos;
816
817 case '\u001a':
818 // Our one concession to DOS.
819 readCh();
820 if (ch == EOF) {
821 token = Token.EOF;
822 return retPos;
823 }
824 env.error(pos, "funny.char");
825 readCh();
826 break;
827
828 case '#':
829 readCh();
830 scanDecNumber();
831 return retPos;
832
833 case '&': {
834 readCh();
835 retPos = pos;
836 if (!Character.isJavaIdentifierStart((char) ch)) {
837 env.error(pos, "identifier.expected");
838 }
839 scanIdentifier();
840 String macroId = stringValue;
841 String macro = (String) macros.get(macroId);
842 if (macro == null) {
843 env.error(pos, "macro.undecl", macroId);
844 throw new SyntaxError();
845 }
846 setMacro(macro);
847 readCh();
848 }
849 break;
850
851 default:
852 if (Character.isJavaIdentifierStart((char) ch)) {
853 scanIdentifier();
854 return retPos;
855 }
856 env.error(pos, "funny.char");
857 readCh();
858 break;
859 }
860 }
861 }
862
863 /**
864 * Scan to a matching '}', ']' or ')'. The current token must be a '{', '[' or '(';
865 */
866 protected void match(Token open, Token close) throws IOException {
867 int depth = 1;
868
869 while (true) {
870 scan();
871 if (token == open) {
872 depth++;
873 } else if (token == close) {
874 if (--depth == 0) {
875 return;
876 }
877 } else if (token == Token.EOF) {
878 env.error(pos, "unbalanced.paren");
879 return;
880 }
881 }
882 }
883
884 /**
885 * Scan the next token.
886 *
887 * @return the position of the previous token.
888 */
889 protected int scan() throws IOException {
890 int retPos = xscan();
891 //env.traceln("scanned:"+token+" ("+keywordName(token)+")");
892 return retPos;
893 }
894
895 /**
896 * Scan the next token.
897 *
898 * @return the position of the previous token.
899 */
900 protected int scanMacro() throws IOException {
901 int retPos = xscan();
902 //env.traceln("scanned:"+token+" ("+keywordName(token)+")");
903 return retPos;
904 }
905 }