1 /*
  2  * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 package org.openjdk.asmtools.jcoder;
 24 
 25 import static org.openjdk.asmtools.jcoder.JcodTokens.*;
 26 
 27 import java.io.IOException;
 28 import java.util.HashMap;
 29 
 30 /**
 31  * A Scanner for Jcoder tokens. Errors are reported to the environment object.<p>
 32  *
 33  * The scanner keeps track of the current token, the value of the current token (if any),
 34  * and the start position of the current token.<p>
 35  *
 36  * The scan() method advances the scanner to the next token in the input.<p>
 37  *
 * The match() method is used to quickly match opening brackets (i.e. '(', '{', or '[')
 * with their closing counterparts. This is useful during error recovery.<p>
 40  *
 41  * The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p>
 42  */
 43 public class Scanner {
    /*-------------------------------------------------------- */
    /* Scanner Fields */

    /**
     * End of input: the value the scanning methods compare {@code ch} against to detect
     * that no more characters are available.
     */
    public static final int EOF = -1;
    // Character code of the opening brace.
    public static final int LBRACE = 123; // "{"
    // When true, scanIdentifier() traces each identifier it resolves to a constant
    // (see addConstDebug()).
    private boolean debugCP = false;
    // Number of constants traced so far by addConstDebug(); reset by setDebugCP(true).
    private int numCPentrs = 0;

    /**
     * Where errors are reported. The constructors assign the same SourceFile to both
     * {@code env} and {@code in}.
     */
    protected SourceFile env;

    /**
     * Input stream: characters are obtained with {@code in.read()} and positions are
     * taken from {@code in.pos}.
     */
    protected SourceFile in;
    // Macro table consulted by xscan() when it meets "&name": macro name -> macro text.
    HashMap<String, String> macros;

    /**
     * The current character ({@code ch}), and the character saved by setMacro()
     * ({@code prevCh}); a negative {@code prevCh} means nothing is saved.
     */
    protected int ch, prevCh = -1;
    // Macro text installed by setMacro(), or null when no macro is pending.
    protected String macro;
    // Index into the macro text; setMacro() resets it to 0.
    protected int indexMacro;

    /**
     * Current token
     */
    protected Token token;

    /**
     * The position of the current token. xscan() refreshes it from {@code in.pos} at the
     * start of every iteration of its scanning loop.
     */
    protected int pos;

    /**
     * The position of the previous token. xscan() sets it from {@code in.pos} on entry.
     */
    protected int prevPos;

    /*  Token values. */
    // Value of a numeric token as a long.
    protected long longValue;
    // Value of a numeric token truncated to int (also the value of a named constant).
    protected int intValue;
    // Size attribute of a numeric token: 1, 2, 4 or 8 for the b/s/i/l suffixes,
    // or a value derived from the digit count for hex numbers.
    protected int intSize;
    // Text of the last identifier or string token.
    protected String stringValue;
    // Encoded bytes of the last character-array ('...') token.
    protected ByteBuffer longStringValue;
    protected int sign; // sign, when reading number

    /*  A doc comment preceding the most recent token */
    protected String docComment;

    /**
     * A growable character buffer: {@code count} is the number of characters currently
     * held, {@code buffer} is the backing array (grown on demand by putc()).
     */
    private int count;
    private char[] buffer = new char[32];
104 
105     /*-------------------------------------------------------- */
106     /**
107      * Create a scanner to scan an input stream.
108      */
109     protected Scanner(SourceFile sf, HashMap<String, String> macros)
110             throws IOException {
111         this.env = sf;
112         this.in = sf;
113         this.macros = macros;
114 
115         ch = sf.read();
116         prevPos = sf.pos;
117 
118         scan();
119     }
120 
121     /**
122      * for use in jcfront.
123      */
124     protected Scanner(SourceFile sf)
125             throws IOException {
126         this.env = sf;
127         this.in = sf;
128         this.macros = new HashMap<>();
129 
130         ch = sf.read();
131         prevPos = sf.pos;
132 
133         scan();
134     }
135 
136     /* *********************************************** */
137     void setDebugCP(boolean enable) {
138         if (enable) {
139             numCPentrs = 0;
140         }
141         debugCP = enable;
142 
143     }
144 
145     void addConstDebug(ConstType ct) {
146         numCPentrs += 1;
147         env.traceln("\n Const[" + numCPentrs + "] = " + ct.printval());
148     }
149 
150     void setMacro(String macro) {
151         this.macro = macro;
152         indexMacro = 0;
153         prevCh = ch;
154     }
155 
156     void readCh() throws IOException {
157         if (macro != null) {
158             if (indexMacro < macro.length()) {
159                 ch = macro.charAt(indexMacro);
160             }
161             macro = null;
162         }
163         if (prevCh >= 0) {
164             ch = prevCh;
165             prevCh = -1;
166         } else {
167             ch = in.read();
168         }
169     }
170 
171     private void putc(int ch) {
172         if (count == buffer.length) {
173             char[] newBuffer = new char[buffer.length * 2];
174             System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
175             buffer = newBuffer;
176         }
177         buffer[count++] = (char) ch;
178     }
179 
180     private String bufferString() {
181         char[] buf = new char[count];
182         System.arraycopy(buffer, 0, buf, 0, count);
183         return new String(buf);
184     }
185 
186     /**
187      * Scan a comment. This method should be called once the initial /, * and the next
188      * character have been read.
189      */
190     private void skipComment() throws IOException {
191         while (true) {
192             switch (ch) {
193                 case EOF:
194                     env.error(pos, "eof.in.comment");
195                     return;
196 
197                 case '*':
198                     readCh();
199                     if (ch == '/') {
200                         readCh();
201                         return;
202                     }
203                     break;
204 
205                 default:
206                     readCh();
207                     break;
208             }
209         }
210     }
211 
    /**
     * Scan a doc comment. This method should be called once the initial /, * and * have
     * been read. It gathers the content of the comment (without leading spaces and '*'s)
     * in the string buffer.
     *
     * @return the gathered comment text; the empty string when the comment closes right
     *         after its opening run of '*'; whatever was gathered so far when end of
     *         input is reached (an "eof.in.comment" error is reported in that case)
     */
    private String scanDocComment() throws IOException {
        // Start with an empty buffer.
        count = 0;

        // Skip the rest of the opening run of '*'; a '/' right after it closes the comment.
        if (ch == '*') {
            do {
                readCh();
            } while (ch == '*');
            if (ch == '/') {
                readCh();
                return "";
            }
        }
        // Drop a single newline or space that directly follows the opening.
        switch (ch) {
            case '\n':
            case ' ':
                readCh();
                break;
        }

        // seenstar: the current line's leading '*' run (or some content) has been seen.
        // c: buffer length to roll back to; whitespace is buffered tentatively beyond it
        //    and discarded by "count = c" when it turns out to be leading/trailing.
        boolean seenstar = false;
        int c = count;
        while (true) {
            switch (ch) {
                case EOF:
                    env.error(pos, "eof.in.comment");
                    return bufferString();

                case '\n':
                    // Keep the line break and start a fresh line.
                    putc('\n');
                    readCh();
                    seenstar = false;
                    c = count;
                    break;

                case ' ':
                case '\t':
                    // Buffered tentatively; c is not advanced.
                    putc(ch);
                    readCh();
                    break;

                case '*':
                    if (seenstar) {
                        // A '*' inside the line: either the start of the closing "*/"
                        // or an ordinary character.
                        readCh();
                        if (ch == '/') {
                            readCh();
                            count = c; // drop trailing whitespace
                            return bufferString();
                        }
                        putc('*');
                    } else {
                        // Leading '*' run of a line: discard the whitespace before it
                        // and the run itself.
                        seenstar = true;
                        count = c;
                        do {
                            readCh();
                        } while (ch == '*');
                        switch (ch) {
                            case ' ':
                                // Skip one space after the leading stars.
                                readCh();
                                break;

                            case '/':
                                // The run ended in '/': the comment is closed.
                                readCh();
                                count = c;
                                return bufferString();
                        }
                    }
                    break;

                default:
                    // Ordinary content: keep it (together with any whitespace buffered
                    // before it) and move the rollback mark past it.
                    if (!seenstar) {
                        seenstar = true;
                    }
                    putc(ch);
                    readCh();
                    c = count;
                    break;
            }
        }
    }
296 
297     /**
298      * Scan a decimal number
299      */
300     private void scanDecNumber() throws IOException {
301         boolean overflow = false;
302         long value = ch - '0';
303         count = 0;
304         token = Token.INTVAL;
305         intSize = 2; // default
306         putc(ch);    // save character in buffer
307 numberLoop:
308         for (;;) {
309             readCh();
310             switch (ch) {
311                 case '8':
312                 case '9':
313                 case '0':
314                 case '1':
315                 case '2':
316                 case '3':
317                 case '4':
318                 case '5':
319                 case '6':
320                 case '7':
321                     putc(ch);
322                     overflow = overflow || (value * 10) / 10 != value;
323                     value = (value * 10) + (ch - '0');
324                     overflow = overflow || (value - 1 < -1);
325                     break;
326                 case 'b':
327                     readCh();
328                     intSize = 1;
329                     break numberLoop;
330                 case 's':
331                     readCh();
332                     intSize = 2;
333                     break numberLoop;
334                 case 'i':
335                     readCh();
336                     intSize = 4;
337                     break numberLoop;
338                 case 'l':
339                     readCh();
340                     intSize = 8;
341                     break numberLoop;
342                 default:
343                     break numberLoop;
344             }
345         }
346         longValue = value;
347         intValue = (int) value;
348         // we have just finished reading the number.  The next thing better
349         // not be a letter or digit.
350         if (Character.isJavaIdentifierPart((char) ch) || ch == '.') {
351             env.error(in.pos, "invalid.number", Character.toString((char)ch));
352             do {
353                 readCh();
354             } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
355             return;
356         }
357         if (overflow) {
358             env.error(pos, "overflow");
359         }
360     } // scanNumber()
361 
362     /**
363      * Scan a hex number.
364      */
365     private void scanHexNumber() throws IOException {
366         boolean overflow = false;
367         long value = 0;
368         int cypher;
369         count = 0;
370         token = Token.INTVAL;
371         intSize = 2; // default
372         putc(ch);    // save character in buffer
373 numberLoop:
374         for (int k = 0;; k++) {
375             readCh();
376             switch (ch) {
377                 case '8':
378                 case '9':
379                 case '0':
380                 case '1':
381                 case '2':
382                 case '3':
383                 case '4':
384                 case '5':
385                 case '6':
386                 case '7':
387                     cypher = (char) ch - '0';
388                     break;
389                 case 'd':
390                 case 'D':
391                 case 'e':
392                 case 'E':
393                 case 'f':
394                 case 'F':
395                 case 'a':
396                 case 'A':
397                 case 'b':
398                 case 'B':
399                 case 'c':
400                 case 'C':
401                     cypher = 10 + Character.toLowerCase((char) ch) - 'a';
402                     break;
403 
404                 default:
405                     break numberLoop;
406             }
407             putc(ch);
408             overflow = overflow || ((value >>> 60) != 0);
409             value = (value << 4) + cypher;
410             intSize = (k + 1) / 2;
411         }
412         longValue = value;
413         intValue = (int) value;
414         // we have just finished reading the number.  The next thing better
415         // not be a letter or digit.
416         if (Character.isJavaIdentifierPart((char) ch) || ch == '.') {
417             env.error(in.pos, "invalid.number", Character.toString((char)ch));
418             do {
419                 readCh();
420             } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
421             intValue = 0;
422 //        } else if ( overflow || (intValue - 1 < -1) ) {
423         } else if (overflow) {
424             intValue = 0;   // so we don't get second overflow in Parser
425             env.error(pos, "overflow");
426         }
427     } // scanNumber()
428 
429     /**
430      * Scan an escape character.
431      *
432      * @return the character or -1 if it escaped an end-of-line.
433      */
434     private int scanEscapeChar() throws IOException {
435         int p = in.pos;
436 
437         readCh();
438         switch (ch) {
439             case '0':
440             case '1':
441             case '2':
442             case '3':
443             case '4':
444             case '5':
445             case '6':
446             case '7': {
447                 int n = ch - '0';
448                 for (int i = 2; i > 0; i--) {
449                     readCh();
450                     switch (ch) {
451                         case '0':
452                         case '1':
453                         case '2':
454                         case '3':
455                         case '4':
456                         case '5':
457                         case '6':
458                         case '7':
459                             n = (n << 3) + ch - '0';
460                             break;
461 
462                         default:
463                             if (n > 0xFF) {
464                                 env.error(p, "invalid.escape.char");
465                             }
466                             return n;
467                     }
468                 }
469                 readCh();
470                 if (n > 0xFF) {
471                     env.error(p, "invalid.escape.char");
472                 }
473                 return n;
474             }
475             case 'r':
476                 readCh();
477                 return '\r';
478             case 'n':
479                 readCh();
480                 return '\n';
481             case 'f':
482                 readCh();
483                 return '\f';
484             case 'b':
485                 readCh();
486                 return '\b';
487             case 't':
488                 readCh();
489                 return '\t';
490             case '\\':
491                 readCh();
492                 return '\\';
493             case '\"':
494                 readCh();
495                 return '\"';
496             case '\'':
497                 readCh();
498                 return '\'';
499         }
500 
501         env.error(p, "invalid.escape.char");
502         readCh();
503         return -1;
504     }
505 
506     /**
507      * Scan a string. The current character should be the opening " of the string.
508      */
509     private void scanString() throws IOException {
510         token = Token.STRINGVAL;
511         count = 0;
512         readCh();
513 
514 loop:
515         for (;;) {
516             switch (ch) {
517                 case EOF:
518                     env.error(pos, "eof.in.string");
519                     break loop;
520 
521                 case '\n':
522                     readCh();
523                     env.error(pos, "newline.in.string");
524                     break loop;
525 
526                 case '"':
527                     readCh();
528                     break loop;
529 
530                 case '\\': {
531                     int c = scanEscapeChar();
532                     if (c >= 0) {
533                         putc((char)c);
534                     }
535                     break;
536                 }
537 
538                 default:
539                     putc(ch);
540                     readCh();
541                     break;
542             }
543         }
544         stringValue = bufferString();
545     }
546 
547     /**
548      * Scan a character array. The current character should be the opening ' of the array.
549      */
550     private void scanCharArray() throws IOException {
551         token = Token.LONGSTRINGVAL;
552         ByteBuffer buf = new ByteBuffer();
553         count = 0;
554         readCh();
555 
556 loop:
557         for (;;) {
558             int c = ch;
559             switch (ch) {
560                 case EOF:
561                     env.error(pos, "eof.in.string");
562                     break loop;
563 
564                 case '\n':
565                     readCh();
566                     env.error(pos, "newline.in.string");
567                     break loop;
568 
569                 case '\'':
570                     readCh();
571                     break loop;
572 
573                 case '\\':
574                     c = scanEscapeChar();
575                     if (c < 0) {
576                         break;
577                     }
578                 // no break - continue
579                 default:
580                     // see  description of java.io.DataOutput.writeUTF()
581                     if ((c > 0) && (c <= 0x7F)) {
582                         buf.write(c);
583                     } else if ((c == 0) || ((c >= 0x80) && (c <= 0x7FF))) {
584                         buf.write(0xC0 | (0x1F & (c >> 6)));
585                         buf.write(0x80 | (0x3f & c));
586                     } else {
587                         buf.write(0xc0 | (0x0f & (c >> 12)));
588                         buf.write(0x80 | (0x3f & (c >> 6)));
589                         buf.write(0x80 | (0x3f & c));
590                     }
591                     readCh();
592             }
593         }
594         longStringValue = buf;
595     }
596 
597     /**
598      * Scan an Identifier. The current character should be the first character of the
599      * identifier.
600      */
601     private void scanIdentifier() throws IOException {
602         count = 0;
603         boolean compound = false;
604         for (;;) {
605             putc(ch);
606             readCh();
607             if ((ch == '/') || (ch == '.') || (ch == '-')) {
608                 compound = true;
609             } else if (!Character.isJavaIdentifierPart((char) ch)) {
610                 break;
611             }
612         }
613         stringValue = bufferString();
614         if (compound) {
615             token = Token.IDENT;
616         } else {
617             token = keyword_token_ident(stringValue);
618             if (token == Token.IDENT) {
619                 intValue = constValue(stringValue);
620                 if (intValue != -1) {
621                     // this is a constant
622                     if (debugCP) {
623                         ConstType ct = constType(stringValue);
624                         if (ct != null) {
625                             addConstDebug(ct);
626                         }
627                     }
628                     token = Token.INTVAL;
629                     intSize = 1;
630                     longValue = intValue;
631                 }
632             }
633         }
634     } // end scanIdentifier
635 
636     // skip till symbol
637     protected void skipTill(int sym) throws IOException {
638         while (true) {
639             if( ch == EOF ) {
640                 env.error(pos, "eof.in.comment");
641                 return;
642             } else if (ch == sym) {
643                 return;
644             }
645             readCh();
646         }
647     }
648 
    /**
     * Scans the next token: skips white space and comments, then sets {@code token}
     * (and the matching value fields) according to the character found.
     * <p>
     * On entry {@code prevPos} is refreshed from {@code in.pos}, {@code docComment} is
     * cleared and {@code sign} is reset to 1. Characters that cannot start a token are
     * reported as "funny.char" and skipped.
     *
     * @return the value {@code pos} had on entry; when a {@code &name} macro reference
     *         is met, the position recorded just after the '&' instead
     * @throws IOException if reading from the source file fails
     * @throws SyntaxError if an undeclared macro is referenced
     */
    protected int xscan() throws IOException {
        int retPos = pos;
        prevPos = in.pos;
        docComment = null;
        sign = 1;
        for (;;) {
            // Position of the lexeme starting at the current character.
            pos = in.pos;

            switch (ch) {
                case EOF:
                    token = Token.EOF;
                    return retPos;

                // White space is skipped.
                case '\n':
                case ' ':
                case '\t':
                case '\f':
                    readCh();
                    break;

                case '/':
                    readCh();
                    switch (ch) {
                        case '/':
                            // Parse a // comment
                            do {
                                readCh();
                            } while ((ch != EOF) && (ch != '\n'));
                            break;

                        case '*':
                            // A second '*' right after the opener makes it a doc comment.
                            readCh();
                            if (ch == '*') {
                                docComment = scanDocComment();
                            } else {
                                skipComment();
                            }
                            break;

                        default:
                            // A lone '/'.
                            token = Token.DIV;
                            return retPos;
                    }
                    break;

                case '"':
                    scanString();
                    return retPos;

                case '\'':
                    scanCharArray();
                    return retPos;

                case '-':
                    sign = -sign; // hack: no check that numbers only are allowed after
                // deliberate fall-through: '-' and '+' are both consumed without
                // producing a token of their own
                case '+':
                    readCh();
                    break;

                case '0':
                    // A leading zero: may be followed by a hex marker, more decimal
                    // digits, a size suffix, or nothing.
                    readCh();
                    token = Token.INTVAL;
                    longValue = intValue = 0;
                    switch (ch) {
                        case 'x':
                        case 'X':
                            scanHexNumber();
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                            scanDecNumber();
                            break;
                        case 'b':
                            readCh();
                            intSize = 1;
                            break;
                        case 's':
                            readCh();
                            intSize = 2;
                            break;
                        case 'i':
                            readCh();
                            intSize = 4;
                            break;
                        case 'l':
                            readCh();
                            intSize = 8;
                            break;
                        default:
                            intSize = 2;
                    }
                    return retPos;

                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    scanDecNumber();
                    return retPos;

                // Single-character punctuation tokens.
                case '{':
                    readCh();
                    token = Token.LBRACE;
                    return retPos;

                case '}':
                    readCh();
                    token = Token.RBRACE;
                    return retPos;

                case '(':
                    readCh();
                    token = Token.LPAREN;
                    return retPos;

                case ')':
                    readCh();
                    token = Token.RPAREN;
                    return retPos;

                case '[':
                    readCh();
                    token = Token.LSQBRACKET;
                    return retPos;

                case ']':
                    readCh();
                    token = Token.RSQBRACKET;
                    return retPos;

                case ',':
                    readCh();
                    token = Token.COMMA;
                    return retPos;

                case ';':
                    readCh();
                    token = Token.SEMICOLON;
                    return retPos;

                case ':':
                    readCh();
                    token = Token.COLON;
                    return retPos;

                case '=':
                    // "==" is EQ, a single '=' is ASSIGN.
                    readCh();
                    if (ch == '=') {
                        readCh();
                        token = Token.EQ;
                        return retPos;
                    }
                    token = Token.ASSIGN;
                    return retPos;

                case '\u001a':
                    // Our one concession to DOS.
                    // (Ctrl-Z is accepted only as the very last character of the input.)
                    readCh();
                    if (ch == EOF) {
                        token = Token.EOF;
                        return retPos;
                    }
                    env.error(pos, "funny.char");
                    readCh();
                    break;

                case '#':
                    // '#' followed by a decimal number; the '#' itself is dropped.
                    // NOTE(review): the character after '#' is not checked to be a digit.
                    readCh();
                    scanDecNumber();
                    return retPos;

                case '&': {
                    // Macro reference: "&name".
                    readCh();
                    retPos = pos;
                    if (!Character.isJavaIdentifierStart((char) ch)) {
                        env.error(pos, "identifier.expected");
                    }
                    scanIdentifier();
                    String macroId = stringValue;
                    String macro = (String) macros.get(macroId);
                    if (macro == null) {
                        env.error(pos, "macro.undecl", macroId);
                        throw new SyntaxError();
                    }
                    // Hand the macro text to the character reader and continue scanning.
                    setMacro(macro);
                    readCh();
                }
                break;

                default:
                    if (Character.isJavaIdentifierStart((char) ch)) {
                        scanIdentifier();
                        return retPos;
                    }
                    // Anything else cannot start a token: report it and move on.
                    env.error(pos, "funny.char");
                    readCh();
                    break;
            }
        }
    }
862 
863     /**
864      * Scan to a matching '}', ']' or ')'. The current token must be a '{', '[' or '(';
865      */
866     protected void match(Token open, Token close) throws IOException {
867         int depth = 1;
868 
869         while (true) {
870             scan();
871             if (token == open) {
872                 depth++;
873             } else if (token == close) {
874                 if (--depth == 0) {
875                     return;
876                 }
877             } else if (token == Token.EOF) {
878                 env.error(pos, "unbalanced.paren");
879                 return;
880             }
881         }
882     }
883 
884     /**
885      * Scan the next token.
886      *
887      * @return the position of the previous token.
888      */
889     protected int scan() throws IOException {
890         int retPos = xscan();
891 //env.traceln("scanned:"+token+" ("+keywordName(token)+")");
892         return retPos;
893     }
894 
895     /**
896      * Scan the next token.
897      *
898      * @return the position of the previous token.
899      */
900     protected int scanMacro() throws IOException {
901         int retPos = xscan();
902 //env.traceln("scanned:"+token+" ("+keywordName(token)+")");
903         return retPos;
904     }
905 }