1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 package jdk.incubator.code.parser;
 27 
 28 import java.io.IOException;
 29 import java.io.InputStream;
 30 import jdk.incubator.code.*;
 31 import jdk.incubator.code.type.FunctionType;
 32 import jdk.incubator.code.op.*;
 33 import jdk.incubator.code.parser.impl.DescParser;
 34 import jdk.incubator.code.parser.impl.Lexer;
 35 import jdk.incubator.code.parser.impl.Scanner;
 36 import jdk.incubator.code.parser.impl.Tokens;
 37 import jdk.incubator.code.type.CoreTypeFactory;
 38 import jdk.incubator.code.type.TypeElementFactory;
 39 import java.nio.charset.StandardCharsets;
 40 import java.util.ArrayList;
 41 import java.util.HashMap;
 42 import java.util.List;
 43 import java.util.Map;
 44 
 45 /**
 46  * A parser of serialized code models from their textual form.
 47  * <p>
 48  * The syntactic grammar of a code mode is specified in the grammar notation, and is a subset of the grammar,
 49  * specified by the JLS, see section 2.4. (Except that we cannot express non-terminal symbols in italic type.)
 50  * <p>
 51  * {@snippet lang=text :
 52  * Operation:
 53  *   [Value =] Name {Operands} {Successors} {Attributes} {Bodies} ;
 54  *
 55  * Operands:
 56  *   ValueIdentifier {ValueIdentifier}
 57  *
 58  * Successors:
 59  *   Successor {Successor}
 60  *
 61  * Successor:
 62  *   BlockIdentifier
 63  *   BlockIdentifier ()
 64  *   BlockIdentifier ( ValueIdentifier {, ValueIdentifier} )
 65  *
 66  * Attributes:
 67  *   Attribute {Attribute}
 68  *
 69  * Attribute:
 70  *   @ AttributeValue
 71  *   @ Name = AttributeValue
 72  *
 73  * AttributeValue:
 74  *   Name
 75  *   StringLiteral
 76  *   NullLiteral
 77  *
 78  * Bodies:
 79  *   Body {Body}
 80  *
 81  * Body:
 82  *   BlockIdentifier ( ) Type -> { Operations {Block} }
 83  *   BlockIdentifier ( Value {, Value} ) Type -> { Operations {Block} }
 84  *
 85  * Operations:
 86  *   Operation {Operation}
 87  *
 88  * Block:
 89  *   BlockIdentifier : Operations
 90  *   BlockIdentifier ( ) : Operations
 91  *   BlockIdentifier ( Value {, Value} ) : Operations
 92  *
 93  * BlockIdentifier:
 94  *   ^ Identifier
 95  *
 96  * Value:
 97  *   ValueIdentifier : Type
 98  *
 99  * ValueIdentifier:
100  *   % JavaLetterOrDigit {JavaLetterOrDigit}
101  *
102  * Name:
103  *   Identifier
104  *   Name . Identifier
105  *
106  * Type:
107  *   same as in section 4.1 of JLS but without any annotations
108  *
109  * Identifier:
110  *   same as in section 3 of JLS
111  *
112  * JavaLetterOrDigit:
113  *   same as in section 3 of JLS
114  *
115  * StringLiteral:
116  *   same as in section 3 of JLS
117  *
118  * NullLiteral:
119  *   same as in section 3 of JLS
120  * }
121  */
122 public final class OpParser {
123 
124     static final TypeElement.ExternalizedTypeElement VOID =
125             new TypeElement.ExternalizedTypeElement("void", List.of());
126 
127     /**
128      * Parse a code model from its serialized textual form obtained from an input stream.
129      *
130      * @param opFactory the operation factory used to construct operations from their general definition
131      * @param in the input stream
132      * @return the list of operations
133      * @throws IOException if parsing fails
134      */
135     public static List<Op> fromStream(OpFactory opFactory, InputStream in) throws IOException {
136         return fromStream(opFactory, CoreTypeFactory.CORE_TYPE_FACTORY, in);
137     }
138 
139     public static List<Op> fromStream(OpFactory opFactory, TypeElementFactory typeFactory, InputStream in) throws IOException {
140         String s = new String(in.readAllBytes(), StandardCharsets.UTF_8);
141         return fromString(opFactory, typeFactory, s);
142     }
143 
144     /**
145      * Parse a code model from its serialized textual form obtained from an input string.
146      *
147      * @param opFactory the operation factory used to construct operations from their general definition
148      * @param in the input string
149      * @return the list of operations
150      */
151     public static List<Op> fromString(OpFactory opFactory, String in) {
152         return parse(opFactory, CoreTypeFactory.CORE_TYPE_FACTORY, in);
153     }
154 
155     public static List<Op> fromString(OpFactory opFactory, TypeElementFactory typeFactory, String in) {
156         return parse(opFactory, typeFactory, in);
157     }
158 
159     /**
160      * Parse a code model, modeling a method, from its serialized textual form obtained from an input string.
161      *
162      * @param in the input string
163      * @return the func operation
164      */
165     public static Op fromStringOfFuncOp(String in) {
166         Op op = fromString(ExtendedOp.FACTORY, in).get(0);
167         if (!(op instanceof CoreOp.FuncOp)) {
168             throw new IllegalArgumentException("Op is not a FuncOp: " + op);
169         }
170         return op;
171     }
172 
173     static List<Op> parse(OpFactory opFactory, TypeElementFactory typeFactory, String in) {
174         Lexer lexer = Scanner.factory().newScanner(in);
175         lexer.nextToken();
176 
177         List<OpNode> opNodes = new OpParser(lexer).parseNodes();
178 
179         Context c = new Context(opFactory, typeFactory);
180         return opNodes.stream().map(n -> nodeToOp(n, VOID, c, null)).toList();
181     }
182 
183 
184     static final class Context {
185         final Context parent;
186         final OpFactory opFactory;
187         final TypeElementFactory typeFactory;
188         final Map<String, Value> valueMap;
189         final Map<String, Block.Builder> blockMap;
190 
191         Context(Context that, boolean isolated) {
192             this.parent = that;
193             this.opFactory = that.opFactory;
194             this.typeFactory = that.typeFactory;
195             this.valueMap = isolated ? new HashMap<>() : new HashMap<>(that.valueMap);
196             this.blockMap = new HashMap<>();
197         }
198 
199         Context(OpFactory opFactory, TypeElementFactory typeFactory) {
200             this.parent = null;
201             this.opFactory = opFactory;
202             this.typeFactory = typeFactory;
203             this.valueMap = new HashMap<>();
204             this.blockMap = new HashMap<>();
205         }
206 
207         Context fork(boolean isolated) {
208             return new Context(this, isolated);
209         }
210 
211         void putValue(String name, Value opr) {
212             valueMap.put(name, opr);
213         }
214 
215         Value getValue(String name) {
216             Value value = valueMap.get(name);
217             if (value == null) {
218                 // @@@ location
219                 throw new IllegalArgumentException("Undeclared value referenced: " + name);
220             }
221 
222             return value;
223         }
224 
225         void putBlock(String name, Block.Builder bm) {
226             blockMap.put(name, bm);
227         }
228 
229         Block.Builder getBlock(String name) {
230             Block.Builder block = blockMap.get(name);
231             if (block == null) {
232                 // @@@ location
233                 throw new IllegalArgumentException("Undeclared block referenced: " + name);
234             }
235 
236             return block;
237         }
238     }
239 
240     static Op nodeToOp(OpNode opNode, TypeElement.ExternalizedTypeElement rtype, Context c, Body.Builder ancestorBody) {
241         ExternalizableOp.ExternalizedOp opdef = nodeToOpDef(opNode, rtype, c, ancestorBody);
242         return c.opFactory.constructOpOrFail(opdef);
243     }
244 
245     static ExternalizableOp.ExternalizedOp nodeToOpDef(OpNode opNode, TypeElement.ExternalizedTypeElement rtype, Context c, Body.Builder ancestorBody) {
246         String operationName = opNode.name;
247         List<Value> operands = opNode.operands.stream().map(c::getValue).toList();
248         List<Block.Reference> successors = opNode.successors.stream()
249                 .map(n -> nodeToSuccessor(n, c)).toList();
250         List<Body.Builder> bodies = opNode.bodies.stream()
251                 .map(n -> nodeToBody(n, c.fork(false), ancestorBody)).toList();
252         return new ExternalizableOp.ExternalizedOp(operationName,
253                 operands,
254                 successors,
255                 c.typeFactory.constructType(rtype),
256                 opNode.attributes,
257                 bodies);
258     }
259 
260     static Body.Builder nodeToBody(BodyNode n, Context c, Body.Builder ancestorBody) {
261         Body.Builder body = Body.Builder.of(ancestorBody,
262                 // Create function type with just the return type and add parameters afterward
263                 FunctionType.functionType(c.typeFactory.constructType(n.rtype)));
264         Block.Builder eb = body.entryBlock();
265 
266         // Create blocks upfront for forward referencing successors
267         for (int i = 0; i < n.blocks.size(); i++) {
268             BlockNode bn = n.blocks.get(i);
269             Block.Builder b;
270             if (i == 0) {
271                 b = body.entryBlock();
272             } else {
273                 b = eb.block();
274                 c.putBlock(bn.name, b);
275             }
276 
277             for (ValueNode a : bn.parameters) {
278                 Block.Parameter v = b.parameter(c.typeFactory.constructType(a.type));
279                 c.putValue(a.name, v);
280             }
281         }
282 
283         // Create operations
284         for (int i = 0; i < n.blocks.size(); i++) {
285             BlockNode bn = n.blocks.get(i);
286             Block.Builder b;
287             if (i == 0) {
288                 b = body.entryBlock();
289             } else {
290                 b = c.getBlock(n.blocks.get(i).name);
291             }
292 
293             for (OpNode on : bn.ops) {
294                 ValueNode r = on.result;
295                 if (r != null) {
296                     Op.Result v = b.op(nodeToOp(on, r.type, c, body));
297                     c.putValue(r.name, v);
298                 } else {
299                     b.op(nodeToOp(on, VOID, c, body));
300                 }
301             }
302         }
303 
304         return body;
305     }
306 
307     static Block.Reference nodeToSuccessor(SuccessorNode n, Context c) {
308         return c.getBlock(n.blockName).successor(n.arguments().stream().map(c::getValue).toList());
309     }
310 
311     // @@@ Add tokens to access position of nodes on error
312 
313     record OpNode(ValueNode result,
314                   String name,
315                   List<String> operands,
316                   List<SuccessorNode> successors,
317                   Map<String, Object> attributes,
318                   List<BodyNode> bodies) {
319     }
320 
321     record SuccessorNode(String blockName,
322                          List<String> arguments) {
323     }
324 
325     record BodyNode(TypeElement.ExternalizedTypeElement rtype,
326                     List<BlockNode> blocks) {
327     }
328 
329     record BlockNode(String name,
330                      List<ValueNode> parameters,
331                      List<OpNode> ops) {
332     }
333 
334     record ValueNode(String name,
335                      TypeElement.ExternalizedTypeElement type) {
336     }
337 
338     final Lexer lexer;
339 
340     OpParser(Lexer lexer) {
341         this.lexer = lexer;
342     }
343 
344     List<OpNode> parseNodes() {
345         List<OpNode> ops = new ArrayList<>();
346         while (lexer.token().kind != Tokens.TokenKind.EOF) {
347             ops.add(parseOperation());
348         }
349         return ops;
350     }
351 
352     OpNode parseOperation() {
353         ValueNode operationResult;
354         if (lexer.is(Tokens.TokenKind.VALUE_IDENTIFIER)) {
355             operationResult = parseValueNode();
356             lexer.accept(Tokens.TokenKind.EQ);
357         } else {
358             operationResult = null;
359         }
360 
361         String operationName = parseName();
362 
363         // Operands
364         final List<String> operands;
365         if (lexer.is(Tokens.TokenKind.VALUE_IDENTIFIER)) {
366             operands = parseOperands();
367         } else {
368             operands = List.of();
369         }
370 
371         // Successors
372         // ^name(%x, %d)
373         final List<SuccessorNode> successors;
374         if (lexer.is(Tokens.TokenKind.CARET)) {
375             successors = parseSuccessors();
376         } else {
377             successors = List.of();
378         }
379 
380         // Attributes
381         final Map<String, Object> attributes;
382         if (lexer.is(Tokens.TokenKind.MONKEYS_AT)) {
383             attributes = parseAttributes();
384         } else {
385             attributes = Map.of();
386         }
387 
388         // Bodies
389         List<BodyNode> bodies;
390         if (lexer.is(Tokens.TokenKind.CARET) || lexer.is(Tokens.TokenKind.LPAREN)) {
391             bodies = parseBodies();
392         } else {
393             bodies = List.of();
394         }
395 
396         lexer.accept(Tokens.TokenKind.SEMI);
397 
398         return new OpNode(operationResult, operationName, operands, successors, attributes, bodies);
399     }
400 
401     Map<String, Object> parseAttributes() {
402         Map<String, Object> attributes = new HashMap<>();
403         while (lexer.acceptIf(Tokens.TokenKind.MONKEYS_AT)) {
404             String attributeName;
405             if (lexer.is(Tokens.TokenKind.IDENTIFIER)) {
406                 attributeName = parseName();
407                 lexer.accept(Tokens.TokenKind.EQ);
408             } else {
409                 attributeName = "";
410             }
411             Object attributeValue = parseAttributeValue();
412             attributes.put(attributeName, attributeValue);
413         }
414         return attributes;
415     }
416 
417     Object parseAttributeValue() {
418         if (lexer.is(Tokens.TokenKind.IDENTIFIER)) {
419             return parseName();
420         }
421 
422         Object value = parseLiteral(lexer.token());
423         lexer.nextToken();
424 
425         return value;
426     }
427 
428     Object parseLiteral(Tokens.Token t) {
429         return switch (t.kind) {
430             case STRINGLITERAL -> t.stringVal();
431             case NULL -> ExternalizableOp.NULL_ATTRIBUTE_VALUE;
432             default -> throw lexer.unexpected();
433         };
434     }
435 
436     List<String> parseOperands() {
437         List<String> operands = new ArrayList<>();
438         while (lexer.is(Tokens.TokenKind.VALUE_IDENTIFIER)) {
439             operands.add(lexer.token().name().substring(1));
440             lexer.nextToken();
441         }
442         return operands;
443     }
444 
445     List<SuccessorNode> parseSuccessors() {
446         List<SuccessorNode> successors = new ArrayList<>();
447 
448         while (lexer.is(Tokens.TokenKind.CARET) && !isBody()) {
449             lexer.nextToken();
450             successors.add(parseSuccessor());
451         }
452 
453         return successors;
454     }
455 
456     // Lookahead from "^" to determine if Body
457     boolean isBody() {
458         assert lexer.is(Tokens.TokenKind.CARET);
459 
460         int pos = 1;
461         lexer.token(pos++);
462         assert lexer.token(1).kind == Tokens.TokenKind.IDENTIFIER;
463 
464         if (lexer.token(pos++).kind != Tokens.TokenKind.LPAREN) {
465             return false;
466         }
467 
468         Tokens.Token t;
469         while ((t = lexer.token(pos++)).kind != Tokens.TokenKind.RPAREN) {
470             if (t.kind == Tokens.TokenKind.EOF) {
471                 return false;
472             } else if (t.kind == Tokens.TokenKind.COLON) {
473                 // Encountered Value
474                 return true;
475             }
476         }
477 
478         // Encountered return type
479         return lexer.token(pos++).kind == Tokens.TokenKind.IDENTIFIER;
480     }
481 
482     SuccessorNode parseSuccessor() {
483         String blockName = lexer.accept(Tokens.TokenKind.IDENTIFIER).name();
484 
485         List<String> arguments = new ArrayList<>();
486         if (lexer.acceptIf(Tokens.TokenKind.LPAREN) && !lexer.acceptIf(Tokens.TokenKind.RPAREN)) {
487             do {
488                 arguments.add(lexer.accept(Tokens.TokenKind.VALUE_IDENTIFIER).name().substring(1));
489             } while (lexer.acceptIf(Tokens.TokenKind.COMMA));
490             lexer.accept(Tokens.TokenKind.RPAREN);
491         }
492 
493         return new SuccessorNode(blockName, arguments);
494     }
495 
496     List<BodyNode> parseBodies() {
497         List<BodyNode> bodies = new ArrayList<>();
498         while (lexer.is(Tokens.TokenKind.CARET) || lexer.is(Tokens.TokenKind.LPAREN)) {
499             BodyNode body = parseBody();
500             bodies.add(body);
501         }
502         return bodies;
503     }
504 
505     BodyNode parseBody() {
506         // Body name
507         final String bodyName;
508         if (lexer.acceptIf(Tokens.TokenKind.CARET)) {
509             bodyName = lexer.accept(Tokens.TokenKind.IDENTIFIER).name();
510         } else {
511             bodyName = null;
512         }
513 
514         // Entry block header
515         List<ValueNode> arguments = parseBlockHeaderArguments(true);
516         // Body return type
517         TypeElement.ExternalizedTypeElement rtype = parseExTypeElem();
518 
519         lexer.accept(Tokens.TokenKind.ARROW);
520         lexer.accept(Tokens.TokenKind.LBRACE);
521 
522         List<BlockNode> blocks = parseBlocks(bodyName, arguments);
523 
524         lexer.accept(Tokens.TokenKind.RBRACE);
525 
526         return new BodyNode(rtype, blocks);
527     }
528 
529     List<ValueNode> parseBlockHeaderArguments(boolean isEntryBlock) {
530         boolean parseArguments;
531         if (isEntryBlock) {
532             lexer.accept(Tokens.TokenKind.LPAREN);
533             parseArguments = true;
534         } else {
535             parseArguments = lexer.acceptIf(Tokens.TokenKind.LPAREN);
536         }
537         if (!parseArguments || lexer.acceptIf(Tokens.TokenKind.RPAREN)) {
538             return new ArrayList<>();
539         }
540 
541         List<ValueNode> arguments = new ArrayList<>();
542         do {
543             arguments.add(parseValueNode());
544         } while (lexer.acceptIf(Tokens.TokenKind.COMMA));
545         lexer.accept(Tokens.TokenKind.RPAREN);
546 
547         return arguments;
548     }
549 
550     ValueNode parseValueNode() {
551         String valueName = lexer.accept(Tokens.TokenKind.VALUE_IDENTIFIER).name().substring(1);
552 
553         lexer.accept(Tokens.TokenKind.COLON);
554 
555         TypeElement.ExternalizedTypeElement type = parseExTypeElem();
556 
557         return new ValueNode(valueName, type);
558     }
559 
560     List<BlockNode> parseBlocks(String entryBlockName, List<ValueNode> entryBlockArguments) {
561         List<BlockNode> blocks = new ArrayList<>();
562 
563         // Entry block ops
564         BlockNode entryBlock = new BlockNode(entryBlockName, entryBlockArguments, parseOperations());
565         blocks.add(entryBlock);
566 
567         // Subsequent blocks
568         while (lexer.acceptIf(Tokens.TokenKind.CARET)) {
569             String blockName = lexer.accept(Tokens.TokenKind.IDENTIFIER).name();
570             List<ValueNode> blockArguments = parseBlockHeaderArguments(false);
571 
572             lexer.accept(Tokens.TokenKind.COLON);
573 
574             BlockNode block = new BlockNode(blockName, blockArguments, parseOperations());
575             blocks.add(block);
576         }
577 
578         return blocks;
579     }
580 
581     List<OpNode> parseOperations() {
582         List<OpNode> ops = new ArrayList<>();
583         while (lexer.is(Tokens.TokenKind.MONKEYS_AT) || lexer.is(Tokens.TokenKind.VALUE_IDENTIFIER) || lexer.is(Tokens.TokenKind.IDENTIFIER)) {
584             OpNode op = parseOperation();
585             ops.add(op);
586         }
587         return ops;
588     }
589 
590     String parseName() {
591         Tokens.Token t = lexer.accept(Tokens.TokenKind.IDENTIFIER);
592         StringBuilder name = new StringBuilder();
593         name.append(t.name());
594         while (lexer.acceptIf(Tokens.TokenKind.DOT)) {
595             name.append(Tokens.TokenKind.DOT.name);
596             t = lexer.accept(Tokens.TokenKind.IDENTIFIER);
597             name.append(t.name());
598         }
599         return name.toString();
600     }
601 
602     TypeElement.ExternalizedTypeElement parseExTypeElem() {
603         return DescParser.parseExTypeElem(lexer);
604     }
605 }
606