1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package shade.parsers;
 26 
 27 import optkl.textmodel.terminal.ANSI;
 28 import shade.shaders.WavesShader;
 29 
 30 import java.util.ArrayList;
 31 import java.util.List;
 32 import java.util.regex.Matcher;
 33 import java.util.regex.Pattern;
 34 
 35 public class ShaderTokenizer {
 36     interface Token {
 37         TOKEN_TYPE tokenType();
 38 
 39         String value();
 40     }
 41 
 42     interface ParentToken extends Token {
 43         List<Token> children();
 44     }
 45 
 46     interface LeafToken extends Token {
 47     }
 48 
 49     interface TypeToken extends LeafToken {
 50     }
 51 
 52     interface ConstToken extends Token {
 53     }
 54 
 55     interface SymbolToken extends Token {
 56     }
 57 
 58     interface SeparatorToken extends SymbolToken {
 59     }
 60 
 61     record CreateToken(TOKEN_TYPE tokenType, String value) implements ConstToken {
 62         public String toString() {
 63             return tokenType + ":" + value;
 64         }
 65     }
 66 
 67     record MathLibCallToken(TOKEN_TYPE tokenType, String value) implements Token {
 68         public String toString() {
 69             return tokenType + ":" + value;
 70         }
 71     }
 72 
 73     record CallToken(TOKEN_TYPE tokenType, String value) implements Token {
 74         public String toString() {
 75             return tokenType + ":" + value;
 76         }
 77     }
 78 
 79     record VecTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
 80         public String toString() {
 81             return tokenType + ":" + value;
 82         }
 83     }
 84 
 85     record MatTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
 86         public String toString() {
 87             return tokenType + ":" + value;
 88         }
 89     }
 90 
 91     record PrimitiveTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
 92         public String toString() {
 93             return tokenType + ":" + value;
 94         }
 95     }
 96 
 97     record FloatConstToken(TOKEN_TYPE tokenType, String value, float f32) implements ConstToken {
 98         public String toString() {
 99             return "F32:" + f32;
100         }
101     }
102 
103     record WSAndLineCommentToken(TOKEN_TYPE tokenType, String value) implements Token {
104         public String toString() {
105             return "WS:" + value.replace("\n", "\\n").replace("\t", "\\t").replace(" ", ".");
106         }
107     }
108 
109     record IntConstToken(TOKEN_TYPE tokenType, String value, int i32) implements ConstToken {
110         public String toString() {
111             return "S32:" + i32;
112         }
113     }
114 
115     record ReservedToken(TOKEN_TYPE tokenType, String value) implements Token {
116         public String toString() {
117             return tokenType + ":" + value;
118         }
119     }
120 
121     record UniformToken(TOKEN_TYPE tokenType, String value) implements Token {
122         public String toString() {
123             return tokenType + ":" + value;
124         }
125     }
126 
127     record IdentifierToken(TOKEN_TYPE tokenType, String value) implements Token {
128         public String toString() {
129             return tokenType + ":" + value;
130         }
131     }
132 
133     record CommaToken(TOKEN_TYPE tokenType, String value) implements SeparatorToken {
134         public String toString() {
135             return tokenType + ":" + value;
136         }
137     }
138 
139     record SemicolonToken(TOKEN_TYPE tokenType, String value) implements SeparatorToken {
140         public String toString() {
141             return tokenType + ":" + value;
142         }
143     }
144 
145     record OToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
146         public String toString() {
147             return tokenType + ":" + value;
148         }
149     }
150 
151     record CToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
152         public String toString() {
153             return tokenType + ":" + value;
154         }
155     }
156 
157     record DotToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
158         public String toString() {
159             return tokenType + ":" + value;
160         }
161     }
162 
163     record PreprocessorToken(TOKEN_TYPE tokenType, String value) implements Token {
164         public String toString() {
165             return tokenType + ":" + value;
166         }
167     }
168 
169     record AssignToken(TOKEN_TYPE tokenType, String value) implements Token {
170         public String toString() {
171             return tokenType + ":" + value;
172         }
173     }
174 
175     record ArithmeticOperator(TOKEN_TYPE tokenType, String value) implements Token {
176         // https://www.codingeek.com/tutorials/c-programming/precedence-and-associativity-of-operators-in-c/
177         enum Precedence {
178             Multiplicative, Additive
179         }
180 
181         public Precedence precedence() {
182             return switch (value) {
183                 case "*", "/" -> Precedence.Multiplicative; // multiplacative
184                 case "-", "+" -> Precedence.Additive; // additive
185                 default -> throw new IllegalStateException("Unexpected value: " + value);
186             };
187         }
188 
189         public String toString() {
190             return tokenType + ":" + value + " " + precedence();
191         }
192     }
193 
194 
195     enum TOKEN_TYPE {
196         NONE(null),// NONE looks useless, but the ordinals for these enums define the groups # from regex.  So dont remove ;)
197         WS_AND_LINE_COMMENT("([ \n\t]+|//[^\n]*\n)"),
198         RESERVED("(in|out|mainImage|return)(?![a-zA-Z0-9_])"),
199         UNIFORM("(iTime|iResolution|iMouse)"),
200         CREATE("(ivec[234]|vec[234]|imat[234]|mat[234])(?=[({])"),
201         MATH_LIB_CALL("(abs|clamp|cos|dot|exp|length|normalize|normal|max|min|mix|pow|reflect|sin|sqrt|tan)(?=[({])"),
202         CALL("([a-zA-Z_][a-zA-Z0-9_]*)(?=[({])"),
203         PRIMITIVE_TYPE("(float|int|void)(?![a-zA-Z0-9_])"),
204         VEC_TYPE("(ivec[234]|vec[234])(?![a-zA-Z0-9_])"),
205         MAT_TYPE("(imat[234]|mat[234])(?![a-zA-Z0-9_])"),
206         PRE_PREPROCESSOR("(#define|#include)"),
207         IDENTIFIER("([a-zA-Z_][a-zA-Z0-9_]*)"),
208         CONST("(\\d+\\.?\\d*)"),
209         OSYMBOL("([{(])"),
210         CSYMBOL("([})])"),
211         ASSIGN("(=|\\+=|\\-=|\\*=|/=)"),
212         ARITHMETIC_OPERATOR("([+/\\-*])"),
213         SEMICOLON("(;)"),
214         COMMA("(,)"),
215         DOT("(.)");
216         String regex;
217 
218         TOKEN_TYPE(String regex) {
219             this.regex = regex;
220         }
221     }
222 
223     private static List<Token> tokenize(String source) {
224         List<Token> tokens = new ArrayList<>();
225         // We walk the enum in order and create a single regex for all token types
226         // order is important ;)
227         StringBuilder regexBuilder = new StringBuilder();
228         for (var token : TOKEN_TYPE.values()) {
229             if (!token.equals(TOKEN_TYPE.NONE)) {
230                 if (!regexBuilder.isEmpty()) {
231                     regexBuilder.append("|");
232                 }
233                 regexBuilder.append(token.regex);
234             }
235         }
236 
237         // Now try to compile the resulting regex
238         Pattern pattern = Pattern.compile(regexBuilder.toString());
239         Matcher matcher = pattern.matcher(source);
240         while (matcher.find()) {
241             for (var tokenType : TOKEN_TYPE.values()) {
242                 if (!tokenType.equals(TOKEN_TYPE.NONE)) {
243                     String val = matcher.group(tokenType.ordinal());
244                     if (val != null && !val.isEmpty()) {
245                         tokens.add(switch (tokenType) {
246                             case WS_AND_LINE_COMMENT -> new WSAndLineCommentToken(tokenType, val);
247                             case MAT_TYPE -> new MatTypeToken(tokenType, val);
248                             case VEC_TYPE -> new VecTypeToken(tokenType, val);
249                             case PRIMITIVE_TYPE -> new PrimitiveTypeToken(tokenType, val);
250                             case PRE_PREPROCESSOR -> new PreprocessorToken(tokenType, val);
251                             case MATH_LIB_CALL -> new MathLibCallToken(tokenType, val);
252                             case CALL -> new CallToken(tokenType, val);
253                             case CREATE -> new CreateToken(tokenType, val);
254                             case UNIFORM -> new UniformToken(tokenType, val);
255                             case RESERVED -> new ReservedToken(tokenType, val);
256                             case IDENTIFIER -> new IdentifierToken(tokenType, val);
257                             case CONST -> val.contains(".")
258                                     ? new FloatConstToken(tokenType, val, Float.parseFloat(val))
259                                     : new IntConstToken(tokenType, val, Integer.parseInt(val));
260                             case OSYMBOL -> new OToken(tokenType, val);
261                             case CSYMBOL -> new CToken(tokenType, val);
262                             case ARITHMETIC_OPERATOR -> new ArithmeticOperator(tokenType, val);
263                             case SEMICOLON -> new SemicolonToken(tokenType, val);
264                             case COMMA -> new CommaToken(tokenType, val);
265                             case DOT -> new DotToken(tokenType, val);
266                             case ASSIGN -> new AssignToken(tokenType, val);
267                             default -> throw new IllegalStateException("We should never get here");
268                         });
269                     }
270                 }
271             }
272         }
273         return tokens;
274     }
275 
276     static class Cursor {
277         List<Token> tokens;
278         int idx;
279 
280         Cursor(List<Token> tokens, int idx) {
281             this.tokens = tokens;
282             this.idx = idx;
283         }
284 
285         Token get() {
286             return tokens.get(idx);
287         }
288 
289         Token next() {
290             idx++;
291             while (get() instanceof WSAndLineCommentToken) {
292                 idx++;
293             }
294             return get();
295         }
296 
297         static Cursor of(List<Token> tokens, int idx) {
298             return idx < tokens.size() ? new Cursor(tokens, idx) : null;
299         }
300     }
301 
302     // Example parse: print declarations and functions
303     private static void parse(List<Token> tokens) {
304         for (int i = 0; i < tokens.size(); i++) {
305             var c = Cursor.of(tokens, i);
306             if (c.get() instanceof TypeToken typeToken && c.next() instanceof CallToken callToken && c.next() instanceof OToken oToken) {
307                 System.out.println("Function found: returnType=" + typeToken + ", name=" + callToken);
308             }
309             if (c.get() instanceof ArithmeticOperator arithmeticOperator) {
310                 System.out.println("Arithmetic " + arithmeticOperator);
311             }
312 
313         }
314     }
315 
316 
317     // Example usage
318     public static void main(String[] args) {
319 
320         List<Token> tokens = tokenize("");
321         var ansi = ANSI.of(System.out);
322 
323         tokens.forEach(token -> {
324             switch (token) {
325                 case WSAndLineCommentToken _ -> ansi.color(ANSI.GREEN, a -> a.apply(token.value()));
326                 case ConstToken _ -> ansi.color(ANSI.YELLOW, a -> a.apply(token.value()));
327                 case TypeToken _ -> ansi.color(ANSI.BLUE, a -> a.apply(token.value()));
328                 case ReservedToken _ -> ansi.color(ANSI.CYAN, a -> a.apply(token.value()));
329                 case PreprocessorToken _ -> ansi.color(ANSI.CYAN, a -> a.apply(token.value()));
330                 case ArithmeticOperator _ -> ansi.color(ANSI.RED, a -> a.apply(token.value()));
331                 case AssignToken _ -> ansi.color(ANSI.RED, a -> a.apply(token.value()));
332                 case UniformToken _ -> ansi.color(ANSI.CYAN, a -> a.apply(token.value()));
333                 case CallToken _ -> ansi.color(ANSI.PURPLE, a -> a.apply(token.value()));
334                 case MathLibCallToken _ -> ansi.color(ANSI.BLUE, a -> a.apply(token.value()));
335                 case SeparatorToken _ -> ansi.color(ANSI.GREEN, a -> a.apply(token.value()));
336                 default -> ansi.apply(token.value());
337             }
338         });
339         // parse(tokens);
340     }
341 
342 }