1 /*
2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 package shade.parsers;
26
27 import optkl.textmodel.terminal.ANSI;
28 import shade.shaders.WavesShader;
29
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34
35 public class ShaderTokenizer {
36 interface Token {
37 TOKEN_TYPE tokenType();
38
39 String value();
40 }
41
42 interface ParentToken extends Token {
43 List<Token> children();
44 }
45
46 interface LeafToken extends Token {
47 }
48
49 interface TypeToken extends LeafToken {
50 }
51
52 interface ConstToken extends Token {
53 }
54
55 interface SymbolToken extends Token {
56 }
57
58 interface SeparatorToken extends SymbolToken {
59 }
60
61 record CreateToken(TOKEN_TYPE tokenType, String value) implements ConstToken {
62 public String toString() {
63 return tokenType + ":" + value;
64 }
65 }
66
67 record MathLibCallToken(TOKEN_TYPE tokenType, String value) implements Token {
68 public String toString() {
69 return tokenType + ":" + value;
70 }
71 }
72
73 record CallToken(TOKEN_TYPE tokenType, String value) implements Token {
74 public String toString() {
75 return tokenType + ":" + value;
76 }
77 }
78
79 record VecTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
80 public String toString() {
81 return tokenType + ":" + value;
82 }
83 }
84
85 record MatTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
86 public String toString() {
87 return tokenType + ":" + value;
88 }
89 }
90
91 record PrimitiveTypeToken(TOKEN_TYPE tokenType, String value) implements TypeToken {
92 public String toString() {
93 return tokenType + ":" + value;
94 }
95 }
96
97 record FloatConstToken(TOKEN_TYPE tokenType, String value, float f32) implements ConstToken {
98 public String toString() {
99 return "F32:" + f32;
100 }
101 }
102
103 record WSAndLineCommentToken(TOKEN_TYPE tokenType, String value) implements Token {
104 public String toString() {
105 return "WS:" + value.replace("\n", "\\n").replace("\t", "\\t").replace(" ", ".");
106 }
107 }
108
109 record IntConstToken(TOKEN_TYPE tokenType, String value, int i32) implements ConstToken {
110 public String toString() {
111 return "S32:" + i32;
112 }
113 }
114
115 record ReservedToken(TOKEN_TYPE tokenType, String value) implements Token {
116 public String toString() {
117 return tokenType + ":" + value;
118 }
119 }
120
121 record UniformToken(TOKEN_TYPE tokenType, String value) implements Token {
122 public String toString() {
123 return tokenType + ":" + value;
124 }
125 }
126
127 record IdentifierToken(TOKEN_TYPE tokenType, String value) implements Token {
128 public String toString() {
129 return tokenType + ":" + value;
130 }
131 }
132
133 record CommaToken(TOKEN_TYPE tokenType, String value) implements SeparatorToken {
134 public String toString() {
135 return tokenType + ":" + value;
136 }
137 }
138
139 record SemicolonToken(TOKEN_TYPE tokenType, String value) implements SeparatorToken {
140 public String toString() {
141 return tokenType + ":" + value;
142 }
143 }
144
145 record OToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
146 public String toString() {
147 return tokenType + ":" + value;
148 }
149 }
150
151 record CToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
152 public String toString() {
153 return tokenType + ":" + value;
154 }
155 }
156
157 record DotToken(TOKEN_TYPE tokenType, String value) implements SymbolToken {
158 public String toString() {
159 return tokenType + ":" + value;
160 }
161 }
162
163 record PreprocessorToken(TOKEN_TYPE tokenType, String value) implements Token {
164 public String toString() {
165 return tokenType + ":" + value;
166 }
167 }
168
169 record AssignToken(TOKEN_TYPE tokenType, String value) implements Token {
170 public String toString() {
171 return tokenType + ":" + value;
172 }
173 }
174
175 record ArithmeticOperator(TOKEN_TYPE tokenType, String value) implements Token {
176 // https://www.codingeek.com/tutorials/c-programming/precedence-and-associativity-of-operators-in-c/
177 enum Precedence {
178 Multiplicative, Additive
179 }
180
181 public Precedence precedence() {
182 return switch (value) {
183 case "*", "/" -> Precedence.Multiplicative; // multiplacative
184 case "-", "+" -> Precedence.Additive; // additive
185 default -> throw new IllegalStateException("Unexpected value: " + value);
186 };
187 }
188
189 public String toString() {
190 return tokenType + ":" + value + " " + precedence();
191 }
192 }
193
194
/**
 * Token categories together with the regular expression that recognises each one.
 *
 * <p>The tokenizer joins these patterns, in declaration order, into one alternation and reads
 * the matched text from the capturing group whose number equals the constant's ordinal.
 * Two invariants follow:
 * <ul>
 *   <li>every pattern must contain exactly one capturing group (look-aheads and {@code (?:...)}
 *       do not count);</li>
 *   <li>declaration order is significant: earlier alternatives win, so the more specific
 *       patterns (reserved words, constructors, calls, type names) come before
 *       {@code IDENTIFIER}, and {@code ASSIGN} comes before {@code ARITHMETIC_OPERATOR} so that
 *       "+=" is not split.</li>
 * </ul>
 */
enum TOKEN_TYPE {
    // NONE looks useless, but it occupies ordinal 0 so that every other ordinal equals its
    // capturing-group number in the combined regex (group 0 is the whole match). Do not remove.
    NONE(null),
    // A trailing newline is optional so that a line comment on the last line of the input
    // (no terminating '\n') is still recognised as a comment.
    WS_AND_LINE_COMMENT("([ \n\t]+|//[^\n]*\n?)"),
    RESERVED("(in|out|mainImage|return)(?![a-zA-Z0-9_])"),
    // The look-ahead stops a longer identifier such as "iTimeScale" from being split into
    // a uniform followed by an identifier.
    UNIFORM("(iTime|iResolution|iMouse)(?![a-zA-Z0-9_])"),
    CREATE("(ivec[234]|vec[234]|imat[234]|mat[234])(?=[({])"),
    MATH_LIB_CALL("(abs|clamp|cos|dot|exp|length|normalize|normal|max|min|mix|pow|reflect|sin|sqrt|tan)(?=[({])"),
    CALL("([a-zA-Z_][a-zA-Z0-9_]*)(?=[({])"),
    PRIMITIVE_TYPE("(float|int|void)(?![a-zA-Z0-9_])"),
    VEC_TYPE("(ivec[234]|vec[234])(?![a-zA-Z0-9_])"),
    MAT_TYPE("(imat[234]|mat[234])(?![a-zA-Z0-9_])"),
    PRE_PREPROCESSOR("(#define|#include)"),
    IDENTIFIER("([a-zA-Z_][a-zA-Z0-9_]*)"),
    // Accepts "1", "1.", "1.5" and the leading-dot form ".5"; without the second alternative
    // ".5" would be tokenized as a DOT followed by the integer 5.
    CONST("(\\d+\\.?\\d*|\\.\\d+)"),
    OSYMBOL("([{(])"),
    CSYMBOL("([})])"),
    ASSIGN("(=|\\+=|\\-=|\\*=|/=)"),
    ARITHMETIC_OPERATOR("([+/\\-*])"),
    SEMICOLON("(;)"),
    COMMA("(,)"),
    // NOTE(review): the '.' is unescaped, so this matches ANY single character other than a
    // line terminator and therefore acts as a catch-all for characters no earlier pattern
    // claims. Confirm whether that is intended or whether a literal dot ("\\.") was meant.
    DOT("(.)");

    /** Pattern source for this token type; {@code null} only for {@link #NONE}. */
    final String regex;

    TOKEN_TYPE(String regex) {
        this.regex = regex;
    }
}
222
223 private static List<Token> tokenize(String source) {
224 List<Token> tokens = new ArrayList<>();
225 // We walk the enum in order and create a single regex for all token types
226 // order is important ;)
227 StringBuilder regexBuilder = new StringBuilder();
228 for (var token : TOKEN_TYPE.values()) {
229 if (!token.equals(TOKEN_TYPE.NONE)) {
230 if (!regexBuilder.isEmpty()) {
231 regexBuilder.append("|");
232 }
233 regexBuilder.append(token.regex);
234 }
235 }
236
237 // Now try to compile the resulting regex
238 Pattern pattern = Pattern.compile(regexBuilder.toString());
239 Matcher matcher = pattern.matcher(source);
240 while (matcher.find()) {
241 for (var tokenType : TOKEN_TYPE.values()) {
242 if (!tokenType.equals(TOKEN_TYPE.NONE)) {
243 String val = matcher.group(tokenType.ordinal());
244 if (val != null && !val.isEmpty()) {
245 tokens.add(switch (tokenType) {
246 case WS_AND_LINE_COMMENT -> new WSAndLineCommentToken(tokenType, val);
247 case MAT_TYPE -> new MatTypeToken(tokenType, val);
248 case VEC_TYPE -> new VecTypeToken(tokenType, val);
249 case PRIMITIVE_TYPE -> new PrimitiveTypeToken(tokenType, val);
250 case PRE_PREPROCESSOR -> new PreprocessorToken(tokenType, val);
251 case MATH_LIB_CALL -> new MathLibCallToken(tokenType, val);
252 case CALL -> new CallToken(tokenType, val);
253 case CREATE -> new CreateToken(tokenType, val);
254 case UNIFORM -> new UniformToken(tokenType, val);
255 case RESERVED -> new ReservedToken(tokenType, val);
256 case IDENTIFIER -> new IdentifierToken(tokenType, val);
257 case CONST -> val.contains(".")
258 ? new FloatConstToken(tokenType, val, Float.parseFloat(val))
259 : new IntConstToken(tokenType, val, Integer.parseInt(val));
260 case OSYMBOL -> new OToken(tokenType, val);
261 case CSYMBOL -> new CToken(tokenType, val);
262 case ARITHMETIC_OPERATOR -> new ArithmeticOperator(tokenType, val);
263 case SEMICOLON -> new SemicolonToken(tokenType, val);
264 case COMMA -> new CommaToken(tokenType, val);
265 case DOT -> new DotToken(tokenType, val);
266 case ASSIGN -> new AssignToken(tokenType, val);
267 default -> throw new IllegalStateException("We should never get here");
268 });
269 }
270 }
271 }
272 }
273 return tokens;
274 }
275
276 static class Cursor {
277 List<Token> tokens;
278 int idx;
279
280 Cursor(List<Token> tokens, int idx) {
281 this.tokens = tokens;
282 this.idx = idx;
283 }
284
285 Token get() {
286 return tokens.get(idx);
287 }
288
289 Token next() {
290 idx++;
291 while (get() instanceof WSAndLineCommentToken) {
292 idx++;
293 }
294 return get();
295 }
296
297 static Cursor of(List<Token> tokens, int idx) {
298 return idx < tokens.size() ? new Cursor(tokens, idx) : null;
299 }
300 }
301
302 // Example parse: print declarations and functions
303 private static void parse(List<Token> tokens) {
304 for (int i = 0; i < tokens.size(); i++) {
305 var c = Cursor.of(tokens, i);
306 if (c.get() instanceof TypeToken typeToken && c.next() instanceof CallToken callToken && c.next() instanceof OToken oToken) {
307 System.out.println("Function found: returnType=" + typeToken + ", name=" + callToken);
308 }
309 if (c.get() instanceof ArithmeticOperator arithmeticOperator) {
310 System.out.println("Arithmetic " + arithmeticOperator);
311 }
312
313 }
314 }
315
316
317 // Example usage
/**
 * Tokenizes a source string and writes each token's text to standard output, coloured by
 * token category.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<Token> tokens = tokenize("");
    var ansi = ANSI.of(System.out);

    for (Token token : tokens) {
        String text = token.value();
        // Cases are grouped by colour; the relative order of the interface patterns
        // (ConstToken, TypeToken, SeparatorToken) is kept so the first match is unchanged.
        switch (token) {
            case WSAndLineCommentToken _ -> ansi.color(ANSI.GREEN, a -> a.apply(text));
            case ConstToken _ -> ansi.color(ANSI.YELLOW, a -> a.apply(text));
            case TypeToken _, MathLibCallToken _ -> ansi.color(ANSI.BLUE, a -> a.apply(text));
            case ReservedToken _, PreprocessorToken _, UniformToken _ ->
                    ansi.color(ANSI.CYAN, a -> a.apply(text));
            case ArithmeticOperator _, AssignToken _ -> ansi.color(ANSI.RED, a -> a.apply(text));
            case CallToken _ -> ansi.color(ANSI.PURPLE, a -> a.apply(text));
            case SeparatorToken _ -> ansi.color(ANSI.GREEN, a -> a.apply(text));
            default -> ansi.apply(text);
        }
    }
    // parse(tokens);
}
341
342 }