1 /* 2 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package oracle.code.json; 27 28 import java.util.HashSet; 29 30 // Responsible for parsing the Json document which validates the contents 31 // and builds the tokens array in JsonDocumentInfo which is used for lazy inflation 32 final class JsonParser { ; 33 34 // Parse the JSON and return the built DocumentInfo w/ tokens array 35 static JsonDocumentInfo parseRoot(JsonDocumentInfo docInfo) { 36 int end = parseValue(docInfo, 0, 0); 37 if (!checkWhitespaces(docInfo, end, docInfo.getEndOffset())) { 38 throw failure(docInfo,"Unexpected character(s)", end); 39 } 40 return docInfo; 41 } 42 43 static int parseValue(JsonDocumentInfo docInfo, int offset, int depth) { 44 offset = skipWhitespaces(docInfo, offset); 45 46 return switch (docInfo.charAt(offset)) { 47 case '{' -> parseObject(docInfo, offset, depth + 1); 48 case '[' -> parseArray(docInfo, offset, depth + 1); 49 case '"' -> parseString(docInfo, offset); 50 case 't', 'f' -> parseBoolean(docInfo, offset); 51 case 'n' -> parseNull(docInfo, offset); 52 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-' -> parseNumber(docInfo, offset); 53 default -> throw failure(docInfo, "Unexpected character(s)", offset); 54 }; 55 } 56 57 static int parseObject(JsonDocumentInfo docInfo, int offset, int depth) { 58 checkDepth(docInfo, offset, depth); 59 var keys = new HashSet<String>(); 60 docInfo.tokens[docInfo.index++] = offset; 61 // Walk past the '{' 62 offset = JsonParser.skipWhitespaces(docInfo, offset + 1); 63 // Check for empty case 64 if (docInfo.charAt(offset) == '}') { 65 docInfo.tokens[docInfo.index++] = offset; 66 return ++offset; 67 } 68 while (offset < docInfo.getEndOffset()) { 69 // Get the key 70 if (docInfo.charAt(offset) != '"') { 71 throw failure(docInfo, "Invalid key", offset); 72 } 73 // Member equality done via unescaped String 74 // see https://datatracker.ietf.org/doc/html/rfc8259#section-8.3 75 docInfo.tokens[docInfo.index++] = offset++; // Move past the starting quote 76 var escape = false; 77 boolean useBldr = false; 78 var start = offset; 79 StringBuilder sb = null; // only init if we need to use for escapes 80 boolean foundClosing = false; 81 for (; offset < docInfo.getEndOffset(); offset++) { 82 var c = docInfo.charAt(offset); 83 if (escape) { 84 var length = 0; 85 switch (c) { 86 // Allowed JSON escapes 87 case '"', '\\', '/' -> {} 88 case 'b' -> c = '\b'; 89 case 'f' -> c = '\f'; 90 case 'n' -> c = '\n'; 91 case 'r' -> c = '\r'; 92 case 't' -> c = '\t'; 93 case 'u' -> { 94 if (offset + 4 < docInfo.getEndOffset()) { 95 c = codeUnit(docInfo, offset + 1); 96 length = 4; 97 } else { 98 throw failure(docInfo, 99 "Illegal Unicode escape sequence", offset); 100 } 101 } 102 default -> throw failure(docInfo, 103 "Illegal escape", offset); 104 } 105 if (!useBldr) { 106 useBldr = true; 107 sb = new StringBuilder(docInfo.substring(start, offset - 1)); 108 } 109 offset+=length; 110 escape = false; 111 } else if (c == '\\') { 112 escape = true; 113 continue; 114 } else if (c == '\"') { 115 docInfo.tokens[docInfo.index++] = offset++; 116 foundClosing = true; 117 break; 118 } else if (c < ' ') { 119 throw failure(docInfo, 120 "Unescaped control code", offset); 121 } 122 if (useBldr) { 123 sb.append(c); 124 } 125 } 126 if (!foundClosing) { 127 throw failure(docInfo, "Closing quote missing", offset); 128 } 129 var keyStr = useBldr ? sb.toString() : 130 docInfo.substring(start, offset - 1); 131 132 // Check for duplicates 133 if (keys.contains(keyStr)) { 134 throw failure(docInfo, 135 "The duplicate key: '%s' was already parsed".formatted(keyStr), offset); 136 } 137 keys.add(keyStr); 138 139 // Move from key to ':' 140 offset = JsonParser.skipWhitespaces(docInfo, offset); 141 docInfo.tokens[docInfo.index++] = offset; 142 if (docInfo.charAt(offset) != ':') { 143 throw failure(docInfo, 144 "Unexpected character(s) found after key", offset); 145 } 146 147 // Move from ':' to JsonValue 148 offset = JsonParser.skipWhitespaces(docInfo, offset + 1); 149 offset = JsonParser.parseValue(docInfo, offset, depth); 150 151 // Walk to either ',' or '}' 152 offset = JsonParser.skipWhitespaces(docInfo, offset); 153 var c = docInfo.charAt(offset); 154 if (c == '}') { 155 docInfo.tokens[docInfo.index++] = offset; 156 return ++offset; 157 } else if (docInfo.charAt(offset) != ',') { 158 break; 159 } 160 161 // Add the comma, and move to the next key 162 docInfo.tokens[docInfo.index++] = offset; 163 offset = JsonParser.skipWhitespaces(docInfo, offset + 1); 164 } 165 throw failure(docInfo, 166 "Unexpected character(s) found after value", offset); 167 } 168 169 static int parseArray(JsonDocumentInfo docInfo, int offset, int depth) { 170 checkDepth(docInfo, offset, depth); 171 docInfo.tokens[docInfo.index++] = offset; 172 // Walk past the '[' 173 offset = JsonParser.skipWhitespaces(docInfo, offset + 1); 174 // Check for empty case 175 if (docInfo.charAt(offset) == ']') { 176 docInfo.tokens[docInfo.index++] = offset; 177 return ++offset; 178 } 179 180 while (offset < docInfo.getEndOffset()) { 181 // Get the JsonValue 182 offset = JsonParser.parseValue(docInfo, offset, depth); 183 // Walk to either ',' or ']' 184 offset = JsonParser.skipWhitespaces(docInfo, offset); 185 var c = docInfo.charAt(offset); 186 if (c == ']') { 187 docInfo.tokens[docInfo.index++] = offset; 188 return ++offset; 189 } else if (c != ',') { 190 break; 191 } 192 193 // Add the comma, and move to the next value 194 docInfo.tokens[docInfo.index++] = offset; 195 offset = JsonParser.skipWhitespaces(docInfo, offset + 1); 196 } 197 throw failure(docInfo, 198 "Unexpected character(s) found after value", offset); 199 } 200 201 static int parseString(JsonDocumentInfo docInfo, int offset) { 202 docInfo.tokens[docInfo.index++] = offset++; // Move past the starting quote 203 var escape = false; 204 205 for (; offset < docInfo.getEndOffset(); offset++) { 206 var c = docInfo.charAt(offset); 207 if (escape) { 208 switch (c) { 209 // Allowed JSON escapes 210 case '"', '\\', '/', 'b', 'f', 'n', 'r', 't' -> {} 211 case 'u' -> { 212 if (offset + 4 < docInfo.getEndOffset()) { 213 checkEscapeSequence(docInfo, offset + 1); 214 offset += 4; 215 } else { 216 throw failure(docInfo, 217 "Illegal Unicode escape sequence", offset); 218 } 219 } 220 default -> throw failure(docInfo, 221 "Illegal escape", offset); 222 } 223 escape = false; 224 } else if (c == '\\') { 225 escape = true; 226 } else if (c == '\"') { 227 docInfo.tokens[docInfo.index++] = offset; 228 return ++offset; 229 } else if (c < ' ') { 230 throw failure(docInfo, 231 "Unescaped control code", offset); 232 } 233 } 234 throw failure(docInfo, "Closing quote missing", offset); 235 } 236 237 // Validate unicode escape sequence 238 static void checkEscapeSequence(JsonDocumentInfo docInfo, int offset) { 239 for (int index = 0; index < 4; index++) { 240 char c = docInfo.charAt(offset + index); 241 if ((c < 'a' || c > 'f') && (c < 'A' || c > 'F') && (c < '0' || c > '9')) { 242 throw failure(docInfo, "Invalid Unicode escape", offset); 243 } 244 } 245 } 246 247 // Validate and construct corresponding value of unicode escape sequence 248 static char codeUnit(JsonDocumentInfo docInfo, int offset) { 249 char val = 0; 250 for (int index = 0; index < 4; index ++) { 251 char c = docInfo.charAt(offset + index); 252 val <<= 4; 253 val += (char) ( 254 switch (c) { 255 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> c - '0'; 256 case 'a', 'b', 'c', 'd', 'e', 'f' -> c - 'a' + 10; 257 case 'A', 'B', 'C', 'D', 'E', 'F' -> c - 'A' + 10; 258 default -> throw new InternalError(); 259 }); 260 } 261 return val; 262 } 263 264 static int parseBoolean(JsonDocumentInfo docInfo, int offset) { 265 var start = docInfo.charAt(offset); 266 if (start == 't') { 267 if (offset + 3 >= docInfo.getEndOffset() || !docInfo.substring(offset + 1, offset + 4).equals("rue")) { 268 throw failure(docInfo, "Unexpected character(s)", offset); 269 } 270 return offset + 4; 271 } else { 272 if (offset + 4 >= docInfo.getEndOffset() || !docInfo.substring(offset + 1, offset + 5).equals("alse")) { 273 throw failure(docInfo, "Unexpected character(s)", offset); 274 } 275 return offset + 5; 276 } 277 } 278 279 static int parseNull(JsonDocumentInfo docInfo, int offset) { 280 if (offset + 3 >= docInfo.getEndOffset() || !docInfo.substring(offset + 1, offset + 4).equals("ull")) { 281 throw failure(docInfo, "Unexpected character(s)", offset); 282 } 283 return offset + 4; 284 } 285 286 static int parseNumber(JsonDocumentInfo docInfo, int offset) { 287 boolean sawDecimal = false; 288 boolean sawExponent = false; 289 boolean sawZero = false; 290 boolean sawWhitespace = false; 291 boolean havePart = false; 292 boolean sawInvalid = false; 293 boolean sawSign = false; 294 var start = offset; 295 for (; offset < docInfo.getEndOffset() && !sawWhitespace && !sawInvalid; offset++) { 296 switch (docInfo.charAt(offset)) { 297 case '-' -> { 298 if (offset != start && !sawExponent || sawSign) { 299 throw failure(docInfo, 300 "Invalid '-' position", offset); 301 } 302 sawSign = true; 303 } 304 case '+' -> { 305 if (!sawExponent || havePart || sawSign) { 306 throw failure(docInfo, 307 "Invalid '+' position", offset); 308 } 309 sawSign = true; 310 } 311 case '0' -> { 312 if (!havePart) { 313 sawZero = true; 314 } 315 havePart = true; 316 } 317 case '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { 318 if (!sawDecimal && !sawExponent && sawZero) { 319 throw failure(docInfo, 320 "Invalid '0' position", offset); 321 } 322 havePart = true; 323 } 324 case '.' -> { 325 if (sawDecimal) { 326 throw failure(docInfo, 327 "Invalid '.' position", offset); 328 } else { 329 if (!havePart) { 330 throw failure(docInfo, 331 "Invalid '.' position", offset); 332 } 333 sawDecimal = true; 334 havePart = false; 335 } 336 } 337 case 'e', 'E' -> { 338 if (sawExponent) { 339 throw failure(docInfo, 340 "Invalid '[e|E]' position", offset); 341 } else { 342 if (!havePart) { 343 throw failure(docInfo, 344 "Invalid '[e|E]' position", offset); 345 } 346 sawExponent = true; 347 havePart = false; 348 sawSign = false; 349 } 350 } 351 case ' ', '\t', '\r', '\n' -> { 352 sawWhitespace = true; 353 offset --; 354 } 355 default -> { 356 offset--; 357 sawInvalid = true; 358 } 359 } 360 } 361 if (!havePart) { 362 throw failure(docInfo, 363 "Input expected after '[.|e|E]'", offset); 364 } 365 return offset; 366 } 367 368 // Utility functions 369 static int skipWhitespaces(JsonDocumentInfo docInfo, int offset) { 370 while (offset < docInfo.getEndOffset()) { 371 if (notWhitespace(docInfo, offset)) { 372 break; 373 } 374 offset ++; 375 } 376 return offset; 377 } 378 379 static boolean checkWhitespaces(JsonDocumentInfo docInfo, int offset, int endOffset) { 380 int end = Math.min(endOffset, docInfo.getEndOffset()); 381 while (offset < end) { 382 if (notWhitespace(docInfo, offset)) { 383 return false; 384 } 385 offset ++; 386 } 387 return true; 388 } 389 390 static boolean notWhitespace(JsonDocumentInfo docInfo, int offset) { 391 return !isWhitespace(docInfo, offset); 392 } 393 394 static boolean isWhitespace(JsonDocumentInfo docInfo, int offset) { 395 return switch (docInfo.charAt(offset)) { 396 case ' ', '\t','\r' -> true; 397 case '\n' -> { 398 docInfo.line+=1; 399 docInfo.lineStart = offset + 1; 400 yield true; 401 } 402 default -> false; 403 }; 404 } 405 406 static JsonParseException failure(JsonDocumentInfo docInfo, String message, int offset) { 407 var errMsg = docInfo.composeParseExceptionMessage( 408 message, docInfo.line, docInfo.lineStart, offset); 409 return new JsonParseException(errMsg, docInfo.line, offset - docInfo.lineStart); 410 } 411 412 private static void checkDepth(JsonDocumentInfo docInfo, int offset, int depth) { 413 if (depth > Json.MAX_DEPTH) { 414 throw failure(docInfo, "Max depth exceeded", offset); 415 } 416 } 417 418 // no instantiation of this parser 419 private JsonParser(){} 420 }