1 /* 2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef SHARE_RUNTIME_SIGNATURE_HPP 27 #define SHARE_RUNTIME_SIGNATURE_HPP 28 29 #include "memory/allocation.hpp" 30 #include "oops/method.hpp" 31 32 // Static routines and parsing loops for processing field and method 33 // descriptors. In the HotSpot sources we call them "signatures". 34 // 35 // A SignatureStream iterates over a Java descriptor (or parts of it). 36 // The syntax is documented in the Java Virtual Machine Specification, 37 // section 4.3. 38 // 39 // The syntax may be summarized as follows: 40 // 41 // MethodType: '(' {FieldType}* ')' (FieldType | 'V') 42 // FieldType: PrimitiveType | ObjectType | ArrayType 43 // PrimitiveType: 'B' | 'C' | 'D' | 'F' | 'I' | 'J' | 'S' | 'Z' 44 // ObjectType: 'L' ClassName ';' | ArrayType 45 // ArrayType: '[' FieldType 46 // ClassName: {UnqualifiedName '/'}* UnqualifiedName 47 // UnqualifiedName: NameChar {NameChar}* 48 // NameChar: ANY_CHAR_EXCEPT('/' | '.' | ';' | '[') 49 // 50 // All of the concrete characters in the above grammar are given 51 // standard manifest constant names of the form JVM_SIGNATURE_x. 52 // Executable code uses these constant names in preference to raw 53 // character constants. Comments and assertion code sometimes use 54 // the raw character constants for brevity. 55 // 56 // The primitive field types (like 'I') correspond 1-1 with type codes 57 // (like T_INT) which form part of the specification of the 'newarray' 58 // instruction (JVMS 6.5, section on newarray). These type codes are 59 // widely used in the HotSpot code. They are joined by ad hoc codes 60 // like T_OBJECT and T_ARRAY (defined in HotSpot but not in the JVMS) 61 // so that each "basic type" of field descriptor (or void return type) 62 // has a corresponding T_x code. Thus, while T_x codes play a very 63 // minor role in the JVMS, they play a major role in the HotSpot 64 // sources. There are fewer than 16 such "basic types", so they fit 65 // nicely into bitfields. 66 // 67 // The syntax of ClassName overlaps slightly with the descriptor 68 // syntaxes. The strings "I" and "(I)V" are both class names 69 // *and* descriptors. If a class name contains any character other 70 // than "BCDFIJSZ()V" it cannot be confused with a descriptor. 71 // Class names inside of descriptors are always contained in an 72 // "envelope" syntax which starts with 'L' and ends with ';'. 73 // 74 // As a confounding factor, array types report their type name strings 75 // in descriptor format. These name strings are easy to recognize, 76 // since they begin with '['. For this reason some API points on 77 // HotSpot look for array descriptors as well as proper class names. 78 // 79 // For historical reasons some API points that accept class names and 80 // array names also look for class names wrapped inside an envelope 81 // (like "LFoo;") and unwrap them on the fly (to a name like "Foo"). 82 83 class Signature : AllStatic { 84 private: 85 static bool is_valid_array_signature(const Symbol* sig); 86 87 public: 88 89 // Returns the basic type of a field signature (or T_VOID for "V"). 90 // Assumes the signature is a valid field descriptor. 91 // Do not apply this function to class names or method signatures. 92 static BasicType basic_type(const Symbol* signature) { 93 return basic_type(signature->char_at(0)); 94 } 95 96 // Returns T_ILLEGAL for an illegal signature char. 97 static BasicType basic_type(int ch); 98 99 // Assuming it is either a class name or signature, 100 // determine if it in fact is an array descriptor. 101 static bool is_array(const Symbol* signature) { 102 return (signature->utf8_length() > 1 && 103 signature->char_at(0) == JVM_SIGNATURE_ARRAY && 104 is_valid_array_signature(signature)); 105 } 106 107 // Assuming it is either a class name or signature, 108 // determine if it contains a class name plus ';'. 109 static bool has_envelope(const Symbol* signature) { 110 return ((signature->utf8_length() > 0) && 111 signature->ends_with(JVM_SIGNATURE_ENDCLASS) && 112 has_envelope(signature->char_at(0))); 113 } 114 115 // Determine if this signature char introduces an 116 // envelope, which is a class name plus ';'. 117 static bool has_envelope(char signature_char) { 118 return (signature_char == JVM_SIGNATURE_CLASS); 119 } 120 121 // Assuming has_envelope is true, return the symbol 122 // inside the envelope, by stripping 'L' and ';'. 123 // Caller is responsible for decrementing the newly created 124 // Symbol's refcount, use TempNewSymbol. 125 static Symbol* strip_envelope(const Symbol* signature); 126 127 // Assuming it's either a field or method descriptor, determine 128 // whether it is in fact a method descriptor: 129 static bool is_method(const Symbol* signature) { 130 return signature->starts_with(JVM_SIGNATURE_FUNC); 131 } 132 133 // Assuming it's a method signature, determine if it must 134 // return void. 135 static bool is_void_method(const Symbol* signature) { 136 assert(is_method(signature), "signature is not for a method"); 137 return signature->ends_with(JVM_SIGNATURE_VOID); 138 } 139 }; 140 141 // A SignatureIterator uses a SignatureStream to produce BasicType 142 // results, discarding class names. This means it can be accelerated 143 // using a fingerprint mechanism, in many cases, without loss of type 144 // information. The FingerPrinter class computes and caches this 145 // reduced information for faster iteration. 146 147 class SignatureIterator: public ResourceObj { 148 public: 149 typedef uint64_t fingerprint_t; 150 151 protected: 152 Symbol* _signature; // the signature to iterate over 153 BasicType _return_type; 154 fingerprint_t _fingerprint; 155 156 public: 157 // Definitions used in generating and iterating the 158 // bit field form of the signature generated by the 159 // Fingerprinter. 160 enum { 161 fp_static_feature_size = 1, 162 fp_is_static_bit = 1, 163 164 fp_result_feature_size = 4, 165 fp_result_feature_mask = right_n_bits(fp_result_feature_size), 166 fp_parameter_feature_size = 4, 167 fp_parameter_feature_mask = right_n_bits(fp_parameter_feature_size), 168 169 fp_parameters_done = 0, // marker for end of parameters (must be zero) 170 171 // Parameters take up full wordsize, minus the result and static bit fields. 172 // Since fp_parameters_done is zero, termination field arises from shifting 173 // in zero bits, and therefore occupies no extra space. 174 // The sentinel value is all-zero-bits, which is impossible for a true 175 // fingerprint, since at least the result field will be non-zero. 176 fp_max_size_of_parameters = ((BitsPerLong 177 - (fp_result_feature_size + fp_static_feature_size)) 178 / fp_parameter_feature_size) 179 }; 180 181 static bool fp_is_valid_type(BasicType type, bool for_return_type = false); 182 183 // Sentinel values are zero and not-zero (-1). 184 // No need to protect the sign bit, since every valid return type is non-zero 185 // (even T_VOID), and there are no valid parameter fields which are 0xF (T_VOID). 186 static fingerprint_t zero_fingerprint() { return (fingerprint_t)0; } 187 static fingerprint_t overflow_fingerprint() { return ~(fingerprint_t)0; } 188 static bool fp_is_valid(fingerprint_t fingerprint) { 189 return (fingerprint != zero_fingerprint()) && (fingerprint != overflow_fingerprint()); 190 } 191 192 // Constructors 193 SignatureIterator(Symbol* signature, fingerprint_t fingerprint = zero_fingerprint()) { 194 _signature = signature; 195 _return_type = T_ILLEGAL; // sentinel value for uninitialized 196 _fingerprint = zero_fingerprint(); 197 if (fingerprint != _fingerprint) { 198 set_fingerprint(fingerprint); 199 } 200 } 201 202 // If the fingerprint is present, we can use an accelerated loop. 203 void set_fingerprint(fingerprint_t fingerprint); 204 205 // Returns the set fingerprint, or zero_fingerprint() 206 // if none has been set already. 207 fingerprint_t fingerprint() const { return _fingerprint; } 208 209 // Iteration 210 // Hey look: There are no virtual methods in this class. 211 // So how is it customized? By calling do_parameters_on 212 // an object which answers to "do_type(BasicType)". 213 // By convention, this object is in the subclass 214 // itself, so the call is "do_parameters_on(this)". 215 // The effect of this is to inline the parsing loop 216 // everywhere "do_parameters_on" is called. 217 // If there is a valid fingerprint in the object, 218 // an improved loop is called which just unpacks the 219 // bitfields from the fingerprint. Otherwise, the 220 // symbol is parsed. 221 template<typename T> inline void do_parameters_on(T* callback); // iterates over parameters only 222 BasicType return_type(); // computes the value on the fly if necessary 223 224 static BasicType fp_return_type(fingerprint_t fingerprint) { 225 assert(fp_is_valid(fingerprint), "invalid fingerprint"); 226 return (BasicType) ((fingerprint >> fp_static_feature_size) & fp_result_feature_mask); 227 } 228 static fingerprint_t fp_start_parameters(fingerprint_t fingerprint) { 229 assert(fp_is_valid(fingerprint), "invalid fingerprint"); 230 return fingerprint >> (fp_static_feature_size + fp_result_feature_size); 231 } 232 static BasicType fp_next_parameter(fingerprint_t& mask) { 233 int result = (mask & fp_parameter_feature_mask); 234 mask >>= fp_parameter_feature_size; 235 return (BasicType) result; 236 } 237 }; 238 239 240 // Specialized SignatureIterators: Used to compute signature specific values. 241 242 class SignatureTypeNames : public SignatureIterator { 243 protected: 244 virtual void type_name(const char* name) = 0; 245 246 friend class SignatureIterator; // so do_parameters_on can call do_type 247 void do_type(BasicType type) { 248 switch (type) { 249 case T_BOOLEAN: type_name("jboolean"); break; 250 case T_CHAR: type_name("jchar" ); break; 251 case T_FLOAT: type_name("jfloat" ); break; 252 case T_DOUBLE: type_name("jdouble" ); break; 253 case T_BYTE: type_name("jbyte" ); break; 254 case T_SHORT: type_name("jshort" ); break; 255 case T_INT: type_name("jint" ); break; 256 case T_LONG: type_name("jlong" ); break; 257 case T_VOID: type_name("void" ); break; 258 case T_ARRAY: 259 case T_OBJECT: type_name("jobject" ); break; 260 default: ShouldNotReachHere(); 261 } 262 } 263 264 public: 265 SignatureTypeNames(Symbol* signature) : SignatureIterator(signature) {} 266 }; 267 268 269 // Specialized SignatureIterator: Used to compute the argument size. 270 271 class ArgumentSizeComputer: public SignatureIterator { 272 private: 273 int _size; 274 friend class SignatureIterator; // so do_parameters_on can call do_type 275 void do_type(BasicType type) { _size += parameter_type_word_count(type); } 276 public: 277 ArgumentSizeComputer(Symbol* signature); 278 int size() { return _size; } 279 }; 280 281 282 class ArgumentCount: public SignatureIterator { 283 private: 284 int _size; 285 friend class SignatureIterator; // so do_parameters_on can call do_type 286 void do_type(BasicType type) { _size++; } 287 public: 288 ArgumentCount(Symbol* signature); 289 int size() { return _size; } 290 }; 291 292 293 class ReferenceArgumentCount: public SignatureIterator { 294 private: 295 int _refs; 296 friend class SignatureIterator; // so do_parameters_on can call do_type 297 void do_type(BasicType type) { if (is_reference_type(type)) _refs++; } 298 public: 299 ReferenceArgumentCount(Symbol* signature); 300 int count() { return _refs; } 301 }; 302 303 304 // Specialized SignatureIterator: Used to compute the result type. 305 306 class ResultTypeFinder: public SignatureIterator { 307 public: 308 BasicType type() { return return_type(); } 309 ResultTypeFinder(Symbol* signature) : SignatureIterator(signature) { } 310 }; 311 312 313 // Fingerprinter computes a unique ID for a given method. The ID 314 // is a bitvector characterizing the methods signature (incl. the receiver). 315 class Fingerprinter: public SignatureIterator { 316 private: 317 fingerprint_t _accumulator; 318 int _param_size; 319 int _stack_arg_slots; 320 int _shift_count; 321 const Method* _method; 322 323 uint _int_args; 324 uint _fp_args; 325 326 void initialize_accumulator() { 327 _accumulator = 0; 328 _shift_count = fp_result_feature_size + fp_static_feature_size; 329 _param_size = 0; 330 _stack_arg_slots = 0; 331 } 332 333 // Out-of-line method does it all in constructor: 334 void compute_fingerprint_and_return_type(bool static_flag = false); 335 336 void initialize_calling_convention(bool static_flag); 337 void do_type_calling_convention(BasicType type); 338 339 friend class SignatureIterator; // so do_parameters_on can call do_type 340 341 void do_type(BasicType type) { 342 assert(fp_is_valid_type(type), "bad parameter type"); 343 if (_param_size <= fp_max_size_of_parameters) { 344 _accumulator |= ((fingerprint_t)type << _shift_count); 345 _shift_count += fp_parameter_feature_size; 346 } 347 _param_size += (is_double_word_type(type) ? 2 : 1); 348 do_type_calling_convention(type); 349 } 350 351 public: 352 int size_of_parameters() const { return _param_size; } 353 int num_stack_arg_slots() const { return _stack_arg_slots; } 354 355 // fingerprint() and return_type() are in super class 356 357 Fingerprinter(const methodHandle& method) 358 : SignatureIterator(method->signature()), 359 _method(method()) { 360 compute_fingerprint_and_return_type(); 361 } 362 Fingerprinter(Symbol* signature, bool is_static) 363 : SignatureIterator(signature), 364 _method(nullptr) { 365 compute_fingerprint_and_return_type(is_static); 366 } 367 }; 368 369 370 // Specialized SignatureIterator: Used for native call purposes 371 372 class NativeSignatureIterator: public SignatureIterator { 373 private: 374 methodHandle _method; 375 // We need separate JNI and Java offset values because in 64 bit mode, 376 // the argument offsets are not in sync with the Java stack. 377 // For example a long takes up 1 "C" stack entry but 2 Java stack entries. 378 int _offset; // The java stack offset 379 int _prepended; // number of prepended JNI parameters (1 JNIEnv, plus 1 mirror if static) 380 int _jni_offset; // the current parameter offset, starting with 0 381 382 friend class SignatureIterator; // so do_parameters_on can call do_type 383 void do_type(BasicType type) { 384 switch (type) { 385 case T_BYTE: 386 case T_BOOLEAN: 387 pass_byte(); _jni_offset++; _offset++; 388 break; 389 case T_CHAR: 390 case T_SHORT: 391 pass_short(); _jni_offset++; _offset++; 392 break; 393 case T_INT: 394 pass_int(); _jni_offset++; _offset++; 395 break; 396 case T_FLOAT: 397 pass_float(); _jni_offset++; _offset++; 398 break; 399 case T_DOUBLE: { 400 int jni_offset = LP64_ONLY(1) NOT_LP64(2); 401 pass_double(); _jni_offset += jni_offset; _offset += 2; 402 break; 403 } 404 case T_LONG: { 405 int jni_offset = LP64_ONLY(1) NOT_LP64(2); 406 pass_long(); _jni_offset += jni_offset; _offset += 2; 407 break; 408 } 409 case T_ARRAY: 410 case T_OBJECT: 411 pass_object(); _jni_offset++; _offset++; 412 break; 413 default: 414 ShouldNotReachHere(); 415 } 416 } 417 418 public: 419 methodHandle method() const { return _method; } 420 int offset() const { return _offset; } 421 int jni_offset() const { return _jni_offset + _prepended; } 422 bool is_static() const { return method()->is_static(); } 423 virtual void pass_int() = 0; 424 virtual void pass_long() = 0; 425 virtual void pass_object() = 0; // objects, arrays, inlines 426 virtual void pass_float() = 0; 427 virtual void pass_byte() { pass_int(); }; 428 virtual void pass_short() { pass_int(); }; 429 #ifdef _LP64 430 virtual void pass_double() = 0; 431 #else 432 virtual void pass_double() { pass_long(); } // may be same as long 433 #endif 434 435 NativeSignatureIterator(const methodHandle& method) : SignatureIterator(method->signature()) { 436 _method = method; 437 _offset = 0; 438 _jni_offset = 0; 439 440 const int JNIEnv_words = 1; 441 const int mirror_words = 1; 442 _prepended = !is_static() ? JNIEnv_words : JNIEnv_words + mirror_words; 443 } 444 445 void iterate() { iterate(Fingerprinter(method()).fingerprint()); } 446 447 // iterate() calls the 3 virtual methods according to the following invocation syntax: 448 // 449 // {pass_int | pass_long | pass_object} 450 // 451 // Arguments are handled from left to right (receiver first, if any). 452 // The offset() values refer to the Java stack offsets but are 0 based and increasing. 453 // The java_offset() values count down to 0, and refer to the Java TOS. 454 // The jni_offset() values increase from 1 or 2, and refer to C arguments. 455 // The method's return type is ignored. 456 457 void iterate(fingerprint_t fingerprint) { 458 set_fingerprint(fingerprint); 459 if (!is_static()) { 460 // handle receiver (not handled by iterate because not in signature) 461 pass_object(); _jni_offset++; _offset++; 462 } 463 do_parameters_on(this); 464 } 465 }; 466 467 468 // This is the core parsing logic for iterating over signatures. 469 // All of the previous classes use this for doing their work. 470 471 class SignatureStream : public StackObj { 472 private: 473 const Symbol* _signature; 474 int _begin; 475 int _end; 476 int _limit; 477 int _array_prefix; // count of '[' before the array element descr 478 BasicType _type; 479 int _state; 480 Symbol* _previous_name; // cache the previously looked up symbol to avoid lookups 481 GrowableArray<Symbol*>* _names; // symbols created while parsing that need to be dereferenced 482 483 Symbol* find_symbol(); 484 485 enum { _s_field = 0, _s_method = 1, _s_method_return = 3 }; 486 void set_done() { 487 _state |= -2; // preserve s_method bit 488 assert(is_done(), "Unable to set state to done"); 489 } 490 int scan_type(BasicType bt); 491 492 public: 493 bool at_return_type() const { return _state == (int)_s_method_return; } 494 bool is_done() const { return _state < 0; } 495 void next(); 496 497 SignatureStream(const Symbol* signature, bool is_method = true); 498 ~SignatureStream(); 499 500 bool is_reference() const { return is_reference_type(_type); } 501 bool is_array() const { return _type == T_ARRAY; } 502 BasicType type() const { return _type; } 503 504 const u1* raw_bytes() const { return _signature->bytes() + _begin; } 505 int raw_length() const { return _end - _begin; } 506 int raw_symbol_begin() const { return _begin + (has_envelope() ? 1 : 0); } 507 int raw_symbol_end() const { return _end - (has_envelope() ? 1 : 0); } 508 char raw_char_at(int i) const { 509 assert(i < _limit, "index for raw_char_at is over the limit"); 510 return _signature->char_at(i); 511 } 512 513 // True if there is an embedded class name in this type, 514 // followed by ';'. 515 bool has_envelope() const { 516 if (!Signature::has_envelope(_signature->char_at(_begin))) 517 return false; 518 // this should always be true, but let's test it: 519 assert(_signature->char_at(_end-1) == JVM_SIGNATURE_ENDCLASS, "signature envelope has no semi-colon at end"); 520 return true; 521 } 522 523 // return the symbol for chars in symbol_begin()..symbol_end() 524 Symbol* as_symbol() { 525 return find_symbol(); 526 } 527 528 // in case you want only the return type: 529 void skip_to_return_type(); 530 531 // number of '[' in array prefix 532 int array_prefix_length() { 533 return _type == T_ARRAY ? _array_prefix : 0; 534 } 535 536 // In case you want only the array base type, 537 // reset the stream after skipping some brackets '['. 538 // (The argument is clipped to array_prefix_length(), 539 // and if it ends up as zero this call is a nop. 540 // The default is value skips all brackets '['.) 541 private: 542 int skip_whole_array_prefix(); 543 public: 544 int skip_array_prefix(int max_skip_length) { 545 if (_type != T_ARRAY) { 546 return 0; 547 } 548 if (_array_prefix > max_skip_length) { 549 // strip some but not all levels of T_ARRAY 550 _array_prefix -= max_skip_length; 551 _begin += max_skip_length; 552 return max_skip_length; 553 } 554 return skip_whole_array_prefix(); 555 } 556 int skip_array_prefix() { 557 if (_type != T_ARRAY) { 558 return 0; 559 } 560 return skip_whole_array_prefix(); 561 } 562 563 // free-standing lookups (bring your own CL/PD pair) 564 enum FailureMode { ReturnNull, NCDFError, CachedOrNull }; 565 Klass* as_klass(Handle class_loader, FailureMode failure_mode, TRAPS); 566 oop as_java_mirror(Handle class_loader, FailureMode failure_mode, TRAPS); 567 }; 568 569 // Specialized SignatureStream: used for invoking SystemDictionary to either find 570 // or resolve the underlying type when iterating over a 571 // Java descriptor (or parts of it). 572 class ResolvingSignatureStream : public SignatureStream { 573 Klass* _load_origin; 574 bool _handles_cached; 575 Handle _class_loader; // cached when needed 576 577 void initialize_load_origin(Klass* load_origin) { 578 _load_origin = load_origin; 579 _handles_cached = (load_origin == nullptr); 580 } 581 void need_handles() { 582 if (!_handles_cached) { 583 cache_handles(); 584 _handles_cached = true; 585 } 586 } 587 void cache_handles(); 588 589 public: 590 ResolvingSignatureStream(Symbol* signature, Klass* load_origin, bool is_method = true); 591 ResolvingSignatureStream(Symbol* signature, Handle class_loader, bool is_method = true); 592 ResolvingSignatureStream(const Method* method); 593 594 Klass* as_klass(FailureMode failure_mode, TRAPS) { 595 need_handles(); 596 return SignatureStream::as_klass(_class_loader, failure_mode, THREAD); 597 } 598 oop as_java_mirror(FailureMode failure_mode, TRAPS) { 599 if (is_reference()) { 600 need_handles(); 601 } 602 return SignatureStream::as_java_mirror(_class_loader, failure_mode, THREAD); 603 } 604 }; 605 606 // Here is how all the SignatureIterator classes invoke the 607 // SignatureStream engine to do their parsing. 608 template<typename T> inline 609 void SignatureIterator::do_parameters_on(T* callback) { 610 fingerprint_t unaccumulator = _fingerprint; 611 612 // Check for too many arguments, or missing fingerprint: 613 if (!fp_is_valid(unaccumulator)) { 614 SignatureStream ss(_signature); 615 for (; !ss.at_return_type(); ss.next()) { 616 callback->do_type(ss.type()); 617 } 618 // while we are here, capture the return type 619 _return_type = ss.type(); 620 } else { 621 // Optimized version of do_parameters when fingerprint is known 622 assert(_return_type != T_ILLEGAL, "return type already captured from fp"); 623 unaccumulator = fp_start_parameters(unaccumulator); 624 for (BasicType type; (type = fp_next_parameter(unaccumulator)) != (BasicType)fp_parameters_done; ) { 625 assert(fp_is_valid_type(type), "garbled fingerprint"); 626 callback->do_type(type); 627 } 628 } 629 } 630 631 #ifdef ASSERT 632 class SignatureVerifier : public StackObj { 633 public: 634 static bool is_valid_method_signature(Symbol* sig); 635 static bool is_valid_type_signature(Symbol* sig); 636 private: 637 static ssize_t is_valid_type(const char*, ssize_t); 638 }; 639 #endif 640 #endif // SHARE_RUNTIME_SIGNATURE_HPP