1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef SHARE_RUNTIME_SIGNATURE_HPP
27 #define SHARE_RUNTIME_SIGNATURE_HPP
28
29 #include "memory/allocation.hpp"
30 #include "oops/method.hpp"
31
32 // Static routines and parsing loops for processing field and method
33 // descriptors. In the HotSpot sources we call them "signatures".
34 //
35 // A SignatureStream iterates over a Java descriptor (or parts of it).
36 // The syntax is documented in the Java Virtual Machine Specification,
37 // section 4.3.
38 //
39 // The syntax may be summarized as follows:
40 //
41 // MethodType: '(' {FieldType}* ')' (FieldType | 'V')
42 // FieldType: PrimitiveType | ObjectType | ArrayType
43 // PrimitiveType: 'B' | 'C' | 'D' | 'F' | 'I' | 'J' | 'S' | 'Z'
44 // ObjectType: 'L' ClassName ';' | ArrayType
45 // ArrayType: '[' FieldType
46 // ClassName: {UnqualifiedName '/'}* UnqualifiedName
47 // UnqualifiedName: NameChar {NameChar}*
48 // NameChar: ANY_CHAR_EXCEPT('/' | '.' | ';' | '[')
49 //
50 // All of the concrete characters in the above grammar are given
51 // standard manifest constant names of the form JVM_SIGNATURE_x.
52 // Executable code uses these constant names in preference to raw
53 // character constants. Comments and assertion code sometimes use
54 // the raw character constants for brevity.
55 //
56 // The primitive field types (like 'I') correspond 1-1 with type codes
57 // (like T_INT) which form part of the specification of the 'newarray'
58 // instruction (JVMS 6.5, section on newarray). These type codes are
59 // widely used in the HotSpot code. They are joined by ad hoc codes
60 // like T_OBJECT and T_ARRAY (defined in HotSpot but not in the JVMS)
61 // so that each "basic type" of field descriptor (or void return type)
62 // has a corresponding T_x code. Thus, while T_x codes play a very
63 // minor role in the JVMS, they play a major role in the HotSpot
64 // sources. There are fewer than 16 such "basic types", so they fit
65 // nicely into bitfields.
66 //
67 // The syntax of ClassName overlaps slightly with the descriptor
68 // syntaxes. The strings "I" and "(I)V" are both class names
69 // *and* descriptors. If a class name contains any character other
70 // than "BCDFIJSZ()V" it cannot be confused with a descriptor.
71 // Class names inside of descriptors are always contained in an
72 // "envelope" syntax which starts with 'L' and ends with ';'.
73 //
74 // As a confounding factor, array types report their type name strings
75 // in descriptor format. These name strings are easy to recognize,
76 // since they begin with '['. For this reason some API points on
77 // HotSpot look for array descriptors as well as proper class names.
78 //
79 // For historical reasons some API points that accept class names and
80 // array names also look for class names wrapped inside an envelope
81 // (like "LFoo;") and unwrap them on the fly (to a name like "Foo").
82
83 class Signature : AllStatic {
84 private:
85 static bool is_valid_array_signature(const Symbol* sig);
86
87 public:
88
89 // Returns the basic type of a field signature (or T_VOID for "V").
90 // Assumes the signature is a valid field descriptor.
91 // Do not apply this function to class names or method signatures.
92 static BasicType basic_type(const Symbol* signature) {
93 return basic_type(signature->char_at(0));
94 }
95
96 // Returns T_ILLEGAL for an illegal signature char.
97 static BasicType basic_type(int ch);
98
99 // Assuming it is either a class name or signature,
100 // determine if it in fact is an array descriptor.
101 static bool is_array(const Symbol* signature) {
102 return (signature->utf8_length() > 1 &&
103 signature->char_at(0) == JVM_SIGNATURE_ARRAY &&
104 is_valid_array_signature(signature));
105 }
106
107 // Assuming it is either a class name or signature,
108 // determine if it contains a class name plus ';'.
109 static bool has_envelope(const Symbol* signature) {
110 return ((signature->utf8_length() > 0) &&
111 signature->ends_with(JVM_SIGNATURE_ENDCLASS) &&
112 has_envelope(signature->char_at(0)));
113 }
114
115 // Determine if this signature char introduces an
116 // envelope, which is a class name plus ';'.
117 static bool has_envelope(char signature_char) {
118 return (signature_char == JVM_SIGNATURE_CLASS);
119 }
120
121 // Assuming has_envelope is true, return the symbol
122 // inside the envelope, by stripping 'L' and ';'.
123 // Caller is responsible for decrementing the newly created
124 // Symbol's refcount, use TempNewSymbol.
125 static Symbol* strip_envelope(const Symbol* signature);
126
127 // Assuming it's either a field or method descriptor, determine
128 // whether it is in fact a method descriptor:
129 static bool is_method(const Symbol* signature) {
130 return signature->starts_with(JVM_SIGNATURE_FUNC);
131 }
132
133 // Assuming it's a method signature, determine if it must
134 // return void.
135 static bool is_void_method(const Symbol* signature) {
136 assert(is_method(signature), "signature is not for a method");
137 return signature->ends_with(JVM_SIGNATURE_VOID);
138 }
139 };
140
141 // A SignatureIterator uses a SignatureStream to produce BasicType
142 // results, discarding class names. This means it can be accelerated
143 // using a fingerprint mechanism, in many cases, without loss of type
// information. The Fingerprinter class computes and caches this
145 // reduced information for faster iteration.
146
147 class SignatureIterator: public ResourceObj {
148 public:
149 typedef uint64_t fingerprint_t;
150
151 protected:
152 Symbol* _signature; // the signature to iterate over
153 BasicType _return_type;
154 fingerprint_t _fingerprint;
155
156 public:
157 // Definitions used in generating and iterating the
158 // bit field form of the signature generated by the
159 // Fingerprinter.
160 enum {
161 fp_static_feature_size = 1,
162 fp_is_static_bit = 1,
163
164 fp_result_feature_size = 4,
165 fp_result_feature_mask = right_n_bits(fp_result_feature_size),
166 fp_parameter_feature_size = 4,
167 fp_parameter_feature_mask = right_n_bits(fp_parameter_feature_size),
168
169 fp_parameters_done = 0, // marker for end of parameters (must be zero)
170
171 // Parameters take up full wordsize, minus the result and static bit fields.
172 // Since fp_parameters_done is zero, termination field arises from shifting
173 // in zero bits, and therefore occupies no extra space.
174 // The sentinel value is all-zero-bits, which is impossible for a true
175 // fingerprint, since at least the result field will be non-zero.
176 fp_max_size_of_parameters = ((BitsPerLong
177 - (fp_result_feature_size + fp_static_feature_size))
178 / fp_parameter_feature_size)
179 };
180
181 static bool fp_is_valid_type(BasicType type, bool for_return_type = false);
182
183 // Sentinel values are zero and not-zero (-1).
184 // No need to protect the sign bit, since every valid return type is non-zero
185 // (even T_VOID), and there are no valid parameter fields which are 0xF (T_VOID).
186 static fingerprint_t zero_fingerprint() { return (fingerprint_t)0; }
187 static fingerprint_t overflow_fingerprint() { return ~(fingerprint_t)0; }
188 static bool fp_is_valid(fingerprint_t fingerprint) {
189 return (fingerprint != zero_fingerprint()) && (fingerprint != overflow_fingerprint());
190 }
191
192 // Constructors
193 SignatureIterator(Symbol* signature, fingerprint_t fingerprint = zero_fingerprint()) {
194 _signature = signature;
195 _return_type = T_ILLEGAL; // sentinel value for uninitialized
196 _fingerprint = zero_fingerprint();
197 if (fingerprint != _fingerprint) {
198 set_fingerprint(fingerprint);
199 }
200 }
201
202 // If the fingerprint is present, we can use an accelerated loop.
203 void set_fingerprint(fingerprint_t fingerprint);
204
205 // Returns the set fingerprint, or zero_fingerprint()
206 // if none has been set already.
207 fingerprint_t fingerprint() const { return _fingerprint; }
208
209 // Iteration
210 // Hey look: There are no virtual methods in this class.
211 // So how is it customized? By calling do_parameters_on
212 // an object which answers to "do_type(BasicType)".
213 // By convention, this object is in the subclass
214 // itself, so the call is "do_parameters_on(this)".
215 // The effect of this is to inline the parsing loop
216 // everywhere "do_parameters_on" is called.
217 // If there is a valid fingerprint in the object,
218 // an improved loop is called which just unpacks the
219 // bitfields from the fingerprint. Otherwise, the
220 // symbol is parsed.
221 template<typename T> inline void do_parameters_on(T* callback); // iterates over parameters only
222 BasicType return_type(); // computes the value on the fly if necessary
223
224 static BasicType fp_return_type(fingerprint_t fingerprint) {
225 assert(fp_is_valid(fingerprint), "invalid fingerprint");
226 return (BasicType) ((fingerprint >> fp_static_feature_size) & fp_result_feature_mask);
227 }
228 static fingerprint_t fp_start_parameters(fingerprint_t fingerprint) {
229 assert(fp_is_valid(fingerprint), "invalid fingerprint");
230 return fingerprint >> (fp_static_feature_size + fp_result_feature_size);
231 }
232 static BasicType fp_next_parameter(fingerprint_t& mask) {
233 int result = (mask & fp_parameter_feature_mask);
234 mask >>= fp_parameter_feature_size;
235 return (BasicType) result;
236 }
237 };
238
239
240 // Specialized SignatureIterators: Used to compute signature specific values.
241
242 class SignatureTypeNames : public SignatureIterator {
243 protected:
244 virtual void type_name(const char* name) = 0;
245
246 friend class SignatureIterator; // so do_parameters_on can call do_type
247 void do_type(BasicType type) {
248 switch (type) {
249 case T_BOOLEAN: type_name("jboolean"); break;
250 case T_CHAR: type_name("jchar" ); break;
251 case T_FLOAT: type_name("jfloat" ); break;
252 case T_DOUBLE: type_name("jdouble" ); break;
253 case T_BYTE: type_name("jbyte" ); break;
254 case T_SHORT: type_name("jshort" ); break;
255 case T_INT: type_name("jint" ); break;
256 case T_LONG: type_name("jlong" ); break;
257 case T_VOID: type_name("void" ); break;
258 case T_ARRAY:
259 case T_OBJECT: type_name("jobject" ); break;
260 default: ShouldNotReachHere();
261 }
262 }
263
264 public:
265 SignatureTypeNames(Symbol* signature) : SignatureIterator(signature) {}
266 };
267
268
269 // Specialized SignatureIterator: Used to compute the argument size.
270
271 class ArgumentSizeComputer: public SignatureIterator {
272 private:
273 int _size;
274 friend class SignatureIterator; // so do_parameters_on can call do_type
275 void do_type(BasicType type) { _size += parameter_type_word_count(type); }
276 public:
277 ArgumentSizeComputer(Symbol* signature);
278 int size() { return _size; }
279 };
280
281
282 class ArgumentCount: public SignatureIterator {
283 private:
284 int _size;
285 friend class SignatureIterator; // so do_parameters_on can call do_type
286 void do_type(BasicType type) { _size++; }
287 public:
288 ArgumentCount(Symbol* signature);
289 int size() { return _size; }
290 };
291
292
293 class ReferenceArgumentCount: public SignatureIterator {
294 private:
295 int _refs;
296 friend class SignatureIterator; // so do_parameters_on can call do_type
297 void do_type(BasicType type) { if (is_reference_type(type)) _refs++; }
298 public:
299 ReferenceArgumentCount(Symbol* signature);
300 int count() { return _refs; }
301 };
302
303
304 // Specialized SignatureIterator: Used to compute the result type.
305
306 class ResultTypeFinder: public SignatureIterator {
307 public:
308 BasicType type() { return return_type(); }
309 ResultTypeFinder(Symbol* signature) : SignatureIterator(signature) { }
310 };
311
312
// Fingerprinter computes a unique ID for a given method. The ID
// is a bitvector characterizing the method's signature (incl. the receiver).
315 class Fingerprinter: public SignatureIterator {
316 private:
317 fingerprint_t _accumulator;
318 int _param_size;
319 int _stack_arg_slots;
320 int _shift_count;
321 const Method* _method;
322
323 uint _int_args;
324 uint _fp_args;
325
326 void initialize_accumulator() {
327 _accumulator = 0;
328 _shift_count = fp_result_feature_size + fp_static_feature_size;
329 _param_size = 0;
330 _stack_arg_slots = 0;
331 }
332
333 // Out-of-line method does it all in constructor:
334 void compute_fingerprint_and_return_type(bool static_flag = false);
335
336 void initialize_calling_convention(bool static_flag);
337 void do_type_calling_convention(BasicType type);
338
339 friend class SignatureIterator; // so do_parameters_on can call do_type
340
341 void do_type(BasicType type) {
342 assert(fp_is_valid_type(type), "bad parameter type");
343 if (_param_size <= fp_max_size_of_parameters) {
344 _accumulator |= ((fingerprint_t)type << _shift_count);
345 _shift_count += fp_parameter_feature_size;
346 }
347 _param_size += (is_double_word_type(type) ? 2 : 1);
348 do_type_calling_convention(type);
349 }
350
351 public:
352 int size_of_parameters() const { return _param_size; }
353 int num_stack_arg_slots() const { return _stack_arg_slots; }
354
355 // fingerprint() and return_type() are in super class
356
357 Fingerprinter(const methodHandle& method)
358 : SignatureIterator(method->signature()),
359 _method(method()) {
360 compute_fingerprint_and_return_type();
361 }
362 Fingerprinter(Symbol* signature, bool is_static)
363 : SignatureIterator(signature),
364 _method(nullptr) {
365 compute_fingerprint_and_return_type(is_static);
366 }
367 };
368
369
370 // Specialized SignatureIterator: Used for native call purposes
371
372 class NativeSignatureIterator: public SignatureIterator {
373 private:
374 methodHandle _method;
375 // We need separate JNI and Java offset values because in 64 bit mode,
376 // the argument offsets are not in sync with the Java stack.
377 // For example a long takes up 1 "C" stack entry but 2 Java stack entries.
378 int _offset; // The java stack offset
379 int _prepended; // number of prepended JNI parameters (1 JNIEnv, plus 1 mirror if static)
380 int _jni_offset; // the current parameter offset, starting with 0
381
382 friend class SignatureIterator; // so do_parameters_on can call do_type
383 void do_type(BasicType type) {
384 switch (type) {
385 case T_BYTE:
386 case T_BOOLEAN:
387 pass_byte(); _jni_offset++; _offset++;
388 break;
389 case T_CHAR:
390 case T_SHORT:
391 pass_short(); _jni_offset++; _offset++;
392 break;
393 case T_INT:
394 pass_int(); _jni_offset++; _offset++;
395 break;
396 case T_FLOAT:
397 pass_float(); _jni_offset++; _offset++;
398 break;
399 case T_DOUBLE: {
400 int jni_offset = LP64_ONLY(1) NOT_LP64(2);
401 pass_double(); _jni_offset += jni_offset; _offset += 2;
402 break;
403 }
404 case T_LONG: {
405 int jni_offset = LP64_ONLY(1) NOT_LP64(2);
406 pass_long(); _jni_offset += jni_offset; _offset += 2;
407 break;
408 }
409 case T_ARRAY:
410 case T_OBJECT:
411 pass_object(); _jni_offset++; _offset++;
412 break;
413 default:
414 ShouldNotReachHere();
415 }
416 }
417
418 public:
419 methodHandle method() const { return _method; }
420 int offset() const { return _offset; }
421 int jni_offset() const { return _jni_offset + _prepended; }
422 bool is_static() const { return method()->is_static(); }
423 virtual void pass_int() = 0;
424 virtual void pass_long() = 0;
425 virtual void pass_object() = 0; // objects, arrays, inlines
426 virtual void pass_float() = 0;
427 virtual void pass_byte() { pass_int(); };
428 virtual void pass_short() { pass_int(); };
429 #ifdef _LP64
430 virtual void pass_double() = 0;
431 #else
432 virtual void pass_double() { pass_long(); } // may be same as long
433 #endif
434
435 NativeSignatureIterator(const methodHandle& method) : SignatureIterator(method->signature()) {
436 _method = method;
437 _offset = 0;
438 _jni_offset = 0;
439
440 const int JNIEnv_words = 1;
441 const int mirror_words = 1;
442 _prepended = !is_static() ? JNIEnv_words : JNIEnv_words + mirror_words;
443 }
444
445 void iterate() { iterate(Fingerprinter(method()).fingerprint()); }
446
447 // iterate() calls the 3 virtual methods according to the following invocation syntax:
448 //
449 // {pass_int | pass_long | pass_object}
450 //
451 // Arguments are handled from left to right (receiver first, if any).
452 // The offset() values refer to the Java stack offsets but are 0 based and increasing.
453 // The java_offset() values count down to 0, and refer to the Java TOS.
454 // The jni_offset() values increase from 1 or 2, and refer to C arguments.
455 // The method's return type is ignored.
456
457 void iterate(fingerprint_t fingerprint) {
458 set_fingerprint(fingerprint);
459 if (!is_static()) {
460 // handle receiver (not handled by iterate because not in signature)
461 pass_object(); _jni_offset++; _offset++;
462 }
463 do_parameters_on(this);
464 }
465 };
466
467
468 // This is the core parsing logic for iterating over signatures.
469 // All of the previous classes use this for doing their work.
470
471 class SignatureStream : public StackObj {
472 private:
473 const Symbol* _signature;
474 int _begin;
475 int _end;
476 int _limit;
477 int _array_prefix; // count of '[' before the array element descr
478 BasicType _type;
479 int _state;
480 Symbol* _previous_name; // cache the previously looked up symbol to avoid lookups
481 GrowableArray<Symbol*>* _names; // symbols created while parsing that need to be dereferenced
482
483 Symbol* find_symbol();
484
485 enum { _s_field = 0, _s_method = 1, _s_method_return = 3 };
486 void set_done() {
487 _state |= -2; // preserve s_method bit
488 assert(is_done(), "Unable to set state to done");
489 }
490 int scan_type(BasicType bt);
491
492 public:
493 bool at_return_type() const { return _state == (int)_s_method_return; }
494 bool is_done() const { return _state < 0; }
495 void next();
496
497 SignatureStream(const Symbol* signature, bool is_method = true);
498 ~SignatureStream();
499
500 bool is_reference() const { return is_reference_type(_type); }
501 bool is_array() const { return _type == T_ARRAY; }
502 BasicType type() const { return _type; }
503
504 const u1* raw_bytes() const { return _signature->bytes() + _begin; }
505 int raw_length() const { return _end - _begin; }
506 int raw_symbol_begin() const { return _begin + (has_envelope() ? 1 : 0); }
507 int raw_symbol_end() const { return _end - (has_envelope() ? 1 : 0); }
508 char raw_char_at(int i) const {
509 assert(i < _limit, "index for raw_char_at is over the limit");
510 return _signature->char_at(i);
511 }
512
513 // True if there is an embedded class name in this type,
514 // followed by ';'.
515 bool has_envelope() const {
516 if (!Signature::has_envelope(_signature->char_at(_begin)))
517 return false;
518 // this should always be true, but let's test it:
519 assert(_signature->char_at(_end-1) == JVM_SIGNATURE_ENDCLASS, "signature envelope has no semi-colon at end");
520 return true;
521 }
522
523 // return the symbol for chars in symbol_begin()..symbol_end()
524 Symbol* as_symbol() {
525 return find_symbol();
526 }
527
528 // in case you want only the return type:
529 void skip_to_return_type();
530
531 // number of '[' in array prefix
532 int array_prefix_length() {
533 return _type == T_ARRAY ? _array_prefix : 0;
534 }
535
536 // In case you want only the array base type,
537 // reset the stream after skipping some brackets '['.
538 // (The argument is clipped to array_prefix_length(),
539 // and if it ends up as zero this call is a nop.
540 // The default is value skips all brackets '['.)
541 private:
542 int skip_whole_array_prefix();
543 public:
544 int skip_array_prefix(int max_skip_length) {
545 if (_type != T_ARRAY) {
546 return 0;
547 }
548 if (_array_prefix > max_skip_length) {
549 // strip some but not all levels of T_ARRAY
550 _array_prefix -= max_skip_length;
551 _begin += max_skip_length;
552 return max_skip_length;
553 }
554 return skip_whole_array_prefix();
555 }
556 int skip_array_prefix() {
557 if (_type != T_ARRAY) {
558 return 0;
559 }
560 return skip_whole_array_prefix();
561 }
562
563 // free-standing lookups (bring your own CL/PD pair)
564 enum FailureMode { ReturnNull, NCDFError, CachedOrNull };
565 Klass* as_klass(Handle class_loader, FailureMode failure_mode, TRAPS);
566 oop as_java_mirror(Handle class_loader, FailureMode failure_mode, TRAPS);
567 };
568
569 // Specialized SignatureStream: used for invoking SystemDictionary to either find
570 // or resolve the underlying type when iterating over a
571 // Java descriptor (or parts of it).
572 class ResolvingSignatureStream : public SignatureStream {
573 Klass* _load_origin;
574 bool _handles_cached;
575 Handle _class_loader; // cached when needed
576
577 void initialize_load_origin(Klass* load_origin) {
578 _load_origin = load_origin;
579 _handles_cached = (load_origin == nullptr);
580 }
581 void need_handles() {
582 if (!_handles_cached) {
583 cache_handles();
584 _handles_cached = true;
585 }
586 }
587 void cache_handles();
588
589 public:
590 ResolvingSignatureStream(Symbol* signature, Klass* load_origin, bool is_method = true);
591 ResolvingSignatureStream(Symbol* signature, Handle class_loader, bool is_method = true);
592 ResolvingSignatureStream(const Method* method);
593
594 Klass* as_klass(FailureMode failure_mode, TRAPS) {
595 need_handles();
596 return SignatureStream::as_klass(_class_loader, failure_mode, THREAD);
597 }
598 oop as_java_mirror(FailureMode failure_mode, TRAPS) {
599 if (is_reference()) {
600 need_handles();
601 }
602 return SignatureStream::as_java_mirror(_class_loader, failure_mode, THREAD);
603 }
604 };
605
606 // Here is how all the SignatureIterator classes invoke the
607 // SignatureStream engine to do their parsing.
608 template<typename T> inline
609 void SignatureIterator::do_parameters_on(T* callback) {
610 fingerprint_t unaccumulator = _fingerprint;
611
612 // Check for too many arguments, or missing fingerprint:
613 if (!fp_is_valid(unaccumulator)) {
614 SignatureStream ss(_signature);
615 for (; !ss.at_return_type(); ss.next()) {
616 callback->do_type(ss.type());
617 }
618 // while we are here, capture the return type
619 _return_type = ss.type();
620 } else {
621 // Optimized version of do_parameters when fingerprint is known
622 assert(_return_type != T_ILLEGAL, "return type already captured from fp");
623 unaccumulator = fp_start_parameters(unaccumulator);
624 for (BasicType type; (type = fp_next_parameter(unaccumulator)) != (BasicType)fp_parameters_done; ) {
625 assert(fp_is_valid_type(type), "garbled fingerprint");
626 callback->do_type(type);
627 }
628 }
629 }
630
631 #ifdef ASSERT
632 class SignatureVerifier : public StackObj {
633 public:
634 static bool is_valid_method_signature(Symbol* sig);
635 static bool is_valid_type_signature(Symbol* sig);
636 private:
637 static ssize_t is_valid_type(const char*, ssize_t);
638 };
639 #endif
640 #endif // SHARE_RUNTIME_SIGNATURE_HPP