1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "cds/archiveBuilder.hpp"
26 #include "classfile/altHashing.hpp"
27 #include "classfile/classLoaderData.hpp"
28 #include "classfile/vmSymbols.hpp"
29 #include "gc/shared/collectedHeap.hpp"
30 #include "logging/log.hpp"
31 #include "logging/logStream.hpp"
32 #include "memory/allocation.inline.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "oops/symbol.hpp"
36 #include "runtime/atomicAccess.hpp"
37 #include "runtime/mutexLocker.hpp"
38 #include "runtime/os.hpp"
39 #include "runtime/signature.hpp"
40 #include "utilities/stringUtils.hpp"
41 #include "utilities/utf8.hpp"
42
// Storage for the static Symbol::_vm_symbols table: one Symbol* slot per
// entry counted by vmSymbols::number_of_symbols().
// NOTE(review): presumably indexed by vmSymbolID -- confirm against symbol.hpp.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
44
45 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
46 STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
47 assert(refcount >= 0, "negative refcount");
48 assert(refcount <= PERM_REFCOUNT, "invalid refcount");
49 uint32_t hi = hash;
50 uint32_t lo = refcount;
51 return (hi << 16) | lo;
52 }
53
54 Symbol::Symbol(const u1* name, int length, int refcount) {
55 assert(length <= max_length(), "SymbolTable should have caught this!");
56 _hash_and_refcount = pack_hash_and_refcount((short)os::random(), refcount);
57 _length = (u2)length;
58 // _body[0..1] are allocated in the header just by coincidence in the current
59 // implementation of Symbol. They are read by identity_hash(), so make sure they
60 // are initialized.
61 // No other code should assume that _body[0..1] are always allocated. E.g., do
62 // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
63 _body[0] = _body[1] = 0;
64 memcpy(_body, name, length);
65 }
66
67 // This copies the symbol when it is added to the ConcurrentHashTable.
68 Symbol::Symbol(const Symbol& s1) {
69 _hash_and_refcount = s1._hash_and_refcount;
70 _length = s1._length;
71 memcpy(_body, s1._body, _length);
72 }
73
74 #if INCLUDE_CDS
75 void Symbol::update_identity_hash() {
76 assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
77 _hash_and_refcount = pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
78 }
79
80 void Symbol::set_permanent() {
81 // This is called at a safepoint during dumping of a dynamic CDS archive.
82 assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
83 _hash_and_refcount = pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
84 }
85 #endif
86
87 Symbol* Symbol::fundamental_name(TRAPS) {
88 if (char_at(0) == JVM_SIGNATURE_CLASS && ends_with(JVM_SIGNATURE_ENDCLASS)) {
89 return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
90 } else {
91 // reference count is incremented to be consistent with the behavior with
92 // the SymbolTable::new_symbol() call above
93 this->increment_refcount();
94 return this;
95 }
96 }
97
98 bool Symbol::is_same_fundamental_type(Symbol* s) const {
99 if (this == s) return true;
100 if (utf8_length() < 3) return false;
101 int offset1, offset2, len;
102 if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
103 if (char_at(0) != JVM_SIGNATURE_CLASS) return false;
104 offset1 = 1;
105 len = utf8_length() - 2;
106 } else {
107 offset1 = 0;
108 len = utf8_length();
109 }
110 if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
111 if (s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
112 offset2 = 1;
113 } else {
114 offset2 = 0;
115 }
116 if ((offset2 + len) > s->utf8_length()) return false;
117 if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
118 return false;
119 int l = len;
120 while (l-- > 0) {
121 if (char_at(offset1 + l) != s->char_at(offset2 + l))
122 return false;
123 }
124 return true;
125 }
126
127 // ------------------------------------------------------------------
128 // Symbol::index_of
129 //
130 // Test if we have the give substring at or after the i-th char of this
131 // symbol's utf8 bytes.
132 // Return -1 on failure. Otherwise return the first index where substr occurs.
133 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
134 assert(i >= 0 && i <= utf8_length(), "oob");
135 if (substr_len <= 0) return 0;
136 char first_char = substr[0];
137 address bytes = (address) ((Symbol*)this)->base();
138 address limit = bytes + utf8_length() - substr_len; // inclusive limit
139 address scan = bytes + i;
140 if (scan > limit)
141 return -1;
142 for (; scan <= limit; scan++) {
143 scan = (address) memchr(scan, first_char, (limit + 1 - scan));
144 if (scan == nullptr)
145 return -1; // not found
146 assert(scan >= bytes+i && scan <= limit, "scan oob");
147 if (substr_len <= 2
148 ? (char) scan[substr_len-1] == substr[substr_len-1]
149 : memcmp(scan+1, substr+1, substr_len-1) == 0) {
150 return (int)(scan - bytes);
151 }
152 }
153 return -1;
154 }
155
156 bool Symbol::is_star_match(const char* pattern) const {
157 if (strchr(pattern, '*') == nullptr) {
158 return equals(pattern);
159 } else {
160 ResourceMark rm;
161 char* buf = as_C_string();
162 return StringUtils::is_star_match(pattern, buf);
163 }
164 }
165
166 char* Symbol::as_C_string(char* buf, int size) const {
167 if (size > 0) {
168 int len = MIN2(size - 1, utf8_length());
169 for (int i = 0; i < len; i++) {
170 buf[i] = char_at(i);
171 }
172 buf[len] = '\0';
173 }
174 return buf;
175 }
176
177 char* Symbol::as_C_string() const {
178 int len = utf8_length();
179 char* str = NEW_RESOURCE_ARRAY(char, len + 1);
180 return as_C_string(str, len + 1);
181 }
182
183 void Symbol::print_utf8_on(outputStream* st) const {
184 st->print("%s", as_C_string());
185 }
186
187 void Symbol::print_symbol_on(outputStream* st) const {
188 char *s;
189 st = st ? st : tty;
190 {
191 // ResourceMark may not affect st->print(). If st is a string
192 // stream it could resize, using the same resource arena.
193 ResourceMark rm;
194 s = as_quoted_ascii();
195 s = os::strdup(s);
196 }
197 if (s == nullptr) {
198 st->print("(null)");
199 } else {
200 st->print("%s", s);
201 os::free(s);
202 }
203 }
204
205 char* Symbol::as_quoted_ascii() const {
206 const char *ptr = (const char *)&_body[0];
207 size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
208 char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
209 UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
210 return result;
211 }
212
213 jchar* Symbol::as_unicode(int& length) const {
214 Symbol* this_ptr = (Symbol*)this;
215 length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
216 jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
217 if (length > 0) {
218 UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
219 }
220 return result;
221 }
222
223 const char* Symbol::as_klass_external_name(char* buf, int size) const {
224 if (size > 0) {
225 char* str = as_C_string(buf, size);
226 int length = (int)strlen(str);
227 // Turn all '/'s into '.'s (also for array klasses)
228 for (int index = 0; index < length; index++) {
229 if (str[index] == JVM_SIGNATURE_SLASH) {
230 str[index] = JVM_SIGNATURE_DOT;
231 }
232 }
233 return str;
234 } else {
235 return buf;
236 }
237 }
238
239 const char* Symbol::as_klass_external_name() const {
240 char* str = as_C_string();
241 int length = (int)strlen(str);
242 // Turn all '/'s into '.'s (also for array klasses)
243 for (int index = 0; index < length; index++) {
244 if (str[index] == JVM_SIGNATURE_SLASH) {
245 str[index] = JVM_SIGNATURE_DOT;
246 }
247 }
248 return str;
249 }
250
251 static void print_class(outputStream *os, const SignatureStream& ss) {
252 int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
253 for (int i = sb; i < se; ++i) {
254 char ch = ss.raw_char_at(i);
255 if (ch == JVM_SIGNATURE_SLASH) {
256 os->put(JVM_SIGNATURE_DOT);
257 } else {
258 os->put(ch);
259 }
260 }
261 }
262
263 static void print_array(outputStream *os, SignatureStream& ss) {
264 int dimensions = ss.skip_array_prefix();
265 assert(dimensions > 0, "");
266 if (ss.is_reference()) {
267 print_class(os, ss);
268 } else {
269 os->print("%s", type2name(ss.type()));
270 }
271 for (int i = 0; i < dimensions; ++i) {
272 os->print("[]");
273 }
274 }
275
276 void Symbol::print_as_signature_external_return_type(outputStream *os) {
277 for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
278 if (ss.at_return_type()) {
279 if (ss.is_array()) {
280 print_array(os, ss);
281 } else if (ss.is_reference()) {
282 print_class(os, ss);
283 } else {
284 os->print("%s", type2name(ss.type()));
285 }
286 }
287 }
288 }
289
290 void Symbol::print_as_signature_external_parameters(outputStream *os) {
291 bool first = true;
292 for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
293 if (ss.at_return_type()) break;
294 if (!first) { os->print(", "); }
295 if (ss.is_array()) {
296 print_array(os, ss);
297 } else if (ss.is_reference()) {
298 print_class(os, ss);
299 } else {
300 os->print("%s", type2name(ss.type()));
301 }
302 first = false;
303 }
304 }
305
306 void Symbol::print_as_field_external_type(outputStream *os) {
307 SignatureStream ss(this, false);
308 assert(!ss.is_done(), "must have at least one element in field ref");
309 assert(!ss.at_return_type(), "field ref cannot be a return type");
310 assert(!Signature::is_method(this), "field ref cannot be a method");
311
312 if (ss.is_array()) {
313 print_array(os, ss);
314 } else if (ss.is_reference()) {
315 print_class(os, ss);
316 } else {
317 os->print("%s", type2name(ss.type()));
318 }
319 #ifdef ASSERT
320 ss.next();
321 assert(ss.is_done(), "must have at most one element in field ref");
322 #endif
323 }
324
325 // Increment refcount while checking for zero. If the Symbol's refcount becomes zero
326 // a thread could be concurrently removing the Symbol. This is used during SymbolTable
327 // lookup to avoid reviving a dead Symbol.
328 bool Symbol::try_increment_refcount() {
329 uint32_t found = _hash_and_refcount;
330 while (true) {
331 uint32_t old_value = found;
332 int refc = extract_refcount(old_value);
333 if (refc == PERM_REFCOUNT) {
334 return true; // sticky max or created permanent
335 } else if (refc == 0) {
336 return false; // dead, can't revive.
337 } else {
338 found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
339 if (found == old_value) {
340 return true; // successfully updated.
341 }
342 // refcount changed, try again.
343 }
344 }
345 }
346
347 // The increment_refcount() is called when not doing lookup. It is assumed that you
348 // have a symbol with a non-zero refcount and it can't become zero while referenced by
349 // this caller.
350 void Symbol::increment_refcount() {
351 if (!try_increment_refcount()) {
352 print();
353 fatal("refcount has gone to zero");
354 }
355 #ifndef PRODUCT
356 if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
357 NOT_PRODUCT(AtomicAccess::inc(&_total_count);)
358 }
359 #endif
360 }
361
362 // Decrement refcount potentially while racing increment, so we need
363 // to check the value after attempting to decrement so that if another
364 // thread increments to PERM_REFCOUNT the value is not decremented.
365 void Symbol::decrement_refcount() {
366 uint32_t found = _hash_and_refcount;
367 while (true) {
368 uint32_t old_value = found;
369 int refc = extract_refcount(old_value);
370 if (refc == PERM_REFCOUNT) {
371 return; // refcount is permanent, permanent is sticky
372 } else if (refc == 0) {
373 print();
374 fatal("refcount underflow");
375 return;
376 } else {
377 found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
378 if (found == old_value) {
379 return; // successfully updated.
380 }
381 // refcount changed, try again.
382 }
383 }
384 }
385
386 void Symbol::make_permanent() {
387 uint32_t found = _hash_and_refcount;
388 while (true) {
389 uint32_t old_value = found;
390 int refc = extract_refcount(old_value);
391 if (refc == PERM_REFCOUNT) {
392 return; // refcount is permanent, permanent is sticky
393 } else if (refc == 0) {
394 print();
395 fatal("refcount underflow");
396 return;
397 } else {
398 short hash = extract_hash(old_value);
399 found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
400 if (found == old_value) {
401 return; // successfully updated.
402 }
403 // refcount changed, try again.
404 }
405 }
406 }
407
408 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
409 if (log_is_enabled(Trace, aot)) {
410 LogStream trace_stream(Log(aot)::trace());
411 trace_stream.print("Iter(Symbol): %p ", this);
412 print_value_on(&trace_stream);
413 trace_stream.cr();
414 }
415 }
416
417 void Symbol::print_on(outputStream* st) const {
418 st->print("Symbol: '");
419 print_symbol_on(st);
420 st->print("'");
421 st->print(" count %d", refcount());
422 }
423
424 void Symbol::print() const { print_on(tty); }
425
426 // The print_value functions are present in all builds, to support the
427 // disassembler and error reporting.
428 void Symbol::print_value_on(outputStream* st) const {
429 st->print_raw("'", 1);
430 st->print_raw((const char*)base(), utf8_length());
431 st->print_raw("'", 1);
432 }
433
434 void Symbol::print_value() const { print_value_on(tty); }
435
436 bool Symbol::is_valid(Symbol* s) {
437 if (!is_aligned(s, sizeof(MetaWord))) return false;
438 if ((size_t)s < os::min_page_size()) return false;
439
440 if (!os::is_readable_range(s, s + 1)) return false;
441
442 // Symbols are not allocated in Java heap.
443 if (Universe::heap()->is_in(s)) return false;
444
445 int len = s->utf8_length();
446 if (len < 0) return false;
447
448 jbyte* bytes = (jbyte*) s->bytes();
449 return os::is_readable_range(bytes, bytes + len);
450 }
451
452 // SymbolTable prints this in its statistics
453 NOT_PRODUCT(size_t Symbol::_total_count = 0;)
454
455 #ifndef PRODUCT
456 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
457 return vmSymbols::is_valid_id(vm_symbol_id);
458 }
459 #endif