1 /*
  2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "cds/archiveBuilder.hpp"
 26 #include "cds/metaspaceShared.hpp"
 27 #include "classfile/altHashing.hpp"
 28 #include "classfile/classLoaderData.hpp"
 29 #include "classfile/vmSymbols.hpp"
 30 #include "gc/shared/collectedHeap.hpp"
 31 #include "logging/log.hpp"
 32 #include "logging/logStream.hpp"
 33 #include "memory/allocation.inline.hpp"
 34 #include "memory/resourceArea.hpp"
 35 #include "memory/universe.hpp"
 36 #include "oops/symbol.hpp"
 37 #include "runtime/atomic.hpp"
 38 #include "runtime/mutexLocker.hpp"
 39 #include "runtime/os.hpp"
 40 #include "runtime/signature.hpp"
 41 #include "utilities/stringUtils.hpp"
 42 #include "utilities/utf8.hpp"
 43 
// Storage for the static Symbol::_vm_symbols array; it has one Symbol* slot
// for each of the vmSymbols::number_of_symbols() entries.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 45 
 46 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 47   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 48   assert(refcount >= 0, "negative refcount");
 49   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 50   uint32_t hi = hash;
 51   uint32_t lo = refcount;
 52   return (hi << 16) | lo;
 53 }
 54 
 55 Symbol::Symbol(const u1* name, int length, int refcount) {
 56   assert(length <= max_length(), "SymbolTable should have caught this!");
 57   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 58   _length = (u2)length;
 59   // _body[0..1] are allocated in the header just by coincidence in the current
 60   // implementation of Symbol. They are read by identity_hash(), so make sure they
 61   // are initialized.
 62   // No other code should assume that _body[0..1] are always allocated. E.g., do
 63   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 64   _body[0] = _body[1] = 0;
 65   memcpy(_body, name, length);
 66 }
 67 
 68 // This copies the symbol when it is added to the ConcurrentHashTable.
 69 Symbol::Symbol(const Symbol& s1) {
 70   _hash_and_refcount = s1._hash_and_refcount;
 71   _length = s1._length;
 72   memcpy(_body, s1._body, _length);
 73 }
 74 
 75 #if INCLUDE_CDS
 76 void Symbol::update_identity_hash() {
 77   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 78   _hash_and_refcount =  pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
 79 }
 80 
 81 void Symbol::set_permanent() {
 82   // This is called at a safepoint during dumping of a dynamic CDS archive.
 83   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 84   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 85 }
 86 #endif
 87 
 88 Symbol* Symbol::fundamental_name(TRAPS) {
 89   if (char_at(0) == JVM_SIGNATURE_CLASS && ends_with(JVM_SIGNATURE_ENDCLASS)) {
 90     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
 91   } else {
 92     // reference count is incremented to be consistent with the behavior with
 93     // the SymbolTable::new_symbol() call above
 94     this->increment_refcount();
 95     return this;
 96   }
 97 }
 98 
 99 bool Symbol::is_same_fundamental_type(Symbol* s) const {
100   if (this == s) return true;
101   if (utf8_length() < 3) return false;
102   int offset1, offset2, len;
103   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
104     if (char_at(0) != JVM_SIGNATURE_CLASS) return false;
105     offset1 = 1;
106     len = utf8_length() - 2;
107   } else {
108     offset1 = 0;
109     len = utf8_length();
110   }
111   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
112     if (s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
113     offset2 = 1;
114   } else {
115     offset2 = 0;
116   }
117   if ((offset2 + len) > s->utf8_length()) return false;
118   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
119     return false;
120   int l = len;
121   while (l-- > 0) {
122     if (char_at(offset1 + l) != s->char_at(offset2 + l))
123       return false;
124   }
125   return true;
126 }
127 
128 // ------------------------------------------------------------------
129 // Symbol::index_of
130 //
131 // Test if we have the give substring at or after the i-th char of this
132 // symbol's utf8 bytes.
133 // Return -1 on failure.  Otherwise return the first index where substr occurs.
134 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
135   assert(i >= 0 && i <= utf8_length(), "oob");
136   if (substr_len <= 0)  return 0;
137   char first_char = substr[0];
138   address bytes = (address) ((Symbol*)this)->base();
139   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
140   address scan = bytes + i;
141   if (scan > limit)
142     return -1;
143   for (; scan <= limit; scan++) {
144     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
145     if (scan == nullptr)
146       return -1;  // not found
147     assert(scan >= bytes+i && scan <= limit, "scan oob");
148     if (substr_len <= 2
149         ? (char) scan[substr_len-1] == substr[substr_len-1]
150         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
151       return (int)(scan - bytes);
152     }
153   }
154   return -1;
155 }
156 
157 bool Symbol::is_star_match(const char* pattern) const {
158   if (strchr(pattern, '*') == nullptr) {
159     return equals(pattern);
160   } else {
161     ResourceMark rm;
162     char* buf = as_C_string();
163     return StringUtils::is_star_match(pattern, buf);
164   }
165 }
166 
167 char* Symbol::as_C_string(char* buf, int size) const {
168   if (size > 0) {
169     int len = MIN2(size - 1, utf8_length());
170     for (int i = 0; i < len; i++) {
171       buf[i] = char_at(i);
172     }
173     buf[len] = '\0';
174   }
175   return buf;
176 }
177 
178 char* Symbol::as_C_string() const {
179   int len = utf8_length();
180   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
181   return as_C_string(str, len + 1);
182 }
183 
184 void Symbol::print_utf8_on(outputStream* st) const {
185   st->print("%s", as_C_string());
186 }
187 
188 void Symbol::print_symbol_on(outputStream* st) const {
189   char *s;
190   st = st ? st : tty;
191   {
192     // ResourceMark may not affect st->print(). If st is a string
193     // stream it could resize, using the same resource arena.
194     ResourceMark rm;
195     s = as_quoted_ascii();
196     s = os::strdup(s);
197   }
198   if (s == nullptr) {
199     st->print("(null)");
200   } else {
201     st->print("%s", s);
202     os::free(s);
203   }
204 }
205 
206 char* Symbol::as_quoted_ascii() const {
207   const char *ptr = (const char *)&_body[0];
208   size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
209   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
210   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
211   return result;
212 }
213 
214 jchar* Symbol::as_unicode(int& length) const {
215   Symbol* this_ptr = (Symbol*)this;
216   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
217   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
218   if (length > 0) {
219     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
220   }
221   return result;
222 }
223 
224 const char* Symbol::as_klass_external_name(char* buf, int size) const {
225   if (size > 0) {
226     char* str    = as_C_string(buf, size);
227     int   length = (int)strlen(str);
228     // Turn all '/'s into '.'s (also for array klasses)
229     for (int index = 0; index < length; index++) {
230       if (str[index] == JVM_SIGNATURE_SLASH) {
231         str[index] = JVM_SIGNATURE_DOT;
232       }
233     }
234     return str;
235   } else {
236     return buf;
237   }
238 }
239 
240 const char* Symbol::as_klass_external_name() const {
241   char* str    = as_C_string();
242   int   length = (int)strlen(str);
243   // Turn all '/'s into '.'s (also for array klasses)
244   for (int index = 0; index < length; index++) {
245     if (str[index] == JVM_SIGNATURE_SLASH) {
246       str[index] = JVM_SIGNATURE_DOT;
247     }
248   }
249   return str;
250 }
251 
252 static void print_class(outputStream *os, const SignatureStream& ss) {
253   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
254   for (int i = sb; i < se; ++i) {
255     char ch = ss.raw_char_at(i);
256     if (ch == JVM_SIGNATURE_SLASH) {
257       os->put(JVM_SIGNATURE_DOT);
258     } else {
259       os->put(ch);
260     }
261   }
262 }
263 
264 static void print_array(outputStream *os, SignatureStream& ss) {
265   int dimensions = ss.skip_array_prefix();
266   assert(dimensions > 0, "");
267   if (ss.is_reference()) {
268     print_class(os, ss);
269   } else {
270     os->print("%s", type2name(ss.type()));
271   }
272   for (int i = 0; i < dimensions; ++i) {
273     os->print("[]");
274   }
275 }
276 
277 void Symbol::print_as_signature_external_return_type(outputStream *os) {
278   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
279     if (ss.at_return_type()) {
280       if (ss.is_array()) {
281         print_array(os, ss);
282       } else if (ss.is_reference()) {
283         print_class(os, ss);
284       } else {
285         os->print("%s", type2name(ss.type()));
286       }
287     }
288   }
289 }
290 
291 void Symbol::print_as_signature_external_parameters(outputStream *os) {
292   bool first = true;
293   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
294     if (ss.at_return_type()) break;
295     if (!first) { os->print(", "); }
296     if (ss.is_array()) {
297       print_array(os, ss);
298     } else if (ss.is_reference()) {
299       print_class(os, ss);
300     } else {
301       os->print("%s", type2name(ss.type()));
302     }
303     first = false;
304   }
305 }
306 
307 void Symbol::print_as_field_external_type(outputStream *os) {
308   SignatureStream ss(this, false);
309   assert(!ss.is_done(), "must have at least one element in field ref");
310   assert(!ss.at_return_type(), "field ref cannot be a return type");
311   assert(!Signature::is_method(this), "field ref cannot be a method");
312 
313   if (ss.is_array()) {
314     print_array(os, ss);
315   } else if (ss.is_reference()) {
316     print_class(os, ss);
317   } else {
318     os->print("%s", type2name(ss.type()));
319   }
320 #ifdef ASSERT
321   ss.next();
322   assert(ss.is_done(), "must have at most one element in field ref");
323 #endif
324 }
325 
326 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
327 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
328 // lookup to avoid reviving a dead Symbol.
329 bool Symbol::try_increment_refcount() {
330   uint32_t found = _hash_and_refcount;
331   while (true) {
332     uint32_t old_value = found;
333     int refc = extract_refcount(old_value);
334     if (refc == PERM_REFCOUNT) {
335       return true;  // sticky max or created permanent
336     } else if (refc == 0) {
337       return false; // dead, can't revive.
338     } else {
339       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
340       if (found == old_value) {
341         return true; // successfully updated.
342       }
343       // refcount changed, try again.
344     }
345   }
346 }
347 
348 // The increment_refcount() is called when not doing lookup. It is assumed that you
349 // have a symbol with a non-zero refcount and it can't become zero while referenced by
350 // this caller.
351 void Symbol::increment_refcount() {
352   if (!try_increment_refcount()) {
353     print();
354     fatal("refcount has gone to zero");
355   }
356 #ifndef PRODUCT
357   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
358     NOT_PRODUCT(Atomic::inc(&_total_count);)
359   }
360 #endif
361 }
362 
363 // Decrement refcount potentially while racing increment, so we need
364 // to check the value after attempting to decrement so that if another
365 // thread increments to PERM_REFCOUNT the value is not decremented.
366 void Symbol::decrement_refcount() {
367   uint32_t found = _hash_and_refcount;
368   while (true) {
369     uint32_t old_value = found;
370     int refc = extract_refcount(old_value);
371     if (refc == PERM_REFCOUNT) {
372       return;  // refcount is permanent, permanent is sticky
373     } else if (refc == 0) {
374       print();
375       fatal("refcount underflow");
376       return;
377     } else {
378       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
379       if (found == old_value) {
380         return;  // successfully updated.
381       }
382       // refcount changed, try again.
383     }
384   }
385 }
386 
387 void Symbol::make_permanent() {
388   uint32_t found = _hash_and_refcount;
389   while (true) {
390     uint32_t old_value = found;
391     int refc = extract_refcount(old_value);
392     if (refc == PERM_REFCOUNT) {
393       return;  // refcount is permanent, permanent is sticky
394     } else if (refc == 0) {
395       print();
396       fatal("refcount underflow");
397       return;
398     } else {
399       short hash = extract_hash(old_value);
400       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
401       if (found == old_value) {
402         return;  // successfully updated.
403       }
404       // refcount changed, try again.
405     }
406   }
407 }
408 
409 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
410   if (log_is_enabled(Trace, cds)) {
411     LogStream trace_stream(Log(cds)::trace());
412     trace_stream.print("Iter(Symbol): %p ", this);
413     print_value_on(&trace_stream);
414     trace_stream.cr();
415   }
416 }
417 
418 void Symbol::print_on(outputStream* st) const {
419   st->print("Symbol: '");
420   print_symbol_on(st);
421   st->print("'");
422   st->print(" count %d", refcount());
423 }
424 
425 void Symbol::print() const { print_on(tty); }
426 
427 // The print_value functions are present in all builds, to support the
428 // disassembler and error reporting.
429 void Symbol::print_value_on(outputStream* st) const {
430   st->print_raw("'", 1);
431   st->print_raw((const char*)base(), utf8_length());
432   st->print_raw("'", 1);
433 }
434 
435 void Symbol::print_value() const { print_value_on(tty); }
436 
437 bool Symbol::is_valid(Symbol* s) {
438   if (!is_aligned(s, sizeof(MetaWord))) return false;
439   if ((size_t)s < os::min_page_size()) return false;
440 
441   if (!os::is_readable_range(s, s + 1)) return false;
442 
443   // Symbols are not allocated in Java heap.
444   if (Universe::heap()->is_in(s)) return false;
445 
446   int len = s->utf8_length();
447   if (len < 0) return false;
448 
449   jbyte* bytes = (jbyte*) s->bytes();
450   return os::is_readable_range(bytes, bytes + len);
451 }
452 
// Definition of Symbol::_total_count, initialized to zero and wrapped in
// NOT_PRODUCT. SymbolTable prints this in its statistics.
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
455 
456 #ifndef PRODUCT
457 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
458   return vmSymbols::is_valid_id(vm_symbol_id);
459 }
460 #endif