1 /*
  2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "cds/archiveBuilder.hpp"
 26 #include "classfile/altHashing.hpp"
 27 #include "classfile/classLoaderData.hpp"
 28 #include "classfile/vmSymbols.hpp"
 29 #include "gc/shared/collectedHeap.hpp"
 30 #include "logging/log.hpp"
 31 #include "logging/logStream.hpp"
 32 #include "memory/allocation.inline.hpp"
 33 #include "memory/resourceArea.hpp"
 34 #include "memory/universe.hpp"
 35 #include "oops/symbol.hpp"
 36 #include "runtime/atomicAccess.hpp"
 37 #include "runtime/mutexLocker.hpp"
 38 #include "runtime/os.hpp"
 39 #include "runtime/signature.hpp"
 40 #include "utilities/stringUtils.hpp"
 41 #include "utilities/utf8.hpp"
 42 
// Out-of-class definition of the static Symbol::_vm_symbols array; it holds
// one Symbol* slot per vmSymbols::number_of_symbols().
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 44 
 45 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 46   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 47   assert(refcount >= 0, "negative refcount");
 48   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 49   uint32_t hi = hash;
 50   uint32_t lo = refcount;
 51   return (hi << 16) | lo;
 52 }
 53 
 54 Symbol::Symbol(const u1* name, int length, int refcount) {
 55   assert(length <= max_length(), "SymbolTable should have caught this!");
 56   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 57   _length = (u2)length;
 58   // _body[0..1] are allocated in the header just by coincidence in the current
 59   // implementation of Symbol. They are read by identity_hash(), so make sure they
 60   // are initialized.
 61   // No other code should assume that _body[0..1] are always allocated. E.g., do
 62   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 63   _body[0] = _body[1] = 0;
 64   memcpy(_body, name, length);
 65 }
 66 
 67 // This copies the symbol when it is added to the ConcurrentHashTable.
 68 Symbol::Symbol(const Symbol& s1) {
 69   _hash_and_refcount = s1._hash_and_refcount;
 70   _length = s1._length;
 71   memcpy(_body, s1._body, _length);
 72 }
 73 
 74 #if INCLUDE_CDS
 75 void Symbol::update_identity_hash() {
 76   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 77   _hash_and_refcount =  pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
 78 }
 79 
 80 void Symbol::set_permanent() {
 81   // This is called at a safepoint during dumping of a dynamic CDS archive.
 82   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 83   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 84 }
 85 #endif
 86 
 87 // ------------------------------------------------------------------
 88 // Symbol::index_of
 89 //
 90 // Test if we have the give substring at or after the i-th char of this
 91 // symbol's utf8 bytes.
 92 // Return -1 on failure.  Otherwise return the first index where substr occurs.
 93 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
 94   assert(i >= 0 && i <= utf8_length(), "oob");
 95   if (substr_len <= 0)  return 0;
 96   char first_char = substr[0];
 97   address bytes = (address) ((Symbol*)this)->base();
 98   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
 99   address scan = bytes + i;
100   if (scan > limit)
101     return -1;
102   for (; scan <= limit; scan++) {
103     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
104     if (scan == nullptr)
105       return -1;  // not found
106     assert(scan >= bytes+i && scan <= limit, "scan oob");
107     if (substr_len <= 2
108         ? (char) scan[substr_len-1] == substr[substr_len-1]
109         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
110       return (int)(scan - bytes);
111     }
112   }
113   return -1;
114 }
115 
116 bool Symbol::is_star_match(const char* pattern) const {
117   if (strchr(pattern, '*') == nullptr) {
118     return equals(pattern);
119   } else {
120     ResourceMark rm;
121     char* buf = as_C_string();
122     return StringUtils::is_star_match(pattern, buf);
123   }
124 }
125 
126 char* Symbol::as_C_string(char* buf, int size) const {
127   if (size > 0) {
128     int len = MIN2(size - 1, utf8_length());
129     for (int i = 0; i < len; i++) {
130       buf[i] = char_at(i);
131     }
132     buf[len] = '\0';
133   }
134   return buf;
135 }
136 
137 char* Symbol::as_C_string() const {
138   int len = utf8_length();
139   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
140   return as_C_string(str, len + 1);
141 }
142 
143 void Symbol::print_utf8_on(outputStream* st) const {
144   st->print("%s", as_C_string());
145 }
146 
147 void Symbol::print_symbol_on(outputStream* st) const {
148   char *s;
149   st = st ? st : tty;
150   {
151     // ResourceMark may not affect st->print(). If st is a string
152     // stream it could resize, using the same resource arena.
153     ResourceMark rm;
154     s = as_quoted_ascii();
155     s = os::strdup(s);
156   }
157   if (s == nullptr) {
158     st->print("(null)");
159   } else {
160     st->print("%s", s);
161     os::free(s);
162   }
163 }
164 
165 char* Symbol::as_quoted_ascii() const {
166   const char *ptr = (const char *)&_body[0];
167   size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
168   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
169   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
170   return result;
171 }
172 
173 jchar* Symbol::as_unicode(int& length) const {
174   Symbol* this_ptr = (Symbol*)this;
175   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
176   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
177   if (length > 0) {
178     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
179   }
180   return result;
181 }
182 
183 const char* Symbol::as_klass_external_name(char* buf, int size) const {
184   if (size > 0) {
185     char* str    = as_C_string(buf, size);
186     int   length = (int)strlen(str);
187     // Turn all '/'s into '.'s (also for array klasses)
188     for (int index = 0; index < length; index++) {
189       if (str[index] == JVM_SIGNATURE_SLASH) {
190         str[index] = JVM_SIGNATURE_DOT;
191       }
192     }
193     return str;
194   } else {
195     return buf;
196   }
197 }
198 
199 const char* Symbol::as_klass_external_name() const {
200   char* str    = as_C_string();
201   int   length = (int)strlen(str);
202   // Turn all '/'s into '.'s (also for array klasses)
203   for (int index = 0; index < length; index++) {
204     if (str[index] == JVM_SIGNATURE_SLASH) {
205       str[index] = JVM_SIGNATURE_DOT;
206     }
207   }
208   return str;
209 }
210 
211 static void print_class(outputStream *os, const SignatureStream& ss) {
212   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
213   for (int i = sb; i < se; ++i) {
214     char ch = ss.raw_char_at(i);
215     if (ch == JVM_SIGNATURE_SLASH) {
216       os->put(JVM_SIGNATURE_DOT);
217     } else {
218       os->put(ch);
219     }
220   }
221 }
222 
223 static void print_array(outputStream *os, SignatureStream& ss) {
224   int dimensions = ss.skip_array_prefix();
225   assert(dimensions > 0, "");
226   if (ss.is_reference()) {
227     print_class(os, ss);
228   } else {
229     os->print("%s", type2name(ss.type()));
230   }
231   for (int i = 0; i < dimensions; ++i) {
232     os->print("[]");
233   }
234 }
235 
236 void Symbol::print_as_signature_external_return_type(outputStream *os) {
237   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
238     if (ss.at_return_type()) {
239       if (ss.is_array()) {
240         print_array(os, ss);
241       } else if (ss.is_reference()) {
242         print_class(os, ss);
243       } else {
244         os->print("%s", type2name(ss.type()));
245       }
246     }
247   }
248 }
249 
250 void Symbol::print_as_signature_external_parameters(outputStream *os) {
251   bool first = true;
252   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
253     if (ss.at_return_type()) break;
254     if (!first) { os->print(", "); }
255     if (ss.is_array()) {
256       print_array(os, ss);
257     } else if (ss.is_reference()) {
258       print_class(os, ss);
259     } else {
260       os->print("%s", type2name(ss.type()));
261     }
262     first = false;
263   }
264 }
265 
266 void Symbol::print_as_field_external_type(outputStream *os) {
267   SignatureStream ss(this, false);
268   assert(!ss.is_done(), "must have at least one element in field ref");
269   assert(!ss.at_return_type(), "field ref cannot be a return type");
270   assert(!Signature::is_method(this), "field ref cannot be a method");
271 
272   if (ss.is_array()) {
273     print_array(os, ss);
274   } else if (ss.is_reference()) {
275     print_class(os, ss);
276   } else {
277     os->print("%s", type2name(ss.type()));
278   }
279 #ifdef ASSERT
280   ss.next();
281   assert(ss.is_done(), "must have at most one element in field ref");
282 #endif
283 }
284 
285 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
286 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
287 // lookup to avoid reviving a dead Symbol.
288 bool Symbol::try_increment_refcount() {
289   uint32_t found = _hash_and_refcount;
290   while (true) {
291     uint32_t old_value = found;
292     int refc = extract_refcount(old_value);
293     if (refc == PERM_REFCOUNT) {
294       return true;  // sticky max or created permanent
295     } else if (refc == 0) {
296       return false; // dead, can't revive.
297     } else {
298       found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
299       if (found == old_value) {
300         return true; // successfully updated.
301       }
302       // refcount changed, try again.
303     }
304   }
305 }
306 
307 // The increment_refcount() is called when not doing lookup. It is assumed that you
308 // have a symbol with a non-zero refcount and it can't become zero while referenced by
309 // this caller.
310 void Symbol::increment_refcount() {
311   if (!try_increment_refcount()) {
312     print();
313     fatal("refcount has gone to zero");
314   }
315 #ifndef PRODUCT
316   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
317     NOT_PRODUCT(AtomicAccess::inc(&_total_count);)
318   }
319 #endif
320 }
321 
322 // Decrement refcount potentially while racing increment, so we need
323 // to check the value after attempting to decrement so that if another
324 // thread increments to PERM_REFCOUNT the value is not decremented.
325 void Symbol::decrement_refcount() {
326   uint32_t found = _hash_and_refcount;
327   while (true) {
328     uint32_t old_value = found;
329     int refc = extract_refcount(old_value);
330     if (refc == PERM_REFCOUNT) {
331       return;  // refcount is permanent, permanent is sticky
332     } else if (refc == 0) {
333       print();
334       fatal("refcount underflow");
335       return;
336     } else {
337       found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
338       if (found == old_value) {
339         return;  // successfully updated.
340       }
341       // refcount changed, try again.
342     }
343   }
344 }
345 
346 void Symbol::make_permanent() {
347   uint32_t found = _hash_and_refcount;
348   while (true) {
349     uint32_t old_value = found;
350     int refc = extract_refcount(old_value);
351     if (refc == PERM_REFCOUNT) {
352       return;  // refcount is permanent, permanent is sticky
353     } else if (refc == 0) {
354       print();
355       fatal("refcount underflow");
356       return;
357     } else {
358       short hash = extract_hash(old_value);
359       found = AtomicAccess::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
360       if (found == old_value) {
361         return;  // successfully updated.
362       }
363       // refcount changed, try again.
364     }
365   }
366 }
367 
368 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
369   if (log_is_enabled(Trace, aot)) {
370     LogStream trace_stream(Log(aot)::trace());
371     trace_stream.print("Iter(Symbol): %p ", this);
372     print_value_on(&trace_stream);
373     trace_stream.cr();
374   }
375 }
376 
377 void Symbol::print_on(outputStream* st) const {
378   st->print("Symbol: '");
379   print_symbol_on(st);
380   st->print("'");
381   st->print(" count %d", refcount());
382 }
383 
384 void Symbol::print() const { print_on(tty); }
385 
386 // The print_value functions are present in all builds, to support the
387 // disassembler and error reporting.
388 void Symbol::print_value_on(outputStream* st) const {
389   st->print_raw("'", 1);
390   st->print_raw((const char*)base(), utf8_length());
391   st->print_raw("'", 1);
392 }
393 
394 void Symbol::print_value() const { print_value_on(tty); }
395 
396 bool Symbol::is_valid(Symbol* s) {
397   if (!is_aligned(s, sizeof(MetaWord))) return false;
398   if ((size_t)s < os::min_page_size()) return false;
399 
400   if (!os::is_readable_range(s, s + 1)) return false;
401 
402   // Symbols are not allocated in Java heap.
403   if (Universe::heap()->is_in(s)) return false;
404 
405   int len = s->utf8_length();
406   if (len < 0) return false;
407 
408   jbyte* bytes = (jbyte*) s->bytes();
409   return os::is_readable_range(bytes, bytes + len);
410 }
411 
// Non-product counter, defined here with an initial value of zero.
// increment_refcount() bumps it for symbols that are not permanent, and
// SymbolTable prints it in its statistics.
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
414 
415 #ifndef PRODUCT
416 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
417   return vmSymbols::is_valid_id(vm_symbol_id);
418 }
419 #endif