1 /*
  2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "cds/archiveBuilder.hpp"
 27 #include "cds/metaspaceShared.hpp"
 28 #include "classfile/altHashing.hpp"
 29 #include "classfile/classLoaderData.hpp"
 30 #include "classfile/vmSymbols.hpp"
 31 #include "gc/shared/collectedHeap.hpp"
 32 #include "logging/log.hpp"
 33 #include "logging/logStream.hpp"
 34 #include "memory/allocation.inline.hpp"
 35 #include "memory/resourceArea.hpp"
 36 #include "memory/universe.hpp"
 37 #include "oops/symbol.hpp"
 38 #include "runtime/atomic.hpp"
 39 #include "runtime/mutexLocker.hpp"
 40 #include "runtime/os.hpp"
 41 #include "runtime/signature.hpp"
 42 #include "utilities/stringUtils.hpp"
 43 #include "utilities/utf8.hpp"
 44 
// Definition of the static Symbol::_vm_symbols array of Symbol pointers.
// Its extent is vmSymbols::number_of_symbols(); nothing in this file fills it.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 46 
 47 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 48   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 49   assert(refcount >= 0, "negative refcount");
 50   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 51   uint32_t hi = hash;
 52   uint32_t lo = refcount;
 53   return (hi << 16) | lo;
 54 }
 55 
 56 Symbol::Symbol(const u1* name, int length, int refcount) {
 57   assert(length <= max_length(), "SymbolTable should have caught this!");
 58   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 59   _length = (u2)length;
 60   // _body[0..1] are allocated in the header just by coincidence in the current
 61   // implementation of Symbol. They are read by identity_hash(), so make sure they
 62   // are initialized.
 63   // No other code should assume that _body[0..1] are always allocated. E.g., do
 64   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 65   _body[0] = _body[1] = 0;
 66   memcpy(_body, name, length);
 67 }
 68 
 69 // This copies the symbol when it is added to the ConcurrentHashTable.
 70 Symbol::Symbol(const Symbol& s1) {
 71   _hash_and_refcount = s1._hash_and_refcount;
 72   _length = s1._length;
 73   memcpy(_body, s1._body, _length);
 74 }
 75 
 76 #if INCLUDE_CDS
 77 void Symbol::update_identity_hash() {
 78   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 79   _hash_and_refcount =  pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
 80 }
 81 
 82 void Symbol::set_permanent() {
 83   // This is called at a safepoint during dumping of a dynamic CDS archive.
 84   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 85   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 86 }
 87 #endif
 88 
 89 Symbol* Symbol::fundamental_name(TRAPS) {
 90   if (char_at(0) == JVM_SIGNATURE_CLASS && ends_with(JVM_SIGNATURE_ENDCLASS)) {
 91     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
 92   } else {
 93     // reference count is incremented to be consistent with the behavior with
 94     // the SymbolTable::new_symbol() call above
 95     this->increment_refcount();
 96     return this;
 97   }
 98 }
 99 
100 bool Symbol::is_same_fundamental_type(Symbol* s) const {
101   if (this == s) return true;
102   if (utf8_length() < 3) return false;
103   int offset1, offset2, len;
104   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
105     if (char_at(0) != JVM_SIGNATURE_CLASS) return false;
106     offset1 = 1;
107     len = utf8_length() - 2;
108   } else {
109     offset1 = 0;
110     len = utf8_length();
111   }
112   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
113     if (s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
114     offset2 = 1;
115   } else {
116     offset2 = 0;
117   }
118   if ((offset2 + len) > s->utf8_length()) return false;
119   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
120     return false;
121   int l = len;
122   while (l-- > 0) {
123     if (char_at(offset1 + l) != s->char_at(offset2 + l))
124       return false;
125   }
126   return true;
127 }
128 
// ------------------------------------------------------------------
// Symbol::index_of_at
//
// Test if we have the given substring at or after the i-th char of this
// symbol's utf8 bytes.
// Return -1 on failure.  Otherwise return the first index where substr occurs.
135 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
136   assert(i >= 0 && i <= utf8_length(), "oob");
137   if (substr_len <= 0)  return 0;
138   char first_char = substr[0];
139   address bytes = (address) ((Symbol*)this)->base();
140   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
141   address scan = bytes + i;
142   if (scan > limit)
143     return -1;
144   for (; scan <= limit; scan++) {
145     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
146     if (scan == nullptr)
147       return -1;  // not found
148     assert(scan >= bytes+i && scan <= limit, "scan oob");
149     if (substr_len <= 2
150         ? (char) scan[substr_len-1] == substr[substr_len-1]
151         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
152       return (int)(scan - bytes);
153     }
154   }
155   return -1;
156 }
157 
158 bool Symbol::is_star_match(const char* pattern) const {
159   if (strchr(pattern, '*') == nullptr) {
160     return equals(pattern);
161   } else {
162     ResourceMark rm;
163     char* buf = as_C_string();
164     return StringUtils::is_star_match(pattern, buf);
165   }
166 }
167 
168 char* Symbol::as_C_string(char* buf, int size) const {
169   if (size > 0) {
170     int len = MIN2(size - 1, utf8_length());
171     for (int i = 0; i < len; i++) {
172       buf[i] = char_at(i);
173     }
174     buf[len] = '\0';
175   }
176   return buf;
177 }
178 
179 char* Symbol::as_C_string() const {
180   int len = utf8_length();
181   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
182   return as_C_string(str, len + 1);
183 }
184 
185 void Symbol::print_utf8_on(outputStream* st) const {
186   st->print("%s", as_C_string());
187 }
188 
189 void Symbol::print_symbol_on(outputStream* st) const {
190   char *s;
191   st = st ? st : tty;
192   {
193     // ResourceMark may not affect st->print(). If st is a string
194     // stream it could resize, using the same resource arena.
195     ResourceMark rm;
196     s = as_quoted_ascii();
197     s = os::strdup(s);
198   }
199   if (s == nullptr) {
200     st->print("(null)");
201   } else {
202     st->print("%s", s);
203     os::free(s);
204   }
205 }
206 
207 char* Symbol::as_quoted_ascii() const {
208   const char *ptr = (const char *)&_body[0];
209   size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
210   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
211   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
212   return result;
213 }
214 
215 jchar* Symbol::as_unicode(int& length) const {
216   Symbol* this_ptr = (Symbol*)this;
217   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
218   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
219   if (length > 0) {
220     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
221   }
222   return result;
223 }
224 
225 const char* Symbol::as_klass_external_name(char* buf, int size) const {
226   if (size > 0) {
227     char* str    = as_C_string(buf, size);
228     int   length = (int)strlen(str);
229     // Turn all '/'s into '.'s (also for array klasses)
230     for (int index = 0; index < length; index++) {
231       if (str[index] == JVM_SIGNATURE_SLASH) {
232         str[index] = JVM_SIGNATURE_DOT;
233       }
234     }
235     return str;
236   } else {
237     return buf;
238   }
239 }
240 
241 const char* Symbol::as_klass_external_name() const {
242   char* str    = as_C_string();
243   int   length = (int)strlen(str);
244   // Turn all '/'s into '.'s (also for array klasses)
245   for (int index = 0; index < length; index++) {
246     if (str[index] == JVM_SIGNATURE_SLASH) {
247       str[index] = JVM_SIGNATURE_DOT;
248     }
249   }
250   return str;
251 }
252 
253 static void print_class(outputStream *os, const SignatureStream& ss) {
254   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
255   for (int i = sb; i < se; ++i) {
256     char ch = ss.raw_char_at(i);
257     if (ch == JVM_SIGNATURE_SLASH) {
258       os->put(JVM_SIGNATURE_DOT);
259     } else {
260       os->put(ch);
261     }
262   }
263 }
264 
265 static void print_array(outputStream *os, SignatureStream& ss) {
266   int dimensions = ss.skip_array_prefix();
267   assert(dimensions > 0, "");
268   if (ss.is_reference()) {
269     print_class(os, ss);
270   } else {
271     os->print("%s", type2name(ss.type()));
272   }
273   for (int i = 0; i < dimensions; ++i) {
274     os->print("[]");
275   }
276 }
277 
278 void Symbol::print_as_signature_external_return_type(outputStream *os) {
279   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
280     if (ss.at_return_type()) {
281       if (ss.is_array()) {
282         print_array(os, ss);
283       } else if (ss.is_reference()) {
284         print_class(os, ss);
285       } else {
286         os->print("%s", type2name(ss.type()));
287       }
288     }
289   }
290 }
291 
292 void Symbol::print_as_signature_external_parameters(outputStream *os) {
293   bool first = true;
294   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
295     if (ss.at_return_type()) break;
296     if (!first) { os->print(", "); }
297     if (ss.is_array()) {
298       print_array(os, ss);
299     } else if (ss.is_reference()) {
300       print_class(os, ss);
301     } else {
302       os->print("%s", type2name(ss.type()));
303     }
304     first = false;
305   }
306 }
307 
308 void Symbol::print_as_field_external_type(outputStream *os) {
309   SignatureStream ss(this, false);
310   assert(!ss.is_done(), "must have at least one element in field ref");
311   assert(!ss.at_return_type(), "field ref cannot be a return type");
312   assert(!Signature::is_method(this), "field ref cannot be a method");
313 
314   if (ss.is_array()) {
315     print_array(os, ss);
316   } else if (ss.is_reference()) {
317     print_class(os, ss);
318   } else {
319     os->print("%s", type2name(ss.type()));
320   }
321 #ifdef ASSERT
322   ss.next();
323   assert(ss.is_done(), "must have at most one element in field ref");
324 #endif
325 }
326 
327 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
328 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
329 // lookup to avoid reviving a dead Symbol.
330 bool Symbol::try_increment_refcount() {
331   uint32_t found = _hash_and_refcount;
332   while (true) {
333     uint32_t old_value = found;
334     int refc = extract_refcount(old_value);
335     if (refc == PERM_REFCOUNT) {
336       return true;  // sticky max or created permanent
337     } else if (refc == 0) {
338       return false; // dead, can't revive.
339     } else {
340       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
341       if (found == old_value) {
342         return true; // successfully updated.
343       }
344       // refcount changed, try again.
345     }
346   }
347 }
348 
349 // The increment_refcount() is called when not doing lookup. It is assumed that you
350 // have a symbol with a non-zero refcount and it can't become zero while referenced by
351 // this caller.
352 void Symbol::increment_refcount() {
353   if (!try_increment_refcount()) {
354     print();
355     fatal("refcount has gone to zero");
356   }
357 #ifndef PRODUCT
358   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
359     NOT_PRODUCT(Atomic::inc(&_total_count);)
360   }
361 #endif
362 }
363 
364 // Decrement refcount potentially while racing increment, so we need
365 // to check the value after attempting to decrement so that if another
366 // thread increments to PERM_REFCOUNT the value is not decremented.
367 void Symbol::decrement_refcount() {
368   uint32_t found = _hash_and_refcount;
369   while (true) {
370     uint32_t old_value = found;
371     int refc = extract_refcount(old_value);
372     if (refc == PERM_REFCOUNT) {
373       return;  // refcount is permanent, permanent is sticky
374     } else if (refc == 0) {
375       print();
376       fatal("refcount underflow");
377       return;
378     } else {
379       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
380       if (found == old_value) {
381         return;  // successfully updated.
382       }
383       // refcount changed, try again.
384     }
385   }
386 }
387 
388 void Symbol::make_permanent() {
389   uint32_t found = _hash_and_refcount;
390   while (true) {
391     uint32_t old_value = found;
392     int refc = extract_refcount(old_value);
393     if (refc == PERM_REFCOUNT) {
394       return;  // refcount is permanent, permanent is sticky
395     } else if (refc == 0) {
396       print();
397       fatal("refcount underflow");
398       return;
399     } else {
400       short hash = extract_hash(old_value);
401       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
402       if (found == old_value) {
403         return;  // successfully updated.
404       }
405       // refcount changed, try again.
406     }
407   }
408 }
409 
410 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
411   if (log_is_enabled(Trace, cds)) {
412     LogStream trace_stream(Log(cds)::trace());
413     trace_stream.print("Iter(Symbol): %p ", this);
414     print_value_on(&trace_stream);
415     trace_stream.cr();
416   }
417 }
418 
419 void Symbol::print_on(outputStream* st) const {
420   st->print("Symbol: '");
421   print_symbol_on(st);
422   st->print("'");
423   st->print(" count %d", refcount());
424 }
425 
426 void Symbol::print() const { print_on(tty); }
427 
428 // The print_value functions are present in all builds, to support the
429 // disassembler and error reporting.
430 void Symbol::print_value_on(outputStream* st) const {
431   st->print_raw("'", 1);
432   st->print_raw((const char*)base(), utf8_length());
433   st->print_raw("'", 1);
434 }
435 
436 void Symbol::print_value() const { print_value_on(tty); }
437 
438 bool Symbol::is_valid(Symbol* s) {
439   if (!is_aligned(s, sizeof(MetaWord))) return false;
440   if ((size_t)s < os::min_page_size()) return false;
441 
442   if (!os::is_readable_range(s, s + 1)) return false;
443 
444   // Symbols are not allocated in Java heap.
445   if (Universe::heap()->is_in(s)) return false;
446 
447   int len = s->utf8_length();
448   if (len < 0) return false;
449 
450   jbyte* bytes = (jbyte*) s->bytes();
451   return os::is_readable_range(bytes, bytes + len);
452 }
453 
// Non-product-only counter, starting at zero. increment_refcount() bumps it
// each time it increments a symbol that is not permanent.
// SymbolTable prints this in its statistics
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
456 
457 #ifndef PRODUCT
458 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
459   return vmSymbols::is_valid_id(vm_symbol_id);
460 }
461 #endif