1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "cds/metaspaceShared.hpp"
 27 #include "classfile/altHashing.hpp"
 28 #include "classfile/classLoaderData.hpp"
 29 #include "classfile/vmSymbols.hpp"
 30 #include "gc/shared/collectedHeap.hpp"
 31 #include "logging/log.hpp"
 32 #include "logging/logStream.hpp"
 33 #include "memory/allocation.inline.hpp"
 34 #include "memory/resourceArea.hpp"
 35 #include "memory/universe.hpp"
 36 #include "oops/symbol.hpp"
 37 #include "runtime/atomic.hpp"
 38 #include "runtime/mutexLocker.hpp"
 39 #include "runtime/os.hpp"
 40 #include "runtime/signature.hpp"
 41 #include "utilities/stringUtils.hpp"
 42 #include "utilities/utf8.hpp"
 43 
// Storage for the static member Symbol::_vm_symbols: an array of Symbol
// pointers with one slot per vmSymbols::number_of_symbols().
// NOTE(review): presumably populated during vmSymbols initialization -- confirm.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 45 
 46 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 47   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 48   assert(refcount >= 0, "negative refcount");
 49   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 50   uint32_t hi = hash;
 51   uint32_t lo = refcount;
 52   return (hi << 16) | lo;
 53 }
 54 
 55 Symbol::Symbol(const u1* name, int length, int refcount) {
 56   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 57   _length = (u2)length;
 58   // _body[0..1] are allocated in the header just by coincidence in the current
 59   // implementation of Symbol. They are read by identity_hash(), so make sure they
 60   // are initialized.
 61   // No other code should assume that _body[0..1] are always allocated. E.g., do
 62   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 63   _body[0] = _body[1] = 0;
 64   memcpy(_body, name, length);
 65 }
 66 
 67 // This copies the symbol when it is added to the ConcurrentHashTable.
 68 Symbol::Symbol(const Symbol& s1) {
 69   _hash_and_refcount = s1._hash_and_refcount;
 70   _length = s1._length;
 71   memcpy(_body, s1._body, _length);
 72 }
 73 
 74 #if INCLUDE_CDS
 75 void Symbol::update_identity_hash() {
 76   // This is called at a safepoint during dumping of a static CDS archive. The caller should have
 77   // called os::init_random() with a deterministic seed and then iterate all archived Symbols in
 78   // a deterministic order.
 79   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 80   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), PERM_REFCOUNT);
 81 }
 82 
 83 void Symbol::set_permanent() {
 84   // This is called at a safepoint during dumping of a dynamic CDS archive.
 85   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 86   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 87 }
 88 #endif
 89 
 90 Symbol* Symbol::fundamental_name(TRAPS) {
 91   if (char_at(0) == JVM_SIGNATURE_CLASS && ends_with(JVM_SIGNATURE_ENDCLASS)) {
 92     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
 93   } else {
 94     // reference count is incremented to be consistent with the behavior with
 95     // the SymbolTable::new_symbol() call above
 96     this->increment_refcount();
 97     return this;
 98   }
 99 }
100 
101 bool Symbol::is_same_fundamental_type(Symbol* s) const {
102   if (this == s) return true;
103   if (utf8_length() < 3) return false;
104   int offset1, offset2, len;
105   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
106     if (char_at(0) != JVM_SIGNATURE_CLASS) return false;
107     offset1 = 1;
108     len = utf8_length() - 2;
109   } else {
110     offset1 = 0;
111     len = utf8_length();
112   }
113   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
114     if (s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
115     offset2 = 1;
116   } else {
117     offset2 = 0;
118   }
119   if ((offset2 + len) > s->utf8_length()) return false;
120   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
121     return false;
122   int l = len;
123   while (l-- > 0) {
124     if (char_at(offset1 + l) != s->char_at(offset2 + l))
125       return false;
126   }
127   return true;
128 }
129 
130 // ------------------------------------------------------------------
131 // Symbol::index_of
132 //
133 // Test if we have the give substring at or after the i-th char of this
134 // symbol's utf8 bytes.
135 // Return -1 on failure.  Otherwise return the first index where substr occurs.
136 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
137   assert(i >= 0 && i <= utf8_length(), "oob");
138   if (substr_len <= 0)  return 0;
139   char first_char = substr[0];
140   address bytes = (address) ((Symbol*)this)->base();
141   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
142   address scan = bytes + i;
143   if (scan > limit)
144     return -1;
145   for (; scan <= limit; scan++) {
146     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
147     if (scan == nullptr)
148       return -1;  // not found
149     assert(scan >= bytes+i && scan <= limit, "scan oob");
150     if (substr_len <= 2
151         ? (char) scan[substr_len-1] == substr[substr_len-1]
152         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
153       return (int)(scan - bytes);
154     }
155   }
156   return -1;
157 }
158 
159 bool Symbol::is_star_match(const char* pattern) const {
160   if (strchr(pattern, '*') == nullptr) {
161     return equals(pattern);
162   } else {
163     ResourceMark rm;
164     char* buf = as_C_string();
165     return StringUtils::is_star_match(pattern, buf);
166   }
167 }
168 
169 char* Symbol::as_C_string(char* buf, int size) const {
170   if (size > 0) {
171     int len = MIN2(size - 1, utf8_length());
172     for (int i = 0; i < len; i++) {
173       buf[i] = char_at(i);
174     }
175     buf[len] = '\0';
176   }
177   return buf;
178 }
179 
180 char* Symbol::as_C_string() const {
181   int len = utf8_length();
182   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
183   return as_C_string(str, len + 1);
184 }
185 
186 void Symbol::print_utf8_on(outputStream* st) const {
187   st->print("%s", as_C_string());
188 }
189 
190 void Symbol::print_symbol_on(outputStream* st) const {
191   char *s;
192   st = st ? st : tty;
193   {
194     // ResourceMark may not affect st->print(). If st is a string
195     // stream it could resize, using the same resource arena.
196     ResourceMark rm;
197     s = as_quoted_ascii();
198     s = os::strdup(s);
199   }
200   if (s == nullptr) {
201     st->print("(null)");
202   } else {
203     st->print("%s", s);
204     os::free(s);
205   }
206 }
207 
208 char* Symbol::as_quoted_ascii() const {
209   const char *ptr = (const char *)&_body[0];
210   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
211   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
212   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
213   return result;
214 }
215 
216 jchar* Symbol::as_unicode(int& length) const {
217   Symbol* this_ptr = (Symbol*)this;
218   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
219   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
220   if (length > 0) {
221     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
222   }
223   return result;
224 }
225 
226 const char* Symbol::as_klass_external_name(char* buf, int size) const {
227   if (size > 0) {
228     char* str    = as_C_string(buf, size);
229     int   length = (int)strlen(str);
230     // Turn all '/'s into '.'s (also for array klasses)
231     for (int index = 0; index < length; index++) {
232       if (str[index] == JVM_SIGNATURE_SLASH) {
233         str[index] = JVM_SIGNATURE_DOT;
234       }
235     }
236     return str;
237   } else {
238     return buf;
239   }
240 }
241 
242 const char* Symbol::as_klass_external_name() const {
243   char* str    = as_C_string();
244   int   length = (int)strlen(str);
245   // Turn all '/'s into '.'s (also for array klasses)
246   for (int index = 0; index < length; index++) {
247     if (str[index] == JVM_SIGNATURE_SLASH) {
248       str[index] = JVM_SIGNATURE_DOT;
249     }
250   }
251   return str;
252 }
253 
254 static void print_class(outputStream *os, const SignatureStream& ss) {
255   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
256   for (int i = sb; i < se; ++i) {
257     char ch = ss.raw_char_at(i);
258     if (ch == JVM_SIGNATURE_SLASH) {
259       os->put(JVM_SIGNATURE_DOT);
260     } else {
261       os->put(ch);
262     }
263   }
264 }
265 
266 static void print_array(outputStream *os, SignatureStream& ss) {
267   int dimensions = ss.skip_array_prefix();
268   assert(dimensions > 0, "");
269   if (ss.is_reference()) {
270     print_class(os, ss);
271   } else {
272     os->print("%s", type2name(ss.type()));
273   }
274   for (int i = 0; i < dimensions; ++i) {
275     os->print("[]");
276   }
277 }
278 
279 void Symbol::print_as_signature_external_return_type(outputStream *os) {
280   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
281     if (ss.at_return_type()) {
282       if (ss.is_array()) {
283         print_array(os, ss);
284       } else if (ss.is_reference()) {
285         print_class(os, ss);
286       } else {
287         os->print("%s", type2name(ss.type()));
288       }
289     }
290   }
291 }
292 
293 void Symbol::print_as_signature_external_parameters(outputStream *os) {
294   bool first = true;
295   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
296     if (ss.at_return_type()) break;
297     if (!first) { os->print(", "); }
298     if (ss.is_array()) {
299       print_array(os, ss);
300     } else if (ss.is_reference()) {
301       print_class(os, ss);
302     } else {
303       os->print("%s", type2name(ss.type()));
304     }
305     first = false;
306   }
307 }
308 
309 void Symbol::print_as_field_external_type(outputStream *os) {
310   SignatureStream ss(this, false);
311   assert(!ss.is_done(), "must have at least one element in field ref");
312   assert(!ss.at_return_type(), "field ref cannot be a return type");
313   assert(!Signature::is_method(this), "field ref cannot be a method");
314 
315   if (ss.is_array()) {
316     print_array(os, ss);
317   } else if (ss.is_reference()) {
318     print_class(os, ss);
319   } else {
320     os->print("%s", type2name(ss.type()));
321   }
322 #ifdef ASSERT
323   ss.next();
324   assert(ss.is_done(), "must have at most one element in field ref");
325 #endif
326 }
327 
328 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
329 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
330 // lookup to avoid reviving a dead Symbol.
331 bool Symbol::try_increment_refcount() {
332   uint32_t found = _hash_and_refcount;
333   while (true) {
334     uint32_t old_value = found;
335     int refc = extract_refcount(old_value);
336     if (refc == PERM_REFCOUNT) {
337       return true;  // sticky max or created permanent
338     } else if (refc == 0) {
339       return false; // dead, can't revive.
340     } else {
341       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
342       if (found == old_value) {
343         return true; // successfully updated.
344       }
345       // refcount changed, try again.
346     }
347   }
348 }
349 
350 // The increment_refcount() is called when not doing lookup. It is assumed that you
351 // have a symbol with a non-zero refcount and it can't become zero while referenced by
352 // this caller.
353 void Symbol::increment_refcount() {
354   if (!try_increment_refcount()) {
355     print();
356     fatal("refcount has gone to zero");
357   }
358 #ifndef PRODUCT
359   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
360     NOT_PRODUCT(Atomic::inc(&_total_count);)
361   }
362 #endif
363 }
364 
365 // Decrement refcount potentially while racing increment, so we need
366 // to check the value after attempting to decrement so that if another
367 // thread increments to PERM_REFCOUNT the value is not decremented.
368 void Symbol::decrement_refcount() {
369   uint32_t found = _hash_and_refcount;
370   while (true) {
371     uint32_t old_value = found;
372     int refc = extract_refcount(old_value);
373     if (refc == PERM_REFCOUNT) {
374       return;  // refcount is permanent, permanent is sticky
375     } else if (refc == 0) {
376       print();
377       fatal("refcount underflow");
378       return;
379     } else {
380       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
381       if (found == old_value) {
382         return;  // successfully updated.
383       }
384       // refcount changed, try again.
385     }
386   }
387 }
388 
389 void Symbol::make_permanent() {
390   uint32_t found = _hash_and_refcount;
391   while (true) {
392     uint32_t old_value = found;
393     int refc = extract_refcount(old_value);
394     if (refc == PERM_REFCOUNT) {
395       return;  // refcount is permanent, permanent is sticky
396     } else if (refc == 0) {
397       print();
398       fatal("refcount underflow");
399       return;
400     } else {
401       short hash = extract_hash(old_value);
402       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
403       if (found == old_value) {
404         return;  // successfully updated.
405       }
406       // refcount changed, try again.
407     }
408   }
409 }
410 
411 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
412   if (log_is_enabled(Trace, cds)) {
413     LogStream trace_stream(Log(cds)::trace());
414     trace_stream.print("Iter(Symbol): %p ", this);
415     print_value_on(&trace_stream);
416     trace_stream.cr();
417   }
418 }
419 
420 void Symbol::print_on(outputStream* st) const {
421   st->print("Symbol: '");
422   print_symbol_on(st);
423   st->print("'");
424   st->print(" count %d", refcount());
425 }
426 
427 void Symbol::print() const { print_on(tty); }
428 
429 // The print_value functions are present in all builds, to support the
430 // disassembler and error reporting.
431 void Symbol::print_value_on(outputStream* st) const {
432   st->print_raw("'", 1);
433   st->print_raw((const char*)base(), utf8_length());
434   st->print_raw("'", 1);
435 }
436 
437 void Symbol::print_value() const { print_value_on(tty); }
438 
439 bool Symbol::is_valid(Symbol* s) {
440   if (!is_aligned(s, sizeof(MetaWord))) return false;
441   if ((size_t)s < os::min_page_size()) return false;
442 
443   if (!os::is_readable_range(s, s + 1)) return false;
444 
445   // Symbols are not allocated in Java heap.
446   if (Universe::heap()->is_in(s)) return false;
447 
448   int len = s->utf8_length();
449   if (len < 0) return false;
450 
451   jbyte* bytes = (jbyte*) s->bytes();
452   return os::is_readable_range(bytes, bytes + len);
453 }
454 
// Definition of the static counter Symbol::_total_count; it exists only in
// non-product builds. increment_refcount() bumps it for each increment on a
// non-permanent symbol.
// SymbolTable prints this in its statistics
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
457 
458 #ifndef PRODUCT
459 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
460   return vmSymbols::is_valid_id(vm_symbol_id);
461 }
462 #endif