1 /*
  2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 
 26 #include "precompiled.hpp"
 27 #include "cds/metaspaceShared.hpp"
 28 #include "classfile/altHashing.hpp"
 29 #include "classfile/classLoaderData.hpp"
 30 #include "classfile/vmSymbols.hpp"
 31 #include "gc/shared/collectedHeap.hpp"
 32 #include "logging/log.hpp"
 33 #include "logging/logStream.hpp"
 34 #include "memory/allocation.inline.hpp"
 35 #include "memory/resourceArea.hpp"
 36 #include "memory/universe.hpp"
 37 #include "oops/symbol.hpp"
 38 #include "runtime/atomic.hpp"
 39 #include "runtime/mutexLocker.hpp"
 40 #include "runtime/os.hpp"
 41 #include "runtime/signature.hpp"
 42 #include "utilities/stringUtils.hpp"
 43 #include "utilities/utf8.hpp"
 44 
 // Out-of-class definition of the static Symbol* table; it has one slot per
 // vmSymbols::number_of_symbols().
 45 Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 46 
 47 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 48   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 49   assert(refcount >= 0, "negative refcount");
 50   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 51   uint32_t hi = hash;
 52   uint32_t lo = refcount;
 53   return (hi << 16) | lo;
 54 }
 55 
 56 Symbol::Symbol(const u1* name, int length, int refcount) {
 57   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 58   _length = length;
 59   // _body[0..1] are allocated in the header just by coincidence in the current
 60   // implementation of Symbol. They are read by identity_hash(), so make sure they
 61   // are initialized.
 62   // No other code should assume that _body[0..1] are always allocated. E.g., do
 63   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 64   _body[0] = _body[1] = 0;
 65   memcpy(_body, name, length);
 66 }
 67 
 68 // This copies the symbol when it is added to the ConcurrentHashTable.
 69 Symbol::Symbol(const Symbol& s1) {
 70   _hash_and_refcount = s1._hash_and_refcount;
 71   _length = s1._length;
 72   memcpy(_body, s1._body, _length);
 73 }
 74 
 75 #if INCLUDE_CDS
 76 void Symbol::update_identity_hash() {
 77   // This is called at a safepoint during dumping of a static CDS archive. The caller should have
 78   // called os::init_random() with a deterministic seed and then iterate all archived Symbols in
 79   // a deterministic order.
 80   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 81   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), PERM_REFCOUNT);
 82 }
 83 
 84 void Symbol::set_permanent() {
 85   // This is called at a safepoint during dumping of a dynamic CDS archive.
 86   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 87   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 88 }
 89 #endif
 90 
 91 bool Symbol::is_Q_signature() const {
 92   int len = utf8_length();
 93   return len > 2 && char_at(0) == JVM_SIGNATURE_PRIMITIVE_OBJECT && char_at(len - 1) == JVM_SIGNATURE_ENDCLASS;
 94 }
 95 
 96 bool Symbol::is_Q_array_signature() const {
 97   int l = utf8_length();
 98   if (l < 2 || char_at(0) != JVM_SIGNATURE_ARRAY || char_at(l - 1) != JVM_SIGNATURE_ENDCLASS) {
 99     return false;
100   }
101   for (int i = 1; i < (l - 2); i++) {
102     char c = char_at(i);
103     if (c == JVM_SIGNATURE_PRIMITIVE_OBJECT) {
104       return true;
105     }
106     if (c != JVM_SIGNATURE_ARRAY) {
107       return false;
108     }
109   }
110   return false;
111 }
112 
113 bool Symbol::is_Q_method_signature() const {
114   assert(SignatureVerifier::is_valid_method_signature(this), "must be");
115   int len = utf8_length();
116   if (len > 4 && char_at(0) == JVM_SIGNATURE_FUNC) {
117     for (int i=1; i<len-3; i++) { // Must end with ")Qx;", where x is at least one character or more.
118       if (char_at(i) == JVM_SIGNATURE_ENDFUNC && char_at(i+1) == JVM_SIGNATURE_PRIMITIVE_OBJECT) {
119         return true;
120       }
121     }
122   }
123   return false;
124 }
125 
126 Symbol* Symbol::fundamental_name(TRAPS) {
127   if ((char_at(0) == JVM_SIGNATURE_PRIMITIVE_OBJECT || char_at(0) == JVM_SIGNATURE_CLASS) && ends_with(JVM_SIGNATURE_ENDCLASS)) {
128     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
129   } else {
130     // reference count is incremented to be consistent with the behavior with
131     // the SymbolTable::new_symbol() call above
132     this->increment_refcount();
133     return this;
134   }
135 }
136 
137 bool Symbol::is_same_fundamental_type(Symbol* s) const {
138   if (this == s) return true;
139   if (utf8_length() < 3) return false;
140   int offset1, offset2, len;
141   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
142     if (char_at(0) != JVM_SIGNATURE_PRIMITIVE_OBJECT && char_at(0) != JVM_SIGNATURE_CLASS) return false;
143     offset1 = 1;
144     len = utf8_length() - 2;
145   } else {
146     offset1 = 0;
147     len = utf8_length();
148   }
149   if (ends_with(JVM_SIGNATURE_ENDCLASS)) {
150     if (s->char_at(0) != JVM_SIGNATURE_PRIMITIVE_OBJECT && s->char_at(0) != JVM_SIGNATURE_CLASS) return false;
151     offset2 = 1;
152   } else {
153     offset2 = 0;
154   }
155   if ((offset2 + len) > s->utf8_length()) return false;
156   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
157     return false;
158   int l = len;
159   while (l-- > 0) {
160     if (char_at(offset1 + l) != s->char_at(offset2 + l))
161       return false;
162   }
163   return true;
164 }
165 
166 // ------------------------------------------------------------------
167 // Symbol::index_of
168 //
169 // Test if we have the give substring at or after the i-th char of this
170 // symbol's utf8 bytes.
171 // Return -1 on failure.  Otherwise return the first index where substr occurs.
172 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
173   assert(i >= 0 && i <= utf8_length(), "oob");
174   if (substr_len <= 0)  return 0;
175   char first_char = substr[0];
176   address bytes = (address) ((Symbol*)this)->base();
177   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
178   address scan = bytes + i;
179   if (scan > limit)
180     return -1;
181   for (; scan <= limit; scan++) {
182     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
183     if (scan == nullptr)
184       return -1;  // not found
185     assert(scan >= bytes+i && scan <= limit, "scan oob");
186     if (substr_len <= 2
187         ? (char) scan[substr_len-1] == substr[substr_len-1]
188         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
189       return (int)(scan - bytes);
190     }
191   }
192   return -1;
193 }
194 
195 bool Symbol::is_star_match(const char* pattern) const {
196   if (strchr(pattern, '*') == nullptr) {
197     return equals(pattern);
198   } else {
199     ResourceMark rm;
200     char* buf = as_C_string();
201     return StringUtils::is_star_match(pattern, buf);
202   }
203 }
204 
205 char* Symbol::as_C_string(char* buf, int size) const {
206   if (size > 0) {
207     int len = MIN2(size - 1, utf8_length());
208     for (int i = 0; i < len; i++) {
209       buf[i] = char_at(i);
210     }
211     buf[len] = '\0';
212   }
213   return buf;
214 }
215 
216 char* Symbol::as_C_string() const {
217   int len = utf8_length();
218   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
219   return as_C_string(str, len + 1);
220 }
221 
222 void Symbol::print_utf8_on(outputStream* st) const {
223   st->print("%s", as_C_string());
224 }
225 
226 void Symbol::print_symbol_on(outputStream* st) const {
227   char *s;
228   st = st ? st : tty;
229   {
230     // ResourceMark may not affect st->print(). If st is a string
231     // stream it could resize, using the same resource arena.
232     ResourceMark rm;
233     s = as_quoted_ascii();
234     s = os::strdup(s);
235   }
236   if (s == nullptr) {
237     st->print("(null)");
238   } else {
239     st->print("%s", s);
240     os::free(s);
241   }
242 }
243 
244 char* Symbol::as_quoted_ascii() const {
245   const char *ptr = (const char *)&_body[0];
246   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
247   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
248   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
249   return result;
250 }
251 
252 jchar* Symbol::as_unicode(int& length) const {
253   Symbol* this_ptr = (Symbol*)this;
254   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
255   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
256   if (length > 0) {
257     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
258   }
259   return result;
260 }
261 
262 const char* Symbol::as_klass_external_name(char* buf, int size) const {
263   if (size > 0) {
264     char* str    = as_C_string(buf, size);
265     int   length = (int)strlen(str);
266     // Turn all '/'s into '.'s (also for array klasses)
267     for (int index = 0; index < length; index++) {
268       if (str[index] == JVM_SIGNATURE_SLASH) {
269         str[index] = JVM_SIGNATURE_DOT;
270       }
271     }
272     return str;
273   } else {
274     return buf;
275   }
276 }
277 
278 const char* Symbol::as_klass_external_name() const {
279   char* str    = as_C_string();
280   int   length = (int)strlen(str);
281   // Turn all '/'s into '.'s (also for array klasses)
282   for (int index = 0; index < length; index++) {
283     if (str[index] == JVM_SIGNATURE_SLASH) {
284       str[index] = JVM_SIGNATURE_DOT;
285     }
286   }
287   return str;
288 }
289 
290 static void print_class(outputStream *os, const SignatureStream& ss) {
291   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
292   for (int i = sb; i < se; ++i) {
293     int ch = ss.raw_char_at(i);
294     if (ch == JVM_SIGNATURE_SLASH) {
295       os->put(JVM_SIGNATURE_DOT);
296     } else {
297       os->put(ch);
298     }
299   }
300 }
301 
302 static void print_array(outputStream *os, SignatureStream& ss) {
303   int dimensions = ss.skip_array_prefix();
304   assert(dimensions > 0, "");
305   if (ss.is_reference()) {
306     print_class(os, ss);
307   } else {
308     os->print("%s", type2name(ss.type()));
309   }
310   for (int i = 0; i < dimensions; ++i) {
311     os->print("[]");
312   }
313 }
314 
315 void Symbol::print_as_signature_external_return_type(outputStream *os) {
316   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
317     if (ss.at_return_type()) {
318       if (ss.is_array()) {
319         print_array(os, ss);
320       } else if (ss.is_reference()) {
321         print_class(os, ss);
322       } else {
323         os->print("%s", type2name(ss.type()));
324       }
325     }
326   }
327 }
328 
329 void Symbol::print_as_signature_external_parameters(outputStream *os) {
330   bool first = true;
331   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
332     if (ss.at_return_type()) break;
333     if (!first) { os->print(", "); }
334     if (ss.is_array()) {
335       print_array(os, ss);
336     } else if (ss.is_reference()) {
337       print_class(os, ss);
338     } else {
339       os->print("%s", type2name(ss.type()));
340     }
341     first = false;
342   }
343 }
344 
345 void Symbol::print_as_field_external_type(outputStream *os) {
346   SignatureStream ss(this, false);
347   assert(!ss.is_done(), "must have at least one element in field ref");
348   assert(!ss.at_return_type(), "field ref cannot be a return type");
349   assert(!Signature::is_method(this), "field ref cannot be a method");
350 
351   if (ss.is_array()) {
352     print_array(os, ss);
353   } else if (ss.is_reference()) {
354     print_class(os, ss);
355   } else {
356     os->print("%s", type2name(ss.type()));
357   }
358 #ifdef ASSERT
359   ss.next();
360   assert(ss.is_done(), "must have at most one element in field ref");
361 #endif
362 }
363 
364 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
365 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
366 // lookup to avoid reviving a dead Symbol.
367 bool Symbol::try_increment_refcount() {
368   uint32_t found = _hash_and_refcount;
369   while (true) {
370     uint32_t old_value = found;
371     int refc = extract_refcount(old_value);
372     if (refc == PERM_REFCOUNT) {
373       return true;  // sticky max or created permanent
374     } else if (refc == 0) {
375       return false; // dead, can't revive.
376     } else {
377       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
378       if (found == old_value) {
379         return true; // successfully updated.
380       }
381       // refcount changed, try again.
382     }
383   }
384 }
385 
386 // The increment_refcount() is called when not doing lookup. It is assumed that you
387 // have a symbol with a non-zero refcount and it can't become zero while referenced by
388 // this caller.
389 void Symbol::increment_refcount() {
390   if (!try_increment_refcount()) {
391     print();
392     fatal("refcount has gone to zero");
393   }
394 #ifndef PRODUCT
395   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
396     NOT_PRODUCT(Atomic::inc(&_total_count);)
397   }
398 #endif
399 }
400 
401 // Decrement refcount potentially while racing increment, so we need
402 // to check the value after attempting to decrement so that if another
403 // thread increments to PERM_REFCOUNT the value is not decremented.
404 void Symbol::decrement_refcount() {
405   uint32_t found = _hash_and_refcount;
406   while (true) {
407     uint32_t old_value = found;
408     int refc = extract_refcount(old_value);
409     if (refc == PERM_REFCOUNT) {
410       return;  // refcount is permanent, permanent is sticky
411     } else if (refc == 0) {
412       print();
413       fatal("refcount underflow");
414       return;
415     } else {
416       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
417       if (found == old_value) {
418         return;  // successfully updated.
419       }
420       // refcount changed, try again.
421     }
422   }
423 }
424 
425 void Symbol::make_permanent() {
426   uint32_t found = _hash_and_refcount;
427   while (true) {
428     uint32_t old_value = found;
429     int refc = extract_refcount(old_value);
430     if (refc == PERM_REFCOUNT) {
431       return;  // refcount is permanent, permanent is sticky
432     } else if (refc == 0) {
433       print();
434       fatal("refcount underflow");
435       return;
436     } else {
437       int hash = extract_hash(old_value);
438       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
439       if (found == old_value) {
440         return;  // successfully updated.
441       }
442       // refcount changed, try again.
443     }
444   }
445 }
446 
447 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
448   if (log_is_enabled(Trace, cds)) {
449     LogStream trace_stream(Log(cds)::trace());
450     trace_stream.print("Iter(Symbol): %p ", this);
451     print_value_on(&trace_stream);
452     trace_stream.cr();
453   }
454 }
455 
456 void Symbol::print_on(outputStream* st) const {
457   st->print("Symbol: '");
458   print_symbol_on(st);
459   st->print("'");
460   st->print(" count %d", refcount());
461 }
462 
463 void Symbol::print() const { print_on(tty); }
464 
465 // The print_value functions are present in all builds, to support the
466 // disassembler and error reporting.
467 void Symbol::print_value_on(outputStream* st) const {
468   st->print("'");
469   for (int i = 0; i < utf8_length(); i++) {
470     st->print("%c", char_at(i));
471   }
472   st->print("'");
473 }
474 
475 void Symbol::print_value() const { print_value_on(tty); }
476 
477 bool Symbol::is_valid(Symbol* s) {
478   if (!is_aligned(s, sizeof(MetaWord))) return false;
479   if ((size_t)s < os::min_page_size()) return false;
480 
481   if (!os::is_readable_range(s, s + 1)) return false;
482 
483   // Symbols are not allocated in Java heap.
484   if (Universe::heap()->is_in(s)) return false;
485 
486   int len = s->utf8_length();
487   if (len < 0) return false;
488 
489   jbyte* bytes = (jbyte*) s->bytes();
490   return os::is_readable_range(bytes, bytes + len);
491 }
492 
493 void Symbol::print_Qvalue_on(outputStream* st) const {
494   st->print("'Q");
495   for (int i = 0; i < utf8_length(); i++) {
496     st->print("%c", char_at(i));
497   }
498   st->print(";'");
499 }
500 
501 // SymbolTable prints this in its statistics. Defined only through
    // NOT_PRODUCT; increment_refcount() bumps it for symbols whose refcount is
    // not PERM_REFCOUNT.
502 NOT_PRODUCT(size_t Symbol::_total_count = 0;)
503 
504 #ifndef PRODUCT
505 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
506   return vmSymbols::is_valid_id(vm_symbol_id);
507 }
508 #endif