1 /*
  2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "cds/archiveBuilder.hpp"
 27 #include "cds/metaspaceShared.hpp"
 28 #include "classfile/altHashing.hpp"
 29 #include "classfile/classLoaderData.hpp"
 30 #include "classfile/vmSymbols.hpp"
 31 #include "gc/shared/collectedHeap.hpp"
 32 #include "logging/log.hpp"
 33 #include "logging/logStream.hpp"
 34 #include "memory/allocation.inline.hpp"
 35 #include "memory/resourceArea.hpp"
 36 #include "memory/universe.hpp"
 37 #include "oops/symbol.hpp"
 38 #include "runtime/atomic.hpp"
 39 #include "runtime/mutexLocker.hpp"
 40 #include "runtime/os.hpp"
 41 #include "runtime/signature.hpp"
 42 #include "utilities/stringUtils.hpp"
 43 #include "utilities/utf8.hpp"
 44 
// Definition of the static table of Symbol pointers. It has one slot for each
// of the vmSymbols::number_of_symbols() entries.
Symbol* Symbol::_vm_symbols[vmSymbols::number_of_symbols()];
 46 
 47 uint32_t Symbol::pack_hash_and_refcount(short hash, int refcount) {
 48   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
 49   assert(refcount >= 0, "negative refcount");
 50   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
 51   uint32_t hi = hash;
 52   uint32_t lo = refcount;
 53   return (hi << 16) | lo;
 54 }
 55 
 56 Symbol::Symbol(const u1* name, int length, int refcount) {
 57   assert(length <= max_length(), "SymbolTable should have caught this!");
 58   _hash_and_refcount =  pack_hash_and_refcount((short)os::random(), refcount);
 59   _length = (u2)length;
 60   // _body[0..1] are allocated in the header just by coincidence in the current
 61   // implementation of Symbol. They are read by identity_hash(), so make sure they
 62   // are initialized.
 63   // No other code should assume that _body[0..1] are always allocated. E.g., do
 64   // not unconditionally read base()[0] as that will be invalid for an empty Symbol.
 65   _body[0] = _body[1] = 0;
 66   memcpy(_body, name, length);
 67 }
 68 
 69 // This copies the symbol when it is added to the ConcurrentHashTable.
 70 Symbol::Symbol(const Symbol& s1) {
 71   _hash_and_refcount = s1._hash_and_refcount;
 72   _length = s1._length;
 73   memcpy(_body, s1._body, _length);
 74 }
 75 
 76 #if INCLUDE_CDS
 77 void Symbol::update_identity_hash() {
 78   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 79   _hash_and_refcount =  pack_hash_and_refcount((short)ArchiveBuilder::current()->entropy(), PERM_REFCOUNT);
 80 }
 81 
 82 void Symbol::set_permanent() {
 83   // This is called at a safepoint during dumping of a dynamic CDS archive.
 84   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
 85   _hash_and_refcount =  pack_hash_and_refcount(extract_hash(_hash_and_refcount), PERM_REFCOUNT);
 86 }
 87 #endif
 88 
 89 // ------------------------------------------------------------------
// Symbol::index_of_at
//
// Test if we have the given substring at or after the i-th char of this
// symbol's utf8 bytes.
// Return -1 on failure.  Otherwise return the first index where substr occurs.
 95 int Symbol::index_of_at(int i, const char* substr, int substr_len) const {
 96   assert(i >= 0 && i <= utf8_length(), "oob");
 97   if (substr_len <= 0)  return 0;
 98   char first_char = substr[0];
 99   address bytes = (address) ((Symbol*)this)->base();
100   address limit = bytes + utf8_length() - substr_len;  // inclusive limit
101   address scan = bytes + i;
102   if (scan > limit)
103     return -1;
104   for (; scan <= limit; scan++) {
105     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
106     if (scan == nullptr)
107       return -1;  // not found
108     assert(scan >= bytes+i && scan <= limit, "scan oob");
109     if (substr_len <= 2
110         ? (char) scan[substr_len-1] == substr[substr_len-1]
111         : memcmp(scan+1, substr+1, substr_len-1) == 0) {
112       return (int)(scan - bytes);
113     }
114   }
115   return -1;
116 }
117 
118 bool Symbol::is_star_match(const char* pattern) const {
119   if (strchr(pattern, '*') == nullptr) {
120     return equals(pattern);
121   } else {
122     ResourceMark rm;
123     char* buf = as_C_string();
124     return StringUtils::is_star_match(pattern, buf);
125   }
126 }
127 
128 char* Symbol::as_C_string(char* buf, int size) const {
129   if (size > 0) {
130     int len = MIN2(size - 1, utf8_length());
131     for (int i = 0; i < len; i++) {
132       buf[i] = char_at(i);
133     }
134     buf[len] = '\0';
135   }
136   return buf;
137 }
138 
139 char* Symbol::as_C_string() const {
140   int len = utf8_length();
141   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
142   return as_C_string(str, len + 1);
143 }
144 
145 void Symbol::print_utf8_on(outputStream* st) const {
146   st->print("%s", as_C_string());
147 }
148 
149 void Symbol::print_symbol_on(outputStream* st) const {
150   char *s;
151   st = st ? st : tty;
152   {
153     // ResourceMark may not affect st->print(). If st is a string
154     // stream it could resize, using the same resource arena.
155     ResourceMark rm;
156     s = as_quoted_ascii();
157     s = os::strdup(s);
158   }
159   if (s == nullptr) {
160     st->print("(null)");
161   } else {
162     st->print("%s", s);
163     os::free(s);
164   }
165 }
166 
167 char* Symbol::as_quoted_ascii() const {
168   const char *ptr = (const char *)&_body[0];
169   size_t quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
170   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
171   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
172   return result;
173 }
174 
175 jchar* Symbol::as_unicode(int& length) const {
176   Symbol* this_ptr = (Symbol*)this;
177   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
178   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
179   if (length > 0) {
180     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
181   }
182   return result;
183 }
184 
185 const char* Symbol::as_klass_external_name(char* buf, int size) const {
186   if (size > 0) {
187     char* str    = as_C_string(buf, size);
188     int   length = (int)strlen(str);
189     // Turn all '/'s into '.'s (also for array klasses)
190     for (int index = 0; index < length; index++) {
191       if (str[index] == JVM_SIGNATURE_SLASH) {
192         str[index] = JVM_SIGNATURE_DOT;
193       }
194     }
195     return str;
196   } else {
197     return buf;
198   }
199 }
200 
201 const char* Symbol::as_klass_external_name() const {
202   char* str    = as_C_string();
203   int   length = (int)strlen(str);
204   // Turn all '/'s into '.'s (also for array klasses)
205   for (int index = 0; index < length; index++) {
206     if (str[index] == JVM_SIGNATURE_SLASH) {
207       str[index] = JVM_SIGNATURE_DOT;
208     }
209   }
210   return str;
211 }
212 
213 static void print_class(outputStream *os, const SignatureStream& ss) {
214   int sb = ss.raw_symbol_begin(), se = ss.raw_symbol_end();
215   for (int i = sb; i < se; ++i) {
216     char ch = ss.raw_char_at(i);
217     if (ch == JVM_SIGNATURE_SLASH) {
218       os->put(JVM_SIGNATURE_DOT);
219     } else {
220       os->put(ch);
221     }
222   }
223 }
224 
225 static void print_array(outputStream *os, SignatureStream& ss) {
226   int dimensions = ss.skip_array_prefix();
227   assert(dimensions > 0, "");
228   if (ss.is_reference()) {
229     print_class(os, ss);
230   } else {
231     os->print("%s", type2name(ss.type()));
232   }
233   for (int i = 0; i < dimensions; ++i) {
234     os->print("[]");
235   }
236 }
237 
238 void Symbol::print_as_signature_external_return_type(outputStream *os) {
239   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
240     if (ss.at_return_type()) {
241       if (ss.is_array()) {
242         print_array(os, ss);
243       } else if (ss.is_reference()) {
244         print_class(os, ss);
245       } else {
246         os->print("%s", type2name(ss.type()));
247       }
248     }
249   }
250 }
251 
252 void Symbol::print_as_signature_external_parameters(outputStream *os) {
253   bool first = true;
254   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
255     if (ss.at_return_type()) break;
256     if (!first) { os->print(", "); }
257     if (ss.is_array()) {
258       print_array(os, ss);
259     } else if (ss.is_reference()) {
260       print_class(os, ss);
261     } else {
262       os->print("%s", type2name(ss.type()));
263     }
264     first = false;
265   }
266 }
267 
268 void Symbol::print_as_field_external_type(outputStream *os) {
269   SignatureStream ss(this, false);
270   assert(!ss.is_done(), "must have at least one element in field ref");
271   assert(!ss.at_return_type(), "field ref cannot be a return type");
272   assert(!Signature::is_method(this), "field ref cannot be a method");
273 
274   if (ss.is_array()) {
275     print_array(os, ss);
276   } else if (ss.is_reference()) {
277     print_class(os, ss);
278   } else {
279     os->print("%s", type2name(ss.type()));
280   }
281 #ifdef ASSERT
282   ss.next();
283   assert(ss.is_done(), "must have at most one element in field ref");
284 #endif
285 }
286 
287 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
288 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
289 // lookup to avoid reviving a dead Symbol.
290 bool Symbol::try_increment_refcount() {
291   uint32_t found = _hash_and_refcount;
292   while (true) {
293     uint32_t old_value = found;
294     int refc = extract_refcount(old_value);
295     if (refc == PERM_REFCOUNT) {
296       return true;  // sticky max or created permanent
297     } else if (refc == 0) {
298       return false; // dead, can't revive.
299     } else {
300       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value + 1);
301       if (found == old_value) {
302         return true; // successfully updated.
303       }
304       // refcount changed, try again.
305     }
306   }
307 }
308 
309 // The increment_refcount() is called when not doing lookup. It is assumed that you
310 // have a symbol with a non-zero refcount and it can't become zero while referenced by
311 // this caller.
312 void Symbol::increment_refcount() {
313   if (!try_increment_refcount()) {
314     print();
315     fatal("refcount has gone to zero");
316   }
317 #ifndef PRODUCT
318   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
319     NOT_PRODUCT(Atomic::inc(&_total_count);)
320   }
321 #endif
322 }
323 
324 // Decrement refcount potentially while racing increment, so we need
325 // to check the value after attempting to decrement so that if another
326 // thread increments to PERM_REFCOUNT the value is not decremented.
327 void Symbol::decrement_refcount() {
328   uint32_t found = _hash_and_refcount;
329   while (true) {
330     uint32_t old_value = found;
331     int refc = extract_refcount(old_value);
332     if (refc == PERM_REFCOUNT) {
333       return;  // refcount is permanent, permanent is sticky
334     } else if (refc == 0) {
335       print();
336       fatal("refcount underflow");
337       return;
338     } else {
339       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, old_value - 1);
340       if (found == old_value) {
341         return;  // successfully updated.
342       }
343       // refcount changed, try again.
344     }
345   }
346 }
347 
348 void Symbol::make_permanent() {
349   uint32_t found = _hash_and_refcount;
350   while (true) {
351     uint32_t old_value = found;
352     int refc = extract_refcount(old_value);
353     if (refc == PERM_REFCOUNT) {
354       return;  // refcount is permanent, permanent is sticky
355     } else if (refc == 0) {
356       print();
357       fatal("refcount underflow");
358       return;
359     } else {
360       short hash = extract_hash(old_value);
361       found = Atomic::cmpxchg(&_hash_and_refcount, old_value, pack_hash_and_refcount(hash, PERM_REFCOUNT));
362       if (found == old_value) {
363         return;  // successfully updated.
364       }
365       // refcount changed, try again.
366     }
367   }
368 }
369 
370 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
371   if (log_is_enabled(Trace, cds)) {
372     LogStream trace_stream(Log(cds)::trace());
373     trace_stream.print("Iter(Symbol): %p ", this);
374     print_value_on(&trace_stream);
375     trace_stream.cr();
376   }
377 }
378 
379 void Symbol::print_on(outputStream* st) const {
380   st->print("Symbol: '");
381   print_symbol_on(st);
382   st->print("'");
383   st->print(" count %d", refcount());
384 }
385 
386 void Symbol::print() const { print_on(tty); }
387 
388 // The print_value functions are present in all builds, to support the
389 // disassembler and error reporting.
390 void Symbol::print_value_on(outputStream* st) const {
391   st->print_raw("'", 1);
392   st->print_raw((const char*)base(), utf8_length());
393   st->print_raw("'", 1);
394 }
395 
396 void Symbol::print_value() const { print_value_on(tty); }
397 
398 bool Symbol::is_valid(Symbol* s) {
399   if (!is_aligned(s, sizeof(MetaWord))) return false;
400   if ((size_t)s < os::min_page_size()) return false;
401 
402   if (!os::is_readable_range(s, s + 1)) return false;
403 
404   // Symbols are not allocated in Java heap.
405   if (Universe::heap()->is_in(s)) return false;
406 
407   int len = s->utf8_length();
408   if (len < 0) return false;
409 
410   jbyte* bytes = (jbyte*) s->bytes();
411   return os::is_readable_range(bytes, bytes + len);
412 }
413 
// SymbolTable prints this in its statistics.
// The definition is wrapped in NOT_PRODUCT. increment_refcount() bumps this
// counter (under #ifndef PRODUCT) for symbols whose refcount is not
// PERM_REFCOUNT.
NOT_PRODUCT(size_t Symbol::_total_count = 0;)
416 
417 #ifndef PRODUCT
418 bool Symbol::is_valid_id(vmSymbolID vm_symbol_id) {
419   return vmSymbols::is_valid_id(vm_symbol_id);
420 }
421 #endif