1 /*
  2  * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "classfile/symbolTable.hpp"
 27 #include "classfile/vmSymbols.hpp"
 28 #include "compiler/compilerOracle.hpp"
 29 #include "compiler/methodMatcher.hpp"
 30 #include "memory/oopFactory.hpp"
 31 #include "memory/resourceArea.hpp"
 32 #include "oops/method.hpp"
 33 #include "oops/oop.inline.hpp"
 34 
 35 // The JVM specification defines the allowed characters.
 36 // Tokens that are disallowed by the JVM specification can have
 37 // a meaning to the parser so we need to include them here.
 38 // The parser does not enforce all rules of the JVMS - a successful parse
 39 // does not mean that it is an allowed name. Illegal names will
 40 // be ignored since they never can match a class or method.
 41 //
 42 // '\0' and 0xf0-0xff are disallowed in constant string values
 // 0x20 ' ', 0x09 '\t' and 0x2c ',' are used in the matching
 // 0x5b '[' and 0x5d ']' cannot be used because of the matcher
 45 // 0x28 '(' and 0x29 ')' are used for the signature
 46 // 0x2e '.' is always replaced before the matching
 47 // 0x2f '/' is only used in the class name as package separator
 48 //
 49 // It seems hard to get Non-ASCII characters to work in all circumstances due
 50 // to limitations in Windows. So only ASCII characters are supported on Windows.
 51 
 // ASCII bytes accepted inside a name token, listed in ascending order.
 // Missing from the 0x01-0x7f range are 0x09 '\t', 0x20 ' ', 0x28 '(',
 // 0x29 ')', 0x2e '.', 0x2f '/', 0x5b '[' and 0x5d ']'.
 #define RANGEBASE_ASCII \
     "\x01\x02\x03\x04\x05\x06\x07\x08\x0a\x0b\x0c\x0d\x0e\x0f" \
     "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
     "\x21\x22\x23\x24\x25\x26\x27\x2a\x2b\x2c\x2d" \
     "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
     "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
     "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5c\x5e\x5f" \
     "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
     "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"

 // Bytes 0x80-0xef in ascending order; 0xf0-0xff are not listed.
 #define RANGEBASE_NON_ASCII \
     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"

 // Full character set: the ASCII part plus the non-ASCII part wrapped in
 // NOT_WINDOWS(). NOT_WINDOWS is defined outside this file; presumably it
 // drops its argument on Windows builds - confirm against its definition.
 #define RANGEBASE RANGEBASE_ASCII NOT_WINDOWS(RANGEBASE_NON_ASCII)

 // sscanf scan sets built from RANGEBASE: RANGE0 puts '*' in front of it,
 // RANGESLASH additionally appends '/'.
 #define RANGE0 "[*" RANGEBASE "]"
 #define RANGESLASH "[*" RANGEBASE "/]"
 73 
 74 MethodMatcher::MethodMatcher():
 75     _class_name(nullptr)
 76   , _method_name(nullptr)
 77   , _signature(nullptr)
 78   , _class_mode(Exact)
 79   , _method_mode(Exact) {
 80 }
 81 
 82 MethodMatcher::~MethodMatcher() {
 83   if (_class_name != nullptr) {
 84     _class_name->decrement_refcount();
 85   }
 86   if (_method_name != nullptr) {
 87     _method_name->decrement_refcount();
 88   }
 89   if (_signature != nullptr) {
 90     _signature->decrement_refcount();
 91   }
 92 }
 93 
 94 void MethodMatcher::init(Symbol* class_name, Mode class_mode,
 95                              Symbol* method_name, Mode method_mode,
 96                              Symbol* signature) {
 97  _class_mode = class_mode;
 98  _method_mode = method_mode;
 99  _class_name = class_name;
100  _method_name = method_name;
101  _signature = signature;
102 }
103 
104 bool MethodMatcher::canonicalize(char * line, const char *& error_msg) {
105   char* colon = strstr(line, "::");
106   bool have_colon = (colon != nullptr);
107   if (have_colon) {
108     // Don't allow multiple '::'
109     if (colon[2] != '\0') {
110       if (strstr(colon+2, "::")) {
111         error_msg = "Method pattern only allows one '::' allowed";
112         return false;
113       }
114     }
115 
116     char* pos = line;
117     if (pos != nullptr) {
118       for (char* lp = pos + 1; *lp != '\0'; lp++) {
119         if (*lp == '(') {
120           break;
121         }
122 
123         if (*lp == '/') {
124           error_msg = "Method pattern uses '/' together with '::' (tips: replace '/' with '+' for hidden classes)";
125           return false;
126         }
127       }
128     }
129   } else {
130     // Don't allow mixed package separators
131     char* pos = strchr(line, '.');
132     bool in_signature = false;
133     if (pos != nullptr) {
134       for (char* lp = pos + 1; *lp != '\0'; lp++) {
135         if (*lp == '(') {
136           in_signature = true;
137         }
138 
139         // After any comma the method pattern has ended
140         if (*lp == ',') {
141           break;
142         }
143 
144         if (!in_signature && (*lp == '/')) {
145           error_msg = "Method pattern uses mixed '/' and '.' package separators";
146           return false;
147         }
148 
149         if (*lp == '.') {
150           error_msg = "Method pattern uses multiple '.' in pattern";
151           return false;
152         }
153       }
154     }
155   }
156 
157   for (char* lp = line; *lp != '\0'; lp++) {
158     // Allow '.' to separate the class name from the method name.
159     // This is the preferred spelling of methods:
160     //      exclude java/lang/String.indexOf(I)I
161     // Allow ',' for spaces (eases command line quoting).
162     //      exclude,java/lang/String.indexOf
163     // For backward compatibility, allow space as separator also.
164     //      exclude java/lang/String indexOf
165     //      exclude,java/lang/String,indexOf
166     // For easy cut-and-paste of method names, allow VM output format
167     // as produced by Method::print_short_name:
168     //      exclude java.lang.String::indexOf
169     // For simple implementation convenience here, convert them all to space.
170 
171     if (have_colon) {
172       if (*lp == '.')  *lp = '/';   // dots build the package prefix
173       if (*lp == ':')  *lp = ' ';
174     }
175     if (*lp == ',' || *lp == '.')  *lp = ' ';
176 
177 #ifdef _WINDOWS
178     // It seems hard to get Non-ASCII characters to work in all circumstances due
179     // to limitations in Windows. So only ASCII characters are supported on Windows.
180     if (!isascii(*lp)) {
181       error_msg = "Non-ASCII characters are not supported on Windows.";
182       return false;
183     }
184 #endif
185   }
186   return true;
187 }
188 
189 bool MethodMatcher::match(Symbol* candidate, Symbol* match, Mode match_mode) const {
190   if (match_mode == Any) {
191     return true;
192   }
193 
194   if (match_mode == Exact) {
195     return candidate == match;
196   }
197 
198   ResourceMark rm;
199   const char * candidate_string = candidate->as_C_string();
200   const char * match_string = match->as_C_string();
201 
202   switch (match_mode) {
203   case Prefix:
204     return strstr(candidate_string, match_string) == candidate_string;
205 
206   case Suffix: {
207     size_t clen = strlen(candidate_string);
208     size_t mlen = strlen(match_string);
209     return clen >= mlen && strcmp(candidate_string + clen - mlen, match_string) == 0;
210   }
211 
212   case Substring:
213     return strstr(candidate_string, match_string) != nullptr;
214 
215   default:
216     return false;
217   }
218 }
219 
220 static MethodMatcher::Mode check_mode(char name[], const char*& error_msg) {
221   int match = MethodMatcher::Exact;
222   if (name[0] == '*') {
223     if (strlen(name) == 1) {
224       return MethodMatcher::Any;
225     }
226     match |= MethodMatcher::Suffix;
227     memmove(name, name + 1, strlen(name + 1) + 1);
228   }
229 
230   size_t len = strlen(name);
231   if (len > 0 && name[len - 1] == '*') {
232     match |= MethodMatcher::Prefix;
233     name[--len] = '\0';
234   }
235 
236   if (strlen(name) == 0) {
237     error_msg = "** Not a valid pattern";
238     return MethodMatcher::Any;
239   }
240 
241   if (strstr(name, "*") != nullptr) {
242     error_msg = " Embedded * not allowed";
243     return MethodMatcher::Unknown;
244   }
245   return (MethodMatcher::Mode)match;
246 }
247 
// Advances 'line' past any run of spaces and tabs at its start and adds the
// number of characters skipped to *total_bytes_read. Nothing changes when the
// line does not start with a space or tab.
static void skip_leading_spaces(char*& line, int* total_bytes_read ) {
  const int skipped = (int)strspn(line, " \t");
  if (skipped <= 0) {
    return;
  }
  *total_bytes_read += skipped;
  line += skipped;
}
257 
258 void MethodMatcher::parse_method_pattern(char*& line, const char*& error_msg, MethodMatcher* matcher) {
259   MethodMatcher::Mode c_match;
260   MethodMatcher::Mode m_match;
261   char class_name[256] = {0};
262   char method_name[256] = {0};
263   char sig[1024] = {0};
264   int bytes_read = 0;
265   int total_bytes_read = 0;
266 
267   assert(error_msg == nullptr, "Dont call here with error_msg already set");
268 
269   if (!MethodMatcher::canonicalize(line, error_msg)) {
270     assert(error_msg != nullptr, "Message must be set if parsing failed");
271     return;
272   }
273 
274   skip_leading_spaces(line, &total_bytes_read);
275   if (*line == '\0') {
276     error_msg = "Method pattern missing from command";
277     return;
278   }
279 
280   if (2 == sscanf(line, "%255" RANGESLASH "%*[ ]" "%255"  RANGE0 "%n", class_name, method_name, &bytes_read)) {
281     c_match = check_mode(class_name, error_msg);
282     m_match = check_mode(method_name, error_msg);
283 
284     // Over-consumption
285     // method_name points to an option type or option name because the method name is not specified by users.
286     // In very rare case, the method name happens to be same as option type/name, so look ahead to make sure
287     // it doesn't show up again.
288     if ((OptionType::Unknown != CompilerOracle::parse_option_type(method_name) ||
289         CompileCommand::Unknown != CompilerOracle::parse_option_name(method_name)) &&
290         *(line + bytes_read) != '\0' &&
291         strstr(line + bytes_read, method_name) == nullptr) {
292       error_msg = "Did not specify any method name";
293       method_name[0] = '\0';
294       return;
295     }
296 
297     if ((strchr(class_name, JVM_SIGNATURE_SPECIAL) != nullptr) ||
298         (strchr(class_name, JVM_SIGNATURE_ENDSPECIAL) != nullptr)) {
299       error_msg = "Chars '<' and '>' not allowed in class name";
300       return;
301     }
302 
303     if ((strchr(method_name, JVM_SIGNATURE_SPECIAL) != nullptr) ||
304         (strchr(method_name, JVM_SIGNATURE_ENDSPECIAL) != nullptr)) {
305       if (!vmSymbols::object_initializer_name()->equals(method_name) &&
306           !vmSymbols::class_initializer_name()->equals(method_name)) {
307         error_msg = "Chars '<' and '>' only allowed in <init>, <clinit>";
308         return;
309       }
310     }
311 
312     if (c_match == MethodMatcher::Unknown || m_match == MethodMatcher::Unknown) {
313       assert(error_msg != nullptr, "Must have been set by check_mode()");
314       return;
315     }
316 
317     EXCEPTION_MARK;
318     Symbol* signature = nullptr;
319     line += bytes_read;
320     bytes_read = 0;
321 
322     skip_leading_spaces(line, &total_bytes_read);
323 
324     // there might be a signature following the method.
325     // signatures always begin with ( so match that by hand
326     if (line[0] == '(') {
327       line++;
328       sig[0] = '(';
329       // scan the rest
330       if (1 == sscanf(line, "%1022[[);/" RANGEBASE "]%n", sig+1, &bytes_read)) {
331         if (strchr(sig, '*') != nullptr) {
332           error_msg = " Wildcard * not allowed in signature";
333           return;
334         }
335         line += bytes_read;
336       }
337       signature = SymbolTable::new_symbol(sig);
338     }
339     Symbol* c_name = SymbolTable::new_symbol(class_name);
340     Symbol* m_name = SymbolTable::new_symbol(method_name);
341 
342     matcher->init(c_name, c_match, m_name, m_match, signature);
343     return;
344   } else {
345     error_msg = "Could not parse method pattern";
346   }
347 }
348 
349 bool MethodMatcher::matches(const methodHandle& method) const {
350   Symbol* class_name  = method->method_holder()->name();
351   Symbol* method_name = method->name();
352   Symbol* signature = method->signature();
353 
354   if (match(class_name, this->class_name(), _class_mode) &&
355       match(method_name, this->method_name(), _method_mode) &&
356       ((this->signature() == nullptr) || match(signature, this->signature(), Prefix))) {
357     return true;
358   }
359   return false;
360 }
361 
362 void MethodMatcher::print_symbol(outputStream* st, Symbol* h, Mode mode) {
363   if (mode == Suffix || mode == Substring || mode == Any) {
364     st->print("*");
365   }
366   if (mode != Any) {
367     h->print_utf8_on(st);
368   }
369   if (mode == Prefix || mode == Substring) {
370     st->print("*");
371   }
372 }
373 
374 void MethodMatcher::print_base(outputStream* st) {
375   ResourceMark rm;
376 
377   print_symbol(st, class_name(), _class_mode);
378   st->print(".");
379   print_symbol(st, method_name(), _method_mode);
380   if (signature() != nullptr) {
381     signature()->print_utf8_on(st);
382   }
383 }
384 
385 BasicMatcher* BasicMatcher::parse_method_pattern(char* line, const char*& error_msg, bool expect_trailing_chars) {
386   assert(error_msg == nullptr, "Don't call here with error_msg already set");
387   BasicMatcher* bm = new BasicMatcher();
388   MethodMatcher::parse_method_pattern(line, error_msg, bm);
389   if (error_msg != nullptr) {
390     delete bm;
391     return nullptr;
392   }
393   if (!expect_trailing_chars) {
394     // check for bad trailing characters
395     int bytes_read = 0;
396     sscanf(line, "%*[ \t]%n", &bytes_read);
397     if (line[bytes_read] != '\0') {
398       error_msg = "Unrecognized trailing text after method pattern";
399       delete bm;
400       return nullptr;
401     }
402   }
403   return bm;
404 }
405 
406 bool BasicMatcher::match(const methodHandle& method) {
407   for (BasicMatcher* current = this; current != nullptr; current = current->next()) {
408     if (current->matches(method)) {
409       return true;
410     }
411   }
412   return false;
413 }
414 
415 void InlineMatcher::print(outputStream* st) {
416   if (_inline_action == InlineMatcher::force_inline) {
417     st->print("+");
418   } else {
419     st->print("-");
420   }
421   print_base(st);
422 }
423 
424 InlineMatcher* InlineMatcher::parse_method_pattern(char* line, const char*& error_msg) {
425   assert(error_msg == nullptr, "Dont call here with error_msg already set");
426   InlineMatcher* im = new InlineMatcher();
427   MethodMatcher::parse_method_pattern(line, error_msg, im);
428   if (error_msg != nullptr) {
429     delete im;
430     return nullptr;
431   }
432   return im;
433 }
434 
435 bool InlineMatcher::match(const methodHandle& method, int inline_action) {
436   for (InlineMatcher* current = this; current != nullptr; current = current->next()) {
437     if (current->matches(method)) {
438       return (current->_inline_action == inline_action);
439     }
440   }
441   return false;
442 }
443 
444 InlineMatcher* InlineMatcher::parse_inline_pattern(char* str, const char*& error_msg) {
445   // check first token is +/-
446   InlineType _inline_action;
447    switch (str[0]) {
448    case '-':
449      _inline_action = InlineMatcher::dont_inline;
450      break;
451    case '+':
452      _inline_action = InlineMatcher::force_inline;
453      break;
454    default:
455      error_msg = "Missing leading inline type (+/-)";
456      return nullptr;
457    }
458    str++;
459 
460    assert(error_msg == nullptr, "error_msg must not be set yet");
461    InlineMatcher* im = InlineMatcher::parse_method_pattern(str, error_msg);
462    if (im == nullptr) {
463      assert(error_msg != nullptr, "Must have error message");
464      return nullptr;
465    }
466    im->set_action(_inline_action);
467    return im;
468 }
469 
470 InlineMatcher* InlineMatcher::clone() {
471    InlineMatcher* m = new InlineMatcher();
472    m->_class_mode =  _class_mode;
473    m->_method_mode = _method_mode;
474    m->_inline_action = _inline_action;
475    m->_class_name = _class_name;
476    if(_class_name != nullptr) {
477      _class_name->increment_refcount();
478    }
479    m->_method_name = _method_name;
480    if (_method_name != nullptr) {
481      _method_name->increment_refcount();
482    }
483    m->_signature = _signature;
484    if (_signature != nullptr) {
485      _signature->increment_refcount();
486    }
487    return m;
488 }