1 /*
  2  * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "jvm_io.h"
 27 #include "memory/allocation.hpp"
 28 #include "utilities/debug.hpp"
 29 #include "utilities/ostream.hpp"
 30 #include "utilities/stringUtils.hpp"
 31 
 32 #include <ctype.h>
 33 #include <string.h>
 34 
 35 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
 36   int replace_count = 0;
 37   size_t from_len = strlen(from);
 38   size_t to_len = strlen(to);
 39   assert(from_len >= to_len, "must not expand input");
 40 
 41   for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
 42     char* left_over = dst + from_len;
 43     memmove(dst, to, to_len);                       // does not copy trailing 0 of <to>
 44     dst += to_len;                                  // skip over the replacement.
 45     memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
 46     ++ replace_count;
 47   }
 48 
 49   return replace_count;
 50 }
 51 
 52 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
 53   assert(str1 != nullptr && str2 != nullptr, "sanity");
 54 
 55   // filter out zero-length strings else we will underflow on len-1 below
 56   if (len1 == 0 || len2 == 0) {
 57     return 0.0;
 58   }
 59 
 60   size_t total = len1 + len2;
 61   size_t hit = 0;
 62 
 63   for (size_t i = 0; i < len1 - 1; i++) {
 64     for (size_t j = 0; j < len2 - 1; j++) {
 65       if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
 66         ++hit;
 67         break;
 68       }
 69     }
 70   }
 71 
 72   return 2.0 * (double) hit / (double) total;
 73 }
 74 
 75 class StringMatcher {
 76  public:
 77   typedef int getc_function_t(const char* &source, const char* limit);
 78 
 79  private:
 80   // These do not get properly inlined.
 81   // For full performance, this should be a template class
 82   // parameterized by two function arguments.
 83   getc_function_t* _pattern_getc;
 84   getc_function_t* _string_getc;
 85 
 86  public:
 87   StringMatcher(getc_function_t pattern_getc,
 88                 getc_function_t string_getc)
 89     : _pattern_getc(pattern_getc),
 90       _string_getc(string_getc)
 91   { }
 92 
 93   enum {  // special results from _pattern_getc
 94     string_match_comma  = -0x100 + ',',
 95     string_match_star   = -0x100 + '*',
 96     string_match_eos    = -0x100 + '\0'
 97   };
 98 
 99  private:
100   const char*
101   skip_anchor_word(const char* match,
102                    const char* match_end,
103                    int anchor_length,
104                    const char* pattern,
105                    const char* pattern_end) {
106     assert(pattern < pattern_end && anchor_length > 0, "");
107     const char* begp = pattern;
108     int ch1 = _pattern_getc(begp, pattern_end);
109     // note that begp is now advanced over ch1
110     assert(ch1 > 0, "regular char only");
111     const char* matchp = match;
112     const char* limitp = match_end - anchor_length;
113     while (matchp <= limitp) {
114       int mch = _string_getc(matchp, match_end);
115       if (mch == ch1) {
116         const char* patp = begp;
117         const char* anchorp = matchp;
118         while (patp < pattern_end) {
119           char ch = _pattern_getc(patp, pattern_end);
120           char mch = _string_getc(anchorp, match_end);
121           if (mch != ch) {
122             anchorp = nullptr;
123             break;
124           }
125         }
126         if (anchorp != nullptr) {
127           return anchorp;  // Found a full copy of the anchor.
128         }
129         // That did not work, so restart the search for ch1.
130       }
131     }
132     return nullptr;
133   }
134 
135  public:
136   bool string_match(const char* pattern,
137                     const char* string) {
138     return string_match(pattern, pattern + strlen(pattern),
139                         string, string + strlen(string));
140   }
141   bool string_match(const char* pattern, const char* pattern_end,
142                     const char* string, const char* string_end) {
143     const char* patp = pattern;
144     switch (_pattern_getc(patp, pattern_end)) {
145     case string_match_eos:
146       return false;  // Empty pattern is always false.
147     case string_match_star:
148       if (patp == pattern_end) {
149         return true;   // Lone star pattern is always true.
150       }
151       break;
152     }
153     patp = pattern;  // Reset after lookahead.
154     const char* matchp = string;  // nullptr if failing
155     for (;;) {
156       int ch = _pattern_getc(patp, pattern_end);
157       switch (ch) {
158       case string_match_eos:
159       case string_match_comma:
160         // End of a list item; see if it's a match.
161         if (matchp == string_end) {
162           return true;
163         }
164         if (ch == string_match_comma) {
165           // Get ready to match the next item.
166           matchp = string;
167           continue;
168         }
169         return false;  // End of all items.
170 
171       case string_match_star:
172         if (matchp != nullptr) {
173           // Wildcard:  Parse out following anchor word and look for it.
174           const char* begp = patp;
175           const char* endp = patp;
176           int anchor_len = 0;
177           for (;;) {
178             // get as many following regular characters as possible
179             endp = patp;
180             ch = _pattern_getc(patp, pattern_end);
181             if (ch <= 0) {
182               break;
183             }
184             anchor_len += 1;
185           }
186           // Anchor word [begp..endp) does not contain ch, so back up.
187           // Now do an eager match to the anchor word, and commit to it.
188           patp = endp;
189           if (ch == string_match_eos ||
190               ch == string_match_comma) {
191             // Anchor word is at end of pattern, so treat it as a fixed pattern.
192             const char* limitp = string_end - anchor_len;
193             matchp = limitp;
194             patp = begp;
195             // Resume normal scanning at the only possible match position.
196             continue;
197           }
198           // Find a floating occurrence of the anchor and continue matching.
199           // Note:  This is greedy; there is no backtrack here.  Good enough.
200           matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
201         }
202         continue;
203       }
204       // Normal character.
205       if (matchp != nullptr) {
206         int mch = _string_getc(matchp, string_end);
207         if (mch != ch) {
208           matchp = nullptr;
209         }
210       }
211     }
212   }
213 };
214 
215 // Match a wildcarded class list to a proposed class name (in internal form).
216 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
217 class ClassListMatcher : public StringMatcher {
218  public:
219   ClassListMatcher()
220     : StringMatcher(pattern_list_getc, class_name_getc)
221   { }
222 
223  private:
224   static int pattern_list_getc(const char* &pattern_ptr,
225                                const char* pattern_end) {
226     if (pattern_ptr == pattern_end) {
227       return string_match_eos;
228     }
229     int ch = (unsigned char) *pattern_ptr++;
230     switch (ch) {
231     case ' ': case '\t': case '\n': case '\r':
232     case ',':
233       // End of list item.
234       for (;;) {
235         switch (*pattern_ptr) {
236         case ' ': case '\t': case '\n': case '\r':
237         case ',':
238           pattern_ptr += 1;  // Collapse multiple commas or spaces.
239           continue;
240         }
241         break;
242       }
243       return string_match_comma;
244 
245     case '*':
246       // Wildcard, matching any number of chars.
247       while (*pattern_ptr == '*') {
248         pattern_ptr += 1;  // Collapse multiple stars.
249       }
250       return string_match_star;
251 
252     case '.':
253       ch = '/';   // Look for internal form of package separator
254       break;
255 
256     case '\\':
257       // Superquote in pattern escapes * , whitespace, and itself.
258       if (pattern_ptr < pattern_end) {
259         ch = (unsigned char) *pattern_ptr++;
260       }
261       break;
262     }
263 
264     assert(ch > 0, "regular char only");
265     return ch;
266   }
267 
268   static int class_name_getc(const char* &name_ptr,
269                              const char* name_end) {
270     if (name_ptr == name_end) {
271       return string_match_eos;
272     }
273     int ch = (unsigned char) *name_ptr++;
274     if (ch == '.') {
275       ch = '/';   // Normalize to internal form of package separator
276     }
277     return ch;  // plain character
278   }
279 };
280 
281 bool StringUtils::class_list_match(const char* class_pattern_list,
282                                    const char* class_name) {
283   if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0')
284     return false;
285   ClassListMatcher clm;
286   return clm.string_match(class_pattern_list, class_name);
287 }
288 
289 
290 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
291   if (needle[0] == '\0') {
292     return haystack; // empty needle matches with anything
293   }
294   for (size_t i = 0; haystack[i] != '\0'; i++) {
295     bool matches = true;
296     for (size_t j = 0; needle[j] != '\0'; j++) {
297       if (haystack[i + j] == '\0') {
298         return nullptr; // hit end of haystack, abort
299       }
300       if (tolower(haystack[i + j]) != tolower(needle[j])) {
301         matches = false;
302         break; // abort, try next i
303       }
304     }
305     if (matches) {
306       return &haystack[i]; // all j were ok for this i
307     }
308   }
309   return nullptr; // no i was a match
310 }
311 
312 bool StringUtils::is_star_match(const char* star_pattern, const char* str) {
313   const int N = 1000;
314   char pattern[N]; // copy pattern into this to ensure null termination
315   jio_snprintf(pattern, N, "%s", star_pattern);// ensures null termination
316   char buf[N]; // copy parts of pattern into this
317   const char* str_idx = str;
318   const char* pattern_idx = pattern;
319   while (strlen(pattern_idx) > 0) {
320     // find next section in pattern
321     const char* pattern_part_end = strstr(pattern_idx, "*");
322     const char* pattern_part = pattern_idx;
323     if (pattern_part_end != nullptr) { // copy part into buffer
324       size_t pattern_part_len = pattern_part_end-pattern_part;
325       strncpy(buf, pattern_part, pattern_part_len);
326       buf[pattern_part_len] = '\0'; // end of string
327       pattern_part = buf;
328     }
329     // find this section in s, case insensitive
330     const char* str_match = strstr_nocase(str_idx, pattern_part);
331     if (str_match == nullptr) {
332       return false; // r_part did not match - abort
333     }
334     size_t match_len = strlen(pattern_part);
335     // advance to match position plus part length
336     str_idx = str_match + match_len;
337     // advance by part length and "*"
338     pattern_idx += match_len + (pattern_part_end == nullptr ? 0 : 1);
339   }
340   return true; // all parts of pattern matched
341 }
342 
// Releases the character array referenced by _list through FREE_C_HEAP_ARRAY.
// NOTE(review): _list is presumably the buffer returned by canonicalize(),
// which allocates with NEW_C_HEAP_ARRAY; confirm against the class declaration.
StringUtils::CommaSeparatedStringIterator::~CommaSeparatedStringIterator() {
  FREE_C_HEAP_ARRAY(char, _list);
}
346 
347 ccstrlist StringUtils::CommaSeparatedStringIterator::canonicalize(ccstrlist option_value) {
348   char* canonicalized_list = NEW_C_HEAP_ARRAY(char, strlen(option_value) + 1, mtCompiler);
349   int i = 0;
350   char current;
351   while ((current = option_value[i]) != '\0') {
352     if (current == '\n' || current == ' ') {
353       canonicalized_list[i] = ',';
354     } else {
355       canonicalized_list[i] = current;
356     }
357     i++;
358   }
359   canonicalized_list[i] = '\0';
360   return canonicalized_list;
361 }