New src/hotspot/share/utilities/stringUtils.cpp

  1 /*
  2  * Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "jvm_io.h"
 26 #include "memory/allocation.hpp"
 27 #include "utilities/debug.hpp"
 28 #include "utilities/ostream.hpp"
 29 #include "utilities/stringUtils.hpp"
 30 
 31 #include <ctype.h>
 32 #include <string.h>
 33 
 34 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
 35   int replace_count = 0;
 36   size_t from_len = strlen(from);
 37   size_t to_len = strlen(to);
 38   assert(from_len >= to_len, "must not expand input");
 39 
 40   for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
 41     char* left_over = dst + from_len;
 42     memmove(dst, to, to_len);                       // does not copy trailing 0 of <to>
 43     dst += to_len;                                  // skip over the replacement.
 44     memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
 45     ++ replace_count;
 46   }
 47 
 48   return replace_count;
 49 }
 50 
 51 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
 52   assert(str1 != nullptr && str2 != nullptr, "sanity");
 53 
 54   // filter out zero-length strings else we will underflow on len-1 below
 55   if (len1 == 0 || len2 == 0) {
 56     return 0.0;
 57   }
 58 
 59   size_t total = len1 + len2;
 60   size_t hit = 0;
 61 
 62   for (size_t i = 0; i < len1 - 1; i++) {
 63     for (size_t j = 0; j < len2 - 1; j++) {
 64       if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
 65         ++hit;
 66         break;
 67       }
 68     }
 69   }
 70 
 71   return 2.0 * (double) hit / (double) total;
 72 }
 73 
 74 class StringMatcher {
 75  public:
 76   typedef int getc_function_t(const char* &source, const char* limit);
 77 
 78  private:
 79   // These do not get properly inlined.
 80   // For full performance, this should be a template class
 81   // parameterized by two function arguments.
 82   getc_function_t* _pattern_getc;
 83   getc_function_t* _string_getc;
 84 
 85  public:
 86   StringMatcher(getc_function_t pattern_getc,
 87                 getc_function_t string_getc)
 88     : _pattern_getc(pattern_getc),
 89       _string_getc(string_getc)
 90   { }
 91 
 92   enum {  // special results from _pattern_getc
 93     string_match_comma  = -0x100 + ',',
 94     string_match_star   = -0x100 + '*',
 95     string_match_eos    = -0x100 + '\0'
 96   };
 97 
 98  private:
 99   const char*
100   skip_anchor_word(const char* match,
101                    const char* match_end,
102                    int anchor_length,
103                    const char* pattern,
104                    const char* pattern_end) {
105     assert(pattern < pattern_end && anchor_length > 0, "");
106     const char* begp = pattern;
107     int ch1 = _pattern_getc(begp, pattern_end);
108     // note that begp is now advanced over ch1
109     assert(ch1 > 0, "regular char only");
110     const char* matchp = match;
111     const char* limitp = match_end - anchor_length;
112     while (matchp <= limitp) {
113       int mch = _string_getc(matchp, match_end);
114       if (mch == ch1) {
115         const char* patp = begp;
116         const char* anchorp = matchp;
117         while (patp < pattern_end) {
118           char ch = _pattern_getc(patp, pattern_end);
119           char mch = _string_getc(anchorp, match_end);
120           if (mch != ch) {
121             anchorp = nullptr;
122             break;
123           }
124         }
125         if (anchorp != nullptr) {
126           return anchorp;  // Found a full copy of the anchor.
127         }
128         // That did not work, so restart the search for ch1.
129       }
130     }
131     return nullptr;
132   }
133 
134  public:
135   bool string_match(const char* pattern,
136                     const char* string) {
137     return string_match(pattern, pattern + strlen(pattern),
138                         string, string + strlen(string));
139   }
140   bool string_match(const char* pattern, const char* pattern_end,
141                     const char* string, const char* string_end) {
142     const char* patp = pattern;
143     switch (_pattern_getc(patp, pattern_end)) {
144     case string_match_eos:
145       return false;  // Empty pattern is always false.
146     case string_match_star:
147       if (patp == pattern_end) {
148         return true;   // Lone star pattern is always true.
149       }
150       break;
151     }
152     patp = pattern;  // Reset after lookahead.
153     const char* matchp = string;  // nullptr if failing
154     for (;;) {
155       int ch = _pattern_getc(patp, pattern_end);
156       switch (ch) {
157       case string_match_eos:
158       case string_match_comma:
159         // End of a list item; see if it's a match.
160         if (matchp == string_end) {
161           return true;
162         }
163         if (ch == string_match_comma) {
164           // Get ready to match the next item.
165           matchp = string;
166           continue;
167         }
168         return false;  // End of all items.
169 
170       case string_match_star:
171         if (matchp != nullptr) {
172           // Wildcard:  Parse out following anchor word and look for it.
173           const char* begp = patp;
174           const char* endp = patp;
175           int anchor_len = 0;
176           for (;;) {
177             // get as many following regular characters as possible
178             endp = patp;
179             ch = _pattern_getc(patp, pattern_end);
180             if (ch <= 0) {
181               break;
182             }
183             anchor_len += 1;
184           }
185           // Anchor word [begp..endp) does not contain ch, so back up.
186           // Now do an eager match to the anchor word, and commit to it.
187           patp = endp;
188           if (ch == string_match_eos ||
189               ch == string_match_comma) {
190             // Anchor word is at end of pattern, so treat it as a fixed pattern.
191             const char* limitp = string_end - anchor_len;
192             matchp = limitp;
193             patp = begp;
194             // Resume normal scanning at the only possible match position.
195             continue;
196           }
197           // Find a floating occurrence of the anchor and continue matching.
198           // Note:  This is greedy; there is no backtrack here.  Good enough.
199           matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
200         }
201         continue;
202       }
203       // Normal character.
204       if (matchp != nullptr) {
205         int mch = _string_getc(matchp, string_end);
206         if (mch != ch) {
207           matchp = nullptr;
208         }
209       }
210     }
211   }
212 };
213 
214 // Match a wildcarded class list to a proposed class name (in internal form).
215 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
216 class ClassListMatcher : public StringMatcher {
217  public:
218   ClassListMatcher()
219     : StringMatcher(pattern_list_getc, class_name_getc)
220   { }
221 
222  private:
223   static int pattern_list_getc(const char* &pattern_ptr,
224                                const char* pattern_end) {
225     if (pattern_ptr == pattern_end) {
226       return string_match_eos;
227     }
228     int ch = (unsigned char) *pattern_ptr++;
229     switch (ch) {
230     case ' ': case '\t': case '\n': case '\r':
231     case ',':
232       // End of list item.
233       for (;;) {
234         switch (*pattern_ptr) {
235         case ' ': case '\t': case '\n': case '\r':
236         case ',':
237           pattern_ptr += 1;  // Collapse multiple commas or spaces.
238           continue;
239         }
240         break;
241       }
242       return string_match_comma;
243 
244     case '*':
245       // Wildcard, matching any number of chars.
246       while (*pattern_ptr == '*') {
247         pattern_ptr += 1;  // Collapse multiple stars.
248       }
249       return string_match_star;
250 
251     case '.':
252       ch = '/';   // Look for internal form of package separator
253       break;
254 
255     case '\\':
256       // Superquote in pattern escapes * , whitespace, and itself.
257       if (pattern_ptr < pattern_end) {
258         ch = (unsigned char) *pattern_ptr++;
259       }
260       break;
261     }
262 
263     assert(ch > 0, "regular char only");
264     return ch;
265   }
266 
267   static int class_name_getc(const char* &name_ptr,
268                              const char* name_end) {
269     if (name_ptr == name_end) {
270       return string_match_eos;
271     }
272     int ch = (unsigned char) *name_ptr++;
273     if (ch == '.') {
274       ch = '/';   // Normalize to internal form of package separator
275     }
276     return ch;  // plain character
277   }
278 };
279 
280 bool StringUtils::class_list_match(const char* class_pattern_list,
281                                    const char* class_name) {
282   if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0')
283     return false;
284   ClassListMatcher clm;
285   return clm.string_match(class_pattern_list, class_name);
286 }
287 
288 
289 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
290   if (needle[0] == '\0') {
291     return haystack; // empty needle matches with anything
292   }
293   for (size_t i = 0; haystack[i] != '\0'; i++) {
294     bool matches = true;
295     for (size_t j = 0; needle[j] != '\0'; j++) {
296       if (haystack[i + j] == '\0') {
297         return nullptr; // hit end of haystack, abort
298       }
299       if (tolower(haystack[i + j]) != tolower(needle[j])) {
300         matches = false;
301         break; // abort, try next i
302       }
303     }
304     if (matches) {
305       return &haystack[i]; // all j were ok for this i
306     }
307   }
308   return nullptr; // no i was a match
309 }
310 
311 bool StringUtils::is_star_match(const char* star_pattern, const char* str) {
312   const int N = 1000;
313   char pattern[N]; // copy pattern into this to ensure null termination
314   jio_snprintf(pattern, N, "%s", star_pattern);// ensures null termination
315   char buf[N]; // copy parts of pattern into this
316   const char* str_idx = str;
317   const char* pattern_idx = pattern;
318   while (strlen(pattern_idx) > 0) {
319     // find next section in pattern
320     const char* pattern_part_end = strstr(pattern_idx, "*");
321     const char* pattern_part = pattern_idx;
322     if (pattern_part_end != nullptr) { // copy part into buffer
323       size_t pattern_part_len = pattern_part_end-pattern_part;
324       strncpy(buf, pattern_part, pattern_part_len);
325       buf[pattern_part_len] = '\0'; // end of string
326       pattern_part = buf;
327     }
328     // find this section in s, case insensitive
329     const char* str_match = strstr_nocase(str_idx, pattern_part);
330     if (str_match == nullptr) {
331       return false; // r_part did not match - abort
332     }
333     size_t match_len = strlen(pattern_part);
334     // advance to match position plus part length
335     str_idx = str_match + match_len;
336     // advance by part length and "*"
337     pattern_idx += match_len + (pattern_part_end == nullptr ? 0 : 1);
338   }
339   return true; // all parts of pattern matched
340 }
341 
// Releases the character array referenced by _list via FREE_C_HEAP_ARRAY.
// NOTE(review): _list is presumably the buffer returned by canonicalize();
// its declaration and initialization are not part of this file - confirm.
StringUtils::CommaSeparatedStringIterator::~CommaSeparatedStringIterator() {
  FREE_C_HEAP_ARRAY(char, _list);
}
345 
346 ccstrlist StringUtils::CommaSeparatedStringIterator::canonicalize(ccstrlist option_value) {
347   char* canonicalized_list = NEW_C_HEAP_ARRAY(char, strlen(option_value) + 1, mtCompiler);
348   int i = 0;
349   char current;
350   while ((current = option_value[i]) != '\0') {
351     if (current == '\n' || current == ' ') {
352       canonicalized_list[i] = ',';
353     } else {
354       canonicalized_list[i] = current;
355     }
356     i++;
357   }
358   canonicalized_list[i] = '\0';
359   return canonicalized_list;
360 }