1 /* 2 * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jvm_io.h" 27 #include "memory/allocation.hpp" 28 #include "utilities/debug.hpp" 29 #include "utilities/ostream.hpp" 30 #include "utilities/stringUtils.hpp" 31 32 #include <ctype.h> 33 #include <string.h> 34 35 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) { 36 int replace_count = 0; 37 size_t from_len = strlen(from); 38 size_t to_len = strlen(to); 39 assert(from_len >= to_len, "must not expand input"); 40 41 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) { 42 char* left_over = dst + from_len; 43 memmove(dst, to, to_len); // does not copy trailing 0 of <to> 44 dst += to_len; // skip over the replacement. 45 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over> 46 ++ replace_count; 47 } 48 49 return replace_count; 50 } 51 52 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) { 53 assert(str1 != nullptr && str2 != nullptr, "sanity"); 54 55 // filter out zero-length strings else we will underflow on len-1 below 56 if (len1 == 0 || len2 == 0) { 57 return 0.0; 58 } 59 60 size_t total = len1 + len2; 61 size_t hit = 0; 62 63 for (size_t i = 0; i < len1 - 1; i++) { 64 for (size_t j = 0; j < len2 - 1; j++) { 65 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) { 66 ++hit; 67 break; 68 } 69 } 70 } 71 72 return 2.0 * (double) hit / (double) total; 73 } 74 75 class StringMatcher { 76 public: 77 typedef int getc_function_t(const char* &source, const char* limit); 78 79 private: 80 // These do not get properly inlined. 81 // For full performance, this should be a template class 82 // parameterized by two function arguments. 83 getc_function_t* _pattern_getc; 84 getc_function_t* _string_getc; 85 86 public: 87 StringMatcher(getc_function_t pattern_getc, 88 getc_function_t string_getc) 89 : _pattern_getc(pattern_getc), 90 _string_getc(string_getc) 91 { } 92 93 enum { // special results from _pattern_getc 94 string_match_comma = -0x100 + ',', 95 string_match_star = -0x100 + '*', 96 string_match_eos = -0x100 + '\0' 97 }; 98 99 private: 100 const char* 101 skip_anchor_word(const char* match, 102 const char* match_end, 103 int anchor_length, 104 const char* pattern, 105 const char* pattern_end) { 106 assert(pattern < pattern_end && anchor_length > 0, ""); 107 const char* begp = pattern; 108 int ch1 = _pattern_getc(begp, pattern_end); 109 // note that begp is now advanced over ch1 110 assert(ch1 > 0, "regular char only"); 111 const char* matchp = match; 112 const char* limitp = match_end - anchor_length; 113 while (matchp <= limitp) { 114 int mch = _string_getc(matchp, match_end); 115 if (mch == ch1) { 116 const char* patp = begp; 117 const char* anchorp = matchp; 118 while (patp < pattern_end) { 119 char ch = _pattern_getc(patp, pattern_end); 120 char mch = _string_getc(anchorp, match_end); 121 if (mch != ch) { 122 anchorp = nullptr; 123 break; 124 } 125 } 126 if (anchorp != nullptr) { 127 return anchorp; // Found a full copy of the anchor. 128 } 129 // That did not work, so restart the search for ch1. 130 } 131 } 132 return nullptr; 133 } 134 135 public: 136 bool string_match(const char* pattern, 137 const char* string) { 138 return string_match(pattern, pattern + strlen(pattern), 139 string, string + strlen(string)); 140 } 141 bool string_match(const char* pattern, const char* pattern_end, 142 const char* string, const char* string_end) { 143 const char* patp = pattern; 144 switch (_pattern_getc(patp, pattern_end)) { 145 case string_match_eos: 146 return false; // Empty pattern is always false. 147 case string_match_star: 148 if (patp == pattern_end) { 149 return true; // Lone star pattern is always true. 150 } 151 break; 152 } 153 patp = pattern; // Reset after lookahead. 154 const char* matchp = string; // nullptr if failing 155 for (;;) { 156 int ch = _pattern_getc(patp, pattern_end); 157 switch (ch) { 158 case string_match_eos: 159 case string_match_comma: 160 // End of a list item; see if it's a match. 161 if (matchp == string_end) { 162 return true; 163 } 164 if (ch == string_match_comma) { 165 // Get ready to match the next item. 166 matchp = string; 167 continue; 168 } 169 return false; // End of all items. 170 171 case string_match_star: 172 if (matchp != nullptr) { 173 // Wildcard: Parse out following anchor word and look for it. 174 const char* begp = patp; 175 const char* endp = patp; 176 int anchor_len = 0; 177 for (;;) { 178 // get as many following regular characters as possible 179 endp = patp; 180 ch = _pattern_getc(patp, pattern_end); 181 if (ch <= 0) { 182 break; 183 } 184 anchor_len += 1; 185 } 186 // Anchor word [begp..endp) does not contain ch, so back up. 187 // Now do an eager match to the anchor word, and commit to it. 188 patp = endp; 189 if (ch == string_match_eos || 190 ch == string_match_comma) { 191 // Anchor word is at end of pattern, so treat it as a fixed pattern. 192 const char* limitp = string_end - anchor_len; 193 matchp = limitp; 194 patp = begp; 195 // Resume normal scanning at the only possible match position. 196 continue; 197 } 198 // Find a floating occurrence of the anchor and continue matching. 199 // Note: This is greedy; there is no backtrack here. Good enough. 200 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp); 201 } 202 continue; 203 } 204 // Normal character. 205 if (matchp != nullptr) { 206 int mch = _string_getc(matchp, string_end); 207 if (mch != ch) { 208 matchp = nullptr; 209 } 210 } 211 } 212 } 213 }; 214 215 // Match a wildcarded class list to a proposed class name (in internal form). 216 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards. 217 class ClassListMatcher : public StringMatcher { 218 public: 219 ClassListMatcher() 220 : StringMatcher(pattern_list_getc, class_name_getc) 221 { } 222 223 private: 224 static int pattern_list_getc(const char* &pattern_ptr, 225 const char* pattern_end) { 226 if (pattern_ptr == pattern_end) { 227 return string_match_eos; 228 } 229 int ch = (unsigned char) *pattern_ptr++; 230 switch (ch) { 231 case ' ': case '\t': case '\n': case '\r': 232 case ',': 233 // End of list item. 234 for (;;) { 235 switch (*pattern_ptr) { 236 case ' ': case '\t': case '\n': case '\r': 237 case ',': 238 pattern_ptr += 1; // Collapse multiple commas or spaces. 239 continue; 240 } 241 break; 242 } 243 return string_match_comma; 244 245 case '*': 246 // Wildcard, matching any number of chars. 247 while (*pattern_ptr == '*') { 248 pattern_ptr += 1; // Collapse multiple stars. 249 } 250 return string_match_star; 251 252 case '.': 253 ch = '/'; // Look for internal form of package separator 254 break; 255 256 case '\\': 257 // Superquote in pattern escapes * , whitespace, and itself. 258 if (pattern_ptr < pattern_end) { 259 ch = (unsigned char) *pattern_ptr++; 260 } 261 break; 262 } 263 264 assert(ch > 0, "regular char only"); 265 return ch; 266 } 267 268 static int class_name_getc(const char* &name_ptr, 269 const char* name_end) { 270 if (name_ptr == name_end) { 271 return string_match_eos; 272 } 273 int ch = (unsigned char) *name_ptr++; 274 if (ch == '.') { 275 ch = '/'; // Normalize to internal form of package separator 276 } 277 return ch; // plain character 278 } 279 }; 280 281 bool StringUtils::class_list_match(const char* class_pattern_list, 282 const char* class_name) { 283 if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0') 284 return false; 285 ClassListMatcher clm; 286 return clm.string_match(class_pattern_list, class_name); 287 } 288 289 290 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) { 291 if (needle[0] == '\0') { 292 return haystack; // empty needle matches with anything 293 } 294 for (size_t i = 0; haystack[i] != '\0'; i++) { 295 bool matches = true; 296 for (size_t j = 0; needle[j] != '\0'; j++) { 297 if (haystack[i + j] == '\0') { 298 return nullptr; // hit end of haystack, abort 299 } 300 if (tolower(haystack[i + j]) != tolower(needle[j])) { 301 matches = false; 302 break; // abort, try next i 303 } 304 } 305 if (matches) { 306 return &haystack[i]; // all j were ok for this i 307 } 308 } 309 return nullptr; // no i was a match 310 } 311 312 bool StringUtils::is_star_match(const char* star_pattern, const char* str) { 313 const int N = 1000; 314 char pattern[N]; // copy pattern into this to ensure null termination 315 jio_snprintf(pattern, N, "%s", star_pattern);// ensures null termination 316 char buf[N]; // copy parts of pattern into this 317 const char* str_idx = str; 318 const char* pattern_idx = pattern; 319 while (strlen(pattern_idx) > 0) { 320 // find next section in pattern 321 const char* pattern_part_end = strstr(pattern_idx, "*"); 322 const char* pattern_part = pattern_idx; 323 if (pattern_part_end != nullptr) { // copy part into buffer 324 size_t pattern_part_len = pattern_part_end-pattern_part; 325 strncpy(buf, pattern_part, pattern_part_len); 326 buf[pattern_part_len] = '\0'; // end of string 327 pattern_part = buf; 328 } 329 // find this section in s, case insensitive 330 const char* str_match = strstr_nocase(str_idx, pattern_part); 331 if (str_match == nullptr) { 332 return false; // r_part did not match - abort 333 } 334 size_t match_len = strlen(pattern_part); 335 // advance to match position plus part length 336 str_idx = str_match + match_len; 337 // advance by part length and "*" 338 pattern_idx += match_len + (pattern_part_end == nullptr ? 0 : 1); 339 } 340 return true; // all parts of pattern matched 341 } 342 343 StringUtils::CommaSeparatedStringIterator::~CommaSeparatedStringIterator() { 344 FREE_C_HEAP_ARRAY(char, _list); 345 } 346 347 ccstrlist StringUtils::CommaSeparatedStringIterator::canonicalize(ccstrlist option_value) { 348 char* canonicalized_list = NEW_C_HEAP_ARRAY(char, strlen(option_value) + 1, mtCompiler); 349 int i = 0; 350 char current; 351 while ((current = option_value[i]) != '\0') { 352 if (current == '\n' || current == ' ') { 353 canonicalized_list[i] = ','; 354 } else { 355 canonicalized_list[i] = current; 356 } 357 i++; 358 } 359 canonicalized_list[i] = '\0'; 360 return canonicalized_list; 361 }