1 /*
2 * Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "jvm_io.h"
26 #include "memory/allocation.hpp"
27 #include "runtime/os.hpp"
28 #include "utilities/debug.hpp"
29 #include "utilities/ostream.hpp"
30 #include "utilities/stringUtils.hpp"
31
32 #include <ctype.h>
33 #include <string.h>
34
35 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
36 int replace_count = 0;
37 size_t from_len = strlen(from);
38 size_t to_len = strlen(to);
39 assert(from_len >= to_len, "must not expand input");
40
41 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
42 char* left_over = dst + from_len;
43 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
44 dst += to_len; // skip over the replacement.
45 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
46 ++ replace_count;
47 }
48
49 return replace_count;
50 }
51
52 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {
53 assert(str1 != nullptr && str2 != nullptr, "sanity");
54
55 // filter out zero-length strings else we will underflow on len-1 below
56 if (len1 == 0 || len2 == 0) {
57 return 0.0;
58 }
59
60 size_t total = len1 + len2;
61 size_t hit = 0;
62
63 for (size_t i = 0; i < len1 - 1; i++) {
64 for (size_t j = 0; j < len2 - 1; j++) {
65 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
66 ++hit;
67 break;
68 }
69 }
70 }
71
72 return 2.0 * (double) hit / (double) total;
73 }
74
75 class StringMatcher {
76 public:
77 typedef int getc_function_t(const char* &source, const char* limit);
78
79 private:
80 // These do not get properly inlined.
81 // For full performance, this should be a template class
82 // parameterized by two function arguments.
83 getc_function_t* _pattern_getc;
84 getc_function_t* _string_getc;
85
86 public:
87 StringMatcher(getc_function_t pattern_getc,
88 getc_function_t string_getc)
89 : _pattern_getc(pattern_getc),
90 _string_getc(string_getc)
91 { }
92
93 enum { // special results from _pattern_getc
94 string_match_comma = -0x100 + ',',
95 string_match_star = -0x100 + '*',
96 string_match_eos = -0x100 + '\0'
97 };
98
99 private:
100 const char*
101 skip_anchor_word(const char* match,
102 const char* match_end,
103 int anchor_length,
104 const char* pattern,
105 const char* pattern_end) {
106 assert(pattern < pattern_end && anchor_length > 0, "");
107 const char* begp = pattern;
108 int ch1 = _pattern_getc(begp, pattern_end);
109 // note that begp is now advanced over ch1
110 assert(ch1 > 0, "regular char only");
111 const char* matchp = match;
112 const char* limitp = match_end - anchor_length;
113 while (matchp <= limitp) {
114 int mch = _string_getc(matchp, match_end);
115 if (mch == ch1) {
116 const char* patp = begp;
117 const char* anchorp = matchp;
118 while (patp < pattern_end) {
119 char ch = _pattern_getc(patp, pattern_end);
120 char mch = _string_getc(anchorp, match_end);
121 if (mch != ch) {
122 anchorp = nullptr;
123 break;
124 }
125 }
126 if (anchorp != nullptr) {
127 return anchorp; // Found a full copy of the anchor.
128 }
129 // That did not work, so restart the search for ch1.
130 }
131 }
132 return nullptr;
133 }
134
135 public:
136 bool string_match(const char* pattern,
137 const char* string) {
138 return string_match(pattern, pattern + strlen(pattern),
139 string, string + strlen(string));
140 }
141 bool string_match(const char* pattern, const char* pattern_end,
142 const char* string, const char* string_end) {
143 const char* patp = pattern;
144 switch (_pattern_getc(patp, pattern_end)) {
145 case string_match_eos:
146 return false; // Empty pattern is always false.
147 case string_match_star:
148 if (patp == pattern_end) {
149 return true; // Lone star pattern is always true.
150 }
151 break;
152 }
153 patp = pattern; // Reset after lookahead.
154 const char* matchp = string; // nullptr if failing
155 for (;;) {
156 int ch = _pattern_getc(patp, pattern_end);
157 switch (ch) {
158 case string_match_eos:
159 case string_match_comma:
160 // End of a list item; see if it's a match.
161 if (matchp == string_end) {
162 return true;
163 }
164 if (ch == string_match_comma) {
165 // Get ready to match the next item.
166 matchp = string;
167 continue;
168 }
169 return false; // End of all items.
170
171 case string_match_star:
172 if (matchp != nullptr) {
173 // Wildcard: Parse out following anchor word and look for it.
174 const char* begp = patp;
175 const char* endp = patp;
176 int anchor_len = 0;
177 for (;;) {
178 // get as many following regular characters as possible
179 endp = patp;
180 ch = _pattern_getc(patp, pattern_end);
181 if (ch <= 0) {
182 break;
183 }
184 anchor_len += 1;
185 }
186 // Anchor word [begp..endp) does not contain ch, so back up.
187 // Now do an eager match to the anchor word, and commit to it.
188 patp = endp;
189 if (ch == string_match_eos ||
190 ch == string_match_comma) {
191 // Anchor word is at end of pattern, so treat it as a fixed pattern.
192 const char* limitp = string_end - anchor_len;
193 matchp = limitp;
194 patp = begp;
195 // Resume normal scanning at the only possible match position.
196 continue;
197 }
198 // Find a floating occurrence of the anchor and continue matching.
199 // Note: This is greedy; there is no backtrack here. Good enough.
200 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
201 }
202 continue;
203 }
204 // Normal character.
205 if (matchp != nullptr) {
206 int mch = _string_getc(matchp, string_end);
207 if (mch != ch) {
208 matchp = nullptr;
209 }
210 }
211 }
212 }
213 };
214
215 // Match a wildcarded class list to a proposed class name (in internal form).
216 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
217 class ClassListMatcher : public StringMatcher {
218 public:
219 ClassListMatcher()
220 : StringMatcher(pattern_list_getc, class_name_getc)
221 { }
222
223 private:
224 static int pattern_list_getc(const char* &pattern_ptr,
225 const char* pattern_end) {
226 if (pattern_ptr == pattern_end) {
227 return string_match_eos;
228 }
229 int ch = (unsigned char) *pattern_ptr++;
230 switch (ch) {
231 case ' ': case '\t': case '\n': case '\r':
232 case ',':
233 // End of list item.
234 for (;;) {
235 switch (*pattern_ptr) {
236 case ' ': case '\t': case '\n': case '\r':
237 case ',':
238 pattern_ptr += 1; // Collapse multiple commas or spaces.
239 continue;
240 }
241 break;
242 }
243 return string_match_comma;
244
245 case '*':
246 // Wildcard, matching any number of chars.
247 while (*pattern_ptr == '*') {
248 pattern_ptr += 1; // Collapse multiple stars.
249 }
250 return string_match_star;
251
252 case '.':
253 ch = '/'; // Look for internal form of package separator
254 break;
255
256 case '\\':
257 // Superquote in pattern escapes * , whitespace, and itself.
258 if (pattern_ptr < pattern_end) {
259 ch = (unsigned char) *pattern_ptr++;
260 }
261 break;
262 }
263
264 assert(ch > 0, "regular char only");
265 return ch;
266 }
267
268 static int class_name_getc(const char* &name_ptr,
269 const char* name_end) {
270 if (name_ptr == name_end) {
271 return string_match_eos;
272 }
273 int ch = (unsigned char) *name_ptr++;
274 if (ch == '.') {
275 ch = '/'; // Normalize to internal form of package separator
276 }
277 return ch; // plain character
278 }
279 };
280
281 bool StringUtils::class_list_match(const char* class_pattern_list,
282 const char* class_name) {
283 if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0')
284 return false;
285 ClassListMatcher clm;
286 return clm.string_match(class_pattern_list, class_name);
287 }
288
289
290 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
291 if (needle[0] == '\0') {
292 return haystack; // empty needle matches with anything
293 }
294 for (size_t i = 0; haystack[i] != '\0'; i++) {
295 bool matches = true;
296 for (size_t j = 0; needle[j] != '\0'; j++) {
297 if (haystack[i + j] == '\0') {
298 return nullptr; // hit end of haystack, abort
299 }
300 if (tolower(haystack[i + j]) != tolower(needle[j])) {
301 matches = false;
302 break; // abort, try next i
303 }
304 }
305 if (matches) {
306 return &haystack[i]; // all j were ok for this i
307 }
308 }
309 return nullptr; // no i was a match
310 }
311
312 bool StringUtils::is_star_match(const char* star_pattern, const char* str) {
313 const int N = 1000;
314 char pattern[N]; // copy pattern into this to ensure null termination
315 jio_snprintf(pattern, N, "%s", star_pattern);// ensures null termination
316 char buf[N]; // copy parts of pattern into this
317 const char* str_idx = str;
318 const char* pattern_idx = pattern;
319 while (strlen(pattern_idx) > 0) {
320 // find next section in pattern
321 const char* pattern_part_end = strstr(pattern_idx, "*");
322 const char* pattern_part = pattern_idx;
323 if (pattern_part_end != nullptr) { // copy part into buffer
324 size_t pattern_part_len = pattern_part_end-pattern_part;
325 strncpy(buf, pattern_part, pattern_part_len);
326 buf[pattern_part_len] = '\0'; // end of string
327 pattern_part = buf;
328 }
329 // find this section in s, case insensitive
330 const char* str_match = strstr_nocase(str_idx, pattern_part);
331 if (str_match == nullptr) {
332 return false; // r_part did not match - abort
333 }
334 size_t match_len = strlen(pattern_part);
335 // advance to match position plus part length
336 str_idx = str_match + match_len;
337 // advance by part length and "*"
338 pattern_idx += match_len + (pattern_part_end == nullptr ? 0 : 1);
339 }
340 return true; // all parts of pattern matched
341 }
342
// Destructor: releases the character array held in _list through
// FREE_C_HEAP_ARRAY.
// NOTE(review): presumably _list is the buffer returned by canonicalize();
// confirm against the constructor in the header.
StringUtils::CommaSeparatedStringIterator::~CommaSeparatedStringIterator() {
  FREE_C_HEAP_ARRAY(char, _list);
}
346
347 ccstrlist StringUtils::CommaSeparatedStringIterator::canonicalize(ccstrlist option_value) {
348 char* canonicalized_list = NEW_C_HEAP_ARRAY(char, strlen(option_value) + 1, mtCompiler);
349 int i = 0;
350 char current;
351 while ((current = option_value[i]) != '\0') {
352 if (current == '\n' || current == ' ') {
353 canonicalized_list[i] = ',';
354 } else {
355 canonicalized_list[i] = current;
356 }
357 i++;
358 }
359 canonicalized_list[i] = '\0';
360 return canonicalized_list;
361 }