8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "jvm_io.h"
26 #include "memory/allocation.hpp"
27 #include "utilities/debug.hpp"
28 #include "utilities/stringUtils.hpp"
29
30 #include <ctype.h>
31 #include <string.h>
32
33 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
34 int replace_count = 0;
35 size_t from_len = strlen(from);
36 size_t to_len = strlen(to);
37 assert(from_len >= to_len, "must not expand input");
38
39 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
40 char* left_over = dst + from_len;
41 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
42 dst += to_len; // skip over the replacement.
43 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
44 ++ replace_count;
45 }
46
47 return replace_count;
53 // filter out zero-length strings else we will underflow on len-1 below
54 if (len1 == 0 || len2 == 0) {
55 return 0.0;
56 }
57
58 size_t total = len1 + len2;
59 size_t hit = 0;
60
61 for (size_t i = 0; i < len1 - 1; i++) {
62 for (size_t j = 0; j < len2 - 1; j++) {
63 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
64 ++hit;
65 break;
66 }
67 }
68 }
69
70 return 2.0 * (double) hit / (double) total;
71 }
72
73 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
74 if (needle[0] == '\0') {
75 return haystack; // empty needle matches with anything
76 }
77 for (size_t i = 0; haystack[i] != '\0'; i++) {
78 bool matches = true;
79 for (size_t j = 0; needle[j] != '\0'; j++) {
80 if (haystack[i + j] == '\0') {
81 return nullptr; // hit end of haystack, abort
82 }
83 if (tolower(haystack[i + j]) != tolower(needle[j])) {
84 matches = false;
85 break; // abort, try next i
86 }
87 }
88 if (matches) {
89 return &haystack[i]; // all j were ok for this i
90 }
91 }
92 return nullptr; // no i was a match
|
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "jvm_io.h"
26 #include "memory/allocation.hpp"
27 #include "utilities/debug.hpp"
28 #include "utilities/ostream.hpp"
29 #include "utilities/stringUtils.hpp"
30
31 #include <ctype.h>
32 #include <string.h>
33
34 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
35 int replace_count = 0;
36 size_t from_len = strlen(from);
37 size_t to_len = strlen(to);
38 assert(from_len >= to_len, "must not expand input");
39
40 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
41 char* left_over = dst + from_len;
42 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
43 dst += to_len; // skip over the replacement.
44 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
45 ++ replace_count;
46 }
47
48 return replace_count;
54 // filter out zero-length strings else we will underflow on len-1 below
55 if (len1 == 0 || len2 == 0) {
56 return 0.0;
57 }
58
59 size_t total = len1 + len2;
60 size_t hit = 0;
61
62 for (size_t i = 0; i < len1 - 1; i++) {
63 for (size_t j = 0; j < len2 - 1; j++) {
64 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
65 ++hit;
66 break;
67 }
68 }
69 }
70
71 return 2.0 * (double) hit / (double) total;
72 }
73
74 class StringMatcher {
75 public:
76 typedef int getc_function_t(const char* &source, const char* limit);
77
78 private:
79 // These do not get properly inlined.
80 // For full performance, this should be a template class
81 // parameterized by two function arguments.
82 getc_function_t* _pattern_getc;
83 getc_function_t* _string_getc;
84
85 public:
86 StringMatcher(getc_function_t pattern_getc,
87 getc_function_t string_getc)
88 : _pattern_getc(pattern_getc),
89 _string_getc(string_getc)
90 { }
91
92 enum { // special results from _pattern_getc
93 string_match_comma = -0x100 + ',',
94 string_match_star = -0x100 + '*',
95 string_match_eos = -0x100 + '\0'
96 };
97
98 private:
99 const char*
100 skip_anchor_word(const char* match,
101 const char* match_end,
102 int anchor_length,
103 const char* pattern,
104 const char* pattern_end) {
105 assert(pattern < pattern_end && anchor_length > 0, "");
106 const char* begp = pattern;
107 int ch1 = _pattern_getc(begp, pattern_end);
108 // note that begp is now advanced over ch1
109 assert(ch1 > 0, "regular char only");
110 const char* matchp = match;
111 const char* limitp = match_end - anchor_length;
112 while (matchp <= limitp) {
113 int mch = _string_getc(matchp, match_end);
114 if (mch == ch1) {
115 const char* patp = begp;
116 const char* anchorp = matchp;
117 while (patp < pattern_end) {
118 char ch = _pattern_getc(patp, pattern_end);
119 char mch = _string_getc(anchorp, match_end);
120 if (mch != ch) {
121 anchorp = nullptr;
122 break;
123 }
124 }
125 if (anchorp != nullptr) {
126 return anchorp; // Found a full copy of the anchor.
127 }
128 // That did not work, so restart the search for ch1.
129 }
130 }
131 return nullptr;
132 }
133
134 public:
135 bool string_match(const char* pattern,
136 const char* string) {
137 return string_match(pattern, pattern + strlen(pattern),
138 string, string + strlen(string));
139 }
140 bool string_match(const char* pattern, const char* pattern_end,
141 const char* string, const char* string_end) {
142 const char* patp = pattern;
143 switch (_pattern_getc(patp, pattern_end)) {
144 case string_match_eos:
145 return false; // Empty pattern is always false.
146 case string_match_star:
147 if (patp == pattern_end) {
148 return true; // Lone star pattern is always true.
149 }
150 break;
151 }
152 patp = pattern; // Reset after lookahead.
153 const char* matchp = string; // nullptr if failing
154 for (;;) {
155 int ch = _pattern_getc(patp, pattern_end);
156 switch (ch) {
157 case string_match_eos:
158 case string_match_comma:
159 // End of a list item; see if it's a match.
160 if (matchp == string_end) {
161 return true;
162 }
163 if (ch == string_match_comma) {
164 // Get ready to match the next item.
165 matchp = string;
166 continue;
167 }
168 return false; // End of all items.
169
170 case string_match_star:
171 if (matchp != nullptr) {
172 // Wildcard: Parse out following anchor word and look for it.
173 const char* begp = patp;
174 const char* endp = patp;
175 int anchor_len = 0;
176 for (;;) {
177 // get as many following regular characters as possible
178 endp = patp;
179 ch = _pattern_getc(patp, pattern_end);
180 if (ch <= 0) {
181 break;
182 }
183 anchor_len += 1;
184 }
185 // Anchor word [begp..endp) does not contain ch, so back up.
186 // Now do an eager match to the anchor word, and commit to it.
187 patp = endp;
188 if (ch == string_match_eos ||
189 ch == string_match_comma) {
190 // Anchor word is at end of pattern, so treat it as a fixed pattern.
191 const char* limitp = string_end - anchor_len;
192 matchp = limitp;
193 patp = begp;
194 // Resume normal scanning at the only possible match position.
195 continue;
196 }
197 // Find a floating occurrence of the anchor and continue matching.
198 // Note: This is greedy; there is no backtrack here. Good enough.
199 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
200 }
201 continue;
202 }
203 // Normal character.
204 if (matchp != nullptr) {
205 int mch = _string_getc(matchp, string_end);
206 if (mch != ch) {
207 matchp = nullptr;
208 }
209 }
210 }
211 }
212 };
213
// Match a wildcarded class list to a proposed class name (in internal form).
// Commas or whitespace (space, tab, newline, carriage return) separate multiple
// possible matches; stars are shell-style wildcards.
216 class ClassListMatcher : public StringMatcher {
217 public:
218 ClassListMatcher()
219 : StringMatcher(pattern_list_getc, class_name_getc)
220 { }
221
222 private:
223 static int pattern_list_getc(const char* &pattern_ptr,
224 const char* pattern_end) {
225 if (pattern_ptr == pattern_end) {
226 return string_match_eos;
227 }
228 int ch = (unsigned char) *pattern_ptr++;
229 switch (ch) {
230 case ' ': case '\t': case '\n': case '\r':
231 case ',':
232 // End of list item.
233 for (;;) {
234 switch (*pattern_ptr) {
235 case ' ': case '\t': case '\n': case '\r':
236 case ',':
237 pattern_ptr += 1; // Collapse multiple commas or spaces.
238 continue;
239 }
240 break;
241 }
242 return string_match_comma;
243
244 case '*':
245 // Wildcard, matching any number of chars.
246 while (*pattern_ptr == '*') {
247 pattern_ptr += 1; // Collapse multiple stars.
248 }
249 return string_match_star;
250
251 case '.':
252 ch = '/'; // Look for internal form of package separator
253 break;
254
255 case '\\':
256 // Superquote in pattern escapes * , whitespace, and itself.
257 if (pattern_ptr < pattern_end) {
258 ch = (unsigned char) *pattern_ptr++;
259 }
260 break;
261 }
262
263 assert(ch > 0, "regular char only");
264 return ch;
265 }
266
267 static int class_name_getc(const char* &name_ptr,
268 const char* name_end) {
269 if (name_ptr == name_end) {
270 return string_match_eos;
271 }
272 int ch = (unsigned char) *name_ptr++;
273 if (ch == '.') {
274 ch = '/'; // Normalize to internal form of package separator
275 }
276 return ch; // plain character
277 }
278 };
279
280 bool StringUtils::class_list_match(const char* class_pattern_list,
281 const char* class_name) {
282 if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0')
283 return false;
284 ClassListMatcher clm;
285 return clm.string_match(class_pattern_list, class_name);
286 }
287
288
289 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
290 if (needle[0] == '\0') {
291 return haystack; // empty needle matches with anything
292 }
293 for (size_t i = 0; haystack[i] != '\0'; i++) {
294 bool matches = true;
295 for (size_t j = 0; needle[j] != '\0'; j++) {
296 if (haystack[i + j] == '\0') {
297 return nullptr; // hit end of haystack, abort
298 }
299 if (tolower(haystack[i + j]) != tolower(needle[j])) {
300 matches = false;
301 break; // abort, try next i
302 }
303 }
304 if (matches) {
305 return &haystack[i]; // all j were ok for this i
306 }
307 }
308 return nullptr; // no i was a match
|