9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "jvm_io.h"
27 #include "memory/allocation.hpp"
28 #include "utilities/debug.hpp"
29 #include "utilities/stringUtils.hpp"
30
31 #include <ctype.h>
32 #include <string.h>
33
34 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
35 int replace_count = 0;
36 size_t from_len = strlen(from);
37 size_t to_len = strlen(to);
38 assert(from_len >= to_len, "must not expand input");
39
40 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
41 char* left_over = dst + from_len;
42 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
43 dst += to_len; // skip over the replacement.
44 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
45 ++ replace_count;
46 }
47
48 return replace_count;
54 // filter out zero-length strings else we will underflow on len-1 below
55 if (len1 == 0 || len2 == 0) {
56 return 0.0;
57 }
58
59 size_t total = len1 + len2;
60 size_t hit = 0;
61
62 for (size_t i = 0; i < len1 - 1; i++) {
63 for (size_t j = 0; j < len2 - 1; j++) {
64 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
65 ++hit;
66 break;
67 }
68 }
69 }
70
71 return 2.0 * (double) hit / (double) total;
72 }
73
74 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
75 if (needle[0] == '\0') {
76 return haystack; // empty needle matches with anything
77 }
78 for (size_t i = 0; haystack[i] != '\0'; i++) {
79 bool matches = true;
80 for (size_t j = 0; needle[j] != '\0'; j++) {
81 if (haystack[i + j] == '\0') {
82 return nullptr; // hit end of haystack, abort
83 }
84 if (tolower(haystack[i + j]) != tolower(needle[j])) {
85 matches = false;
86 break; // abort, try next i
87 }
88 }
89 if (matches) {
90 return &haystack[i]; // all j were ok for this i
91 }
92 }
93 return nullptr; // no i was a match
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "jvm_io.h"
27 #include "memory/allocation.hpp"
28 #include "utilities/debug.hpp"
29 #include "utilities/ostream.hpp"
30 #include "utilities/stringUtils.hpp"
31
32 #include <ctype.h>
33 #include <string.h>
34
35 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
36 int replace_count = 0;
37 size_t from_len = strlen(from);
38 size_t to_len = strlen(to);
39 assert(from_len >= to_len, "must not expand input");
40
41 for (char* dst = string; *dst && (dst = strstr(dst, from)) != nullptr;) {
42 char* left_over = dst + from_len;
43 memmove(dst, to, to_len); // does not copy trailing 0 of <to>
44 dst += to_len; // skip over the replacement.
45 memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
46 ++ replace_count;
47 }
48
49 return replace_count;
55 // filter out zero-length strings else we will underflow on len-1 below
56 if (len1 == 0 || len2 == 0) {
57 return 0.0;
58 }
59
60 size_t total = len1 + len2;
61 size_t hit = 0;
62
63 for (size_t i = 0; i < len1 - 1; i++) {
64 for (size_t j = 0; j < len2 - 1; j++) {
65 if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
66 ++hit;
67 break;
68 }
69 }
70 }
71
72 return 2.0 * (double) hit / (double) total;
73 }
74
75 class StringMatcher {
76 public:
77 typedef int getc_function_t(const char* &source, const char* limit);
78
79 private:
80 // These do not get properly inlined.
81 // For full performance, this should be a template class
82 // parameterized by two function arguments.
83 getc_function_t* _pattern_getc;
84 getc_function_t* _string_getc;
85
86 public:
87 StringMatcher(getc_function_t pattern_getc,
88 getc_function_t string_getc)
89 : _pattern_getc(pattern_getc),
90 _string_getc(string_getc)
91 { }
92
93 enum { // special results from _pattern_getc
94 string_match_comma = -0x100 + ',',
95 string_match_star = -0x100 + '*',
96 string_match_eos = -0x100 + '\0'
97 };
98
99 private:
100 const char*
101 skip_anchor_word(const char* match,
102 const char* match_end,
103 int anchor_length,
104 const char* pattern,
105 const char* pattern_end) {
106 assert(pattern < pattern_end && anchor_length > 0, "");
107 const char* begp = pattern;
108 int ch1 = _pattern_getc(begp, pattern_end);
109 // note that begp is now advanced over ch1
110 assert(ch1 > 0, "regular char only");
111 const char* matchp = match;
112 const char* limitp = match_end - anchor_length;
113 while (matchp <= limitp) {
114 int mch = _string_getc(matchp, match_end);
115 if (mch == ch1) {
116 const char* patp = begp;
117 const char* anchorp = matchp;
118 while (patp < pattern_end) {
119 char ch = _pattern_getc(patp, pattern_end);
120 char mch = _string_getc(anchorp, match_end);
121 if (mch != ch) {
122 anchorp = nullptr;
123 break;
124 }
125 }
126 if (anchorp != nullptr) {
127 return anchorp; // Found a full copy of the anchor.
128 }
129 // That did not work, so restart the search for ch1.
130 }
131 }
132 return nullptr;
133 }
134
135 public:
136 bool string_match(const char* pattern,
137 const char* string) {
138 return string_match(pattern, pattern + strlen(pattern),
139 string, string + strlen(string));
140 }
141 bool string_match(const char* pattern, const char* pattern_end,
142 const char* string, const char* string_end) {
143 const char* patp = pattern;
144 switch (_pattern_getc(patp, pattern_end)) {
145 case string_match_eos:
146 return false; // Empty pattern is always false.
147 case string_match_star:
148 if (patp == pattern_end) {
149 return true; // Lone star pattern is always true.
150 }
151 break;
152 }
153 patp = pattern; // Reset after lookahead.
154 const char* matchp = string; // nullptr if failing
155 for (;;) {
156 int ch = _pattern_getc(patp, pattern_end);
157 switch (ch) {
158 case string_match_eos:
159 case string_match_comma:
160 // End of a list item; see if it's a match.
161 if (matchp == string_end) {
162 return true;
163 }
164 if (ch == string_match_comma) {
165 // Get ready to match the next item.
166 matchp = string;
167 continue;
168 }
169 return false; // End of all items.
170
171 case string_match_star:
172 if (matchp != nullptr) {
173 // Wildcard: Parse out following anchor word and look for it.
174 const char* begp = patp;
175 const char* endp = patp;
176 int anchor_len = 0;
177 for (;;) {
178 // get as many following regular characters as possible
179 endp = patp;
180 ch = _pattern_getc(patp, pattern_end);
181 if (ch <= 0) {
182 break;
183 }
184 anchor_len += 1;
185 }
186 // Anchor word [begp..endp) does not contain ch, so back up.
187 // Now do an eager match to the anchor word, and commit to it.
188 patp = endp;
189 if (ch == string_match_eos ||
190 ch == string_match_comma) {
191 // Anchor word is at end of pattern, so treat it as a fixed pattern.
192 const char* limitp = string_end - anchor_len;
193 matchp = limitp;
194 patp = begp;
195 // Resume normal scanning at the only possible match position.
196 continue;
197 }
198 // Find a floating occurrence of the anchor and continue matching.
199 // Note: This is greedy; there is no backtrack here. Good enough.
200 matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
201 }
202 continue;
203 }
204 // Normal character.
205 if (matchp != nullptr) {
206 int mch = _string_getc(matchp, string_end);
207 if (mch != ch) {
208 matchp = nullptr;
209 }
210 }
211 }
212 }
213 };
214
215 // Match a wildcarded class list to a proposed class name (in internal form).
216 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
217 class ClassListMatcher : public StringMatcher {
218 public:
219 ClassListMatcher()
220 : StringMatcher(pattern_list_getc, class_name_getc)
221 { }
222
223 private:
224 static int pattern_list_getc(const char* &pattern_ptr,
225 const char* pattern_end) {
226 if (pattern_ptr == pattern_end) {
227 return string_match_eos;
228 }
229 int ch = (unsigned char) *pattern_ptr++;
230 switch (ch) {
231 case ' ': case '\t': case '\n': case '\r':
232 case ',':
233 // End of list item.
234 for (;;) {
235 switch (*pattern_ptr) {
236 case ' ': case '\t': case '\n': case '\r':
237 case ',':
238 pattern_ptr += 1; // Collapse multiple commas or spaces.
239 continue;
240 }
241 break;
242 }
243 return string_match_comma;
244
245 case '*':
246 // Wildcard, matching any number of chars.
247 while (*pattern_ptr == '*') {
248 pattern_ptr += 1; // Collapse multiple stars.
249 }
250 return string_match_star;
251
252 case '.':
253 ch = '/'; // Look for internal form of package separator
254 break;
255
256 case '\\':
257 // Superquote in pattern escapes * , whitespace, and itself.
258 if (pattern_ptr < pattern_end) {
259 ch = (unsigned char) *pattern_ptr++;
260 }
261 break;
262 }
263
264 assert(ch > 0, "regular char only");
265 return ch;
266 }
267
268 static int class_name_getc(const char* &name_ptr,
269 const char* name_end) {
270 if (name_ptr == name_end) {
271 return string_match_eos;
272 }
273 int ch = (unsigned char) *name_ptr++;
274 if (ch == '.') {
275 ch = '/'; // Normalize to internal form of package separator
276 }
277 return ch; // plain character
278 }
279 };
280
281 bool StringUtils::class_list_match(const char* class_pattern_list,
282 const char* class_name) {
283 if (class_pattern_list == nullptr || class_name == nullptr || class_name[0] == '\0')
284 return false;
285 ClassListMatcher clm;
286 return clm.string_match(class_pattern_list, class_name);
287 }
288
289
290 const char* StringUtils::strstr_nocase(const char* haystack, const char* needle) {
291 if (needle[0] == '\0') {
292 return haystack; // empty needle matches with anything
293 }
294 for (size_t i = 0; haystack[i] != '\0'; i++) {
295 bool matches = true;
296 for (size_t j = 0; needle[j] != '\0'; j++) {
297 if (haystack[i + j] == '\0') {
298 return nullptr; // hit end of haystack, abort
299 }
300 if (tolower(haystack[i + j]) != tolower(needle[j])) {
301 matches = false;
302 break; // abort, try next i
303 }
304 }
305 if (matches) {
306 return &haystack[i]; // all j were ok for this i
307 }
308 }
309 return nullptr; // no i was a match
|