< prev index next >

src/hotspot/share/utilities/stringUtils.cpp

Print this page

  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "utilities/debug.hpp"

 27 #include "utilities/stringUtils.hpp"
 28 
 29 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
 30   int replace_count = 0;
 31   size_t from_len = strlen(from);
 32   size_t to_len = strlen(to);
 33   assert(from_len >= to_len, "must not expand input");
 34 
 35   for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) {
 36     char* left_over = dst + from_len;
 37     memmove(dst, to, to_len);                       // does not copy trailing 0 of <to>
 38     dst += to_len;                                  // skip over the replacement.
 39     memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
 40     ++ replace_count;
 41   }
 42 
 43   return replace_count;
 44 }
 45 
 46 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {

 48 
 49   // filter out zero-length strings else we will underflow on len-1 below
 50   if (len1 == 0 || len2 == 0) {
 51     return 0.0;
 52   }
 53 
 54   size_t total = len1 + len2;
 55   size_t hit = 0;
 56 
 57   for (size_t i = 0; i < len1 - 1; i++) {
 58     for (size_t j = 0; j < len2 - 1; j++) {
 59       if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
 60         ++hit;
 61         break;
 62       }
 63     }
 64   }
 65 
 66   return 2.0 * (double) hit / (double) total;
 67 }
























































































































































































































  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "utilities/debug.hpp"
 27 #include "utilities/ostream.hpp"
 28 #include "utilities/stringUtils.hpp"
 29 
 30 int StringUtils::replace_no_expand(char* string, const char* from, const char* to) {
 31   int replace_count = 0;
 32   size_t from_len = strlen(from);
 33   size_t to_len = strlen(to);
 34   assert(from_len >= to_len, "must not expand input");
 35 
 36   for (char* dst = string; *dst && (dst = strstr(dst, from)) != NULL;) {
 37     char* left_over = dst + from_len;
 38     memmove(dst, to, to_len);                       // does not copy trailing 0 of <to>
 39     dst += to_len;                                  // skip over the replacement.
 40     memmove(dst, left_over, strlen(left_over) + 1); // copies the trailing 0 of <left_over>
 41     ++ replace_count;
 42   }
 43 
 44   return replace_count;
 45 }
 46 
 47 double StringUtils::similarity(const char* str1, size_t len1, const char* str2, size_t len2) {

 49 
 50   // filter out zero-length strings else we will underflow on len-1 below
 51   if (len1 == 0 || len2 == 0) {
 52     return 0.0;
 53   }
 54 
 55   size_t total = len1 + len2;
 56   size_t hit = 0;
 57 
 58   for (size_t i = 0; i < len1 - 1; i++) {
 59     for (size_t j = 0; j < len2 - 1; j++) {
 60       if ((str1[i] == str2[j]) && (str1[i+1] == str2[j+1])) {
 61         ++hit;
 62         break;
 63       }
 64     }
 65   }
 66 
 67   return 2.0 * (double) hit / (double) total;
 68 }
 69 
 70 class StringMatcher {
 71  public:
 72   typedef int getc_function_t(const char* &source, const char* limit);
 73 
 74  private:
 75   // These do not get properly inlined.
 76   // For full performance, this should be a template class
 77   // parameterized by two function arguments.
 78   getc_function_t* _pattern_getc;
 79   getc_function_t* _string_getc;
 80 
 81  public:
 82   StringMatcher(getc_function_t pattern_getc,
 83                 getc_function_t string_getc)
 84     : _pattern_getc(pattern_getc),
 85       _string_getc(string_getc)
 86   { }
 87 
 88   enum {  // special results from _pattern_getc
 89     string_match_comma  = -0x100 + ',',
 90     string_match_star   = -0x100 + '*',
 91     string_match_eos    = -0x100 + '\0'
 92   };
 93 
 94  private:
 95   const char*
 96   skip_anchor_word(const char* match,
 97                    const char* match_end,
 98                    int anchor_length,
 99                    const char* pattern,
100                    const char* pattern_end) {
101     assert(pattern < pattern_end && anchor_length > 0, "");
102     const char* begp = pattern;
103     int ch1 = _pattern_getc(begp, pattern_end);
104     // note that begp is now advanced over ch1
105     assert(ch1 > 0, "regular char only");
106     const char* matchp = match;
107     const char* limitp = match_end - anchor_length;
108     while (matchp <= limitp) {
109       int mch = _string_getc(matchp, match_end);
110       if (mch == ch1) {
111         const char* patp = begp;
112         const char* anchorp = matchp;
113         while (patp < pattern_end) {
114           char ch = _pattern_getc(patp, pattern_end);
115           char mch = _string_getc(anchorp, match_end);
116           if (mch != ch) {
117             anchorp = NULL;
118             break;
119           }
120         }
121         if (anchorp != NULL) {
122           return anchorp;  // Found a full copy of the anchor.
123         }
124         // That did not work, so restart the search for ch1.
125       }
126     }
127     return NULL;
128   }
129 
130  public:
131   bool string_match(const char* pattern,
132                     const char* string) {
133     return string_match(pattern, pattern + strlen(pattern),
134                         string, string + strlen(string));
135   }
136   bool string_match(const char* pattern, const char* pattern_end,
137                     const char* string, const char* string_end) {
138     const char* patp = pattern;
139     switch (_pattern_getc(patp, pattern_end)) {
140     case string_match_eos:
141       return false;  // Empty pattern is always false.
142     case string_match_star:
143       if (patp == pattern_end) {
144         return true;   // Lone star pattern is always true.
145       }
146       break;
147     }
148     patp = pattern;  // Reset after lookahead.
149     const char* matchp = string;  // NULL if failing
150     for (;;) {
151       int ch = _pattern_getc(patp, pattern_end);
152       switch (ch) {
153       case string_match_eos:
154       case string_match_comma:
155         // End of a list item; see if it's a match.
156         if (matchp == string_end) {
157           return true;
158         }
159         if (ch == string_match_comma) {
160           // Get ready to match the next item.
161           matchp = string;
162           continue;
163         }
164         return false;  // End of all items.
165 
166       case string_match_star:
167         if (matchp != NULL) {
168           // Wildcard:  Parse out following anchor word and look for it.
169           const char* begp = patp;
170           const char* endp = patp;
171           int anchor_len = 0;
172           for (;;) {
173             // get as many following regular characters as possible
174             endp = patp;
175             ch = _pattern_getc(patp, pattern_end);
176             if (ch <= 0) {
177               break;
178             }
179             anchor_len += 1;
180           }
181           // Anchor word [begp..endp) does not contain ch, so back up.
182           // Now do an eager match to the anchor word, and commit to it.
183           patp = endp;
184           if (ch == string_match_eos ||
185               ch == string_match_comma) {
186             // Anchor word is at end of pattern, so treat it as a fixed pattern.
187             const char* limitp = string_end - anchor_len;
188             matchp = limitp;
189             patp = begp;
190             // Resume normal scanning at the only possible match position.
191             continue;
192           }
193           // Find a floating occurrence of the anchor and continue matching.
194           // Note:  This is greedy; there is no backtrack here.  Good enough.
195           matchp = skip_anchor_word(matchp, string_end, anchor_len, begp, endp);
196         }
197         continue;
198       }
199       // Normal character.
200       if (matchp != NULL) {
201         int mch = _string_getc(matchp, string_end);
202         if (mch != ch) {
203           matchp = NULL;
204         }
205       }
206     }
207   }
208 };
209 
210 // Match a wildcarded class list to a proposed class name (in internal form).
211 // Commas or newlines separate multiple possible matches; stars are shell-style wildcards.
212 class ClassListMatcher : public StringMatcher {
213  public:
214   ClassListMatcher()
215     : StringMatcher(pattern_list_getc, class_name_getc)
216   { }
217 
218  private:
219   static int pattern_list_getc(const char* &pattern_ptr,
220                                const char* pattern_end) {
221     if (pattern_ptr == pattern_end) {
222       return string_match_eos;
223     }
224     int ch = (unsigned char) *pattern_ptr++;
225     switch (ch) {
226     case ' ': case '\t': case '\n': case '\r':
227     case ',':
228       // End of list item.
229       for (;;) {
230         switch (*pattern_ptr) {
231         case ' ': case '\t': case '\n': case '\r':
232         case ',':
233           pattern_ptr += 1;  // Collapse multiple commas or spaces.
234           continue;
235         }
236         break;
237       }
238       return string_match_comma;
239 
240     case '*':
241       // Wildcard, matching any number of chars.
242       while (*pattern_ptr == '*') {
243         pattern_ptr += 1;  // Collapse multiple stars.
244       }
245       return string_match_star;
246 
247     case '.':
248       ch = '/';   // Look for internal form of package separator
249       break;
250 
251     case '\\':
252       // Superquote in pattern escapes * , whitespace, and itself.
253       if (pattern_ptr < pattern_end) {
254         ch = (unsigned char) *pattern_ptr++;
255       }
256       break;
257     }
258 
259     assert(ch > 0, "regular char only");
260     return ch;
261   }
262 
263   static int class_name_getc(const char* &name_ptr,
264                              const char* name_end) {
265     if (name_ptr == name_end) {
266       return string_match_eos;
267     }
268     int ch = (unsigned char) *name_ptr++;
269     if (ch == '.') {
270       ch = '/';   // Normalize to internal form of package separator
271     }
272     return ch;  // plain character
273   }
274 };
275 
276 bool StringUtils::class_list_match(const char* class_pattern_list,
277                                    const char* class_name) {
278   if (class_pattern_list == NULL || class_name == NULL || class_name[0] == '\0')
279     return false;
280   ClassListMatcher clm;
281   return clm.string_match(class_pattern_list, class_name);
282 }
283 
< prev index next >