< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page

        

@@ -2098,11 +2098,11 @@
      * Parsing of sequences between alternations.
      */
     private Node sequence(Node end) {
         Node head = null;
         Node tail = null;
-        Node node;
+        Node node = null;
     LOOP:
         for (;;) {
             int ch = peek();
             switch (ch) {
             case '(':

@@ -2615,10 +2615,11 @@
      */
     private CharPredicate clazz(boolean consume) {
         CharPredicate prev = null;
         CharPredicate curr = null;
         BitClass bits = new BitClass();
+        BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch];
 
         boolean isNeg = false;
         boolean hasBits = false;
         int ch = next();
 

@@ -2655,13 +2656,13 @@
                             ch = peek();
                         }
                         if (hasBits) {
                             // bits used, union has high precedence
                             if (prev == null) {
-                                prev = curr = bits;
+                                prev = curr = bitsP;
                             } else {
-                                prev = prev.union(bits);
+                                prev = prev.union(bitsP);
                             }
                             hasBits = false;
                         }
                         if (right != null)
                             curr = right;

@@ -2686,13 +2687,13 @@
                 case ']':
                     if (prev != null || hasBits) {
                         if (consume)
                             next();
                         if (prev == null)
-                            prev = bits;
+                            prev = bitsP;
                         else if (hasBits)
-                            prev = prev.union(bits);
+                            prev = prev.union(bitsP);
                         if (isNeg)
                             return prev.negate();
                         return prev;
                     }
                     break;

@@ -2944,12 +2945,12 @@
      * the group. Sometimes a double return system is used where the tail is
      * returned in root.
      */
     private Node group0() {
         boolean capturingGroup = false;
-        Node head;
-        Node tail;
+        Node head = null;
+        Node tail = null;
         int save = flags0;
         int saveTCNCount = topClosureNodes.size();
         root = null;
         int ch = next();
         if (ch == '?') {

@@ -2994,11 +2995,11 @@
                 }
                 int start = cursor;
                 head = createGroup(true);
                 tail = root;
                 head.next = expr(tail);
-                tail.next = LookBehindEndNode.INSTANCE;
+                tail.next = lookbehindEnd;
                 TreeInfo info = new TreeInfo();
                 head.study(info);
                 if (info.maxValid == false) {
                     throw error("Look-behind group does not have "
                                 + "an obvious maximum length");

@@ -3250,10 +3251,11 @@
      * Processes repetition. If the next character peeked is a quantifier
      * then new nodes must be appended to handle the repetition.
      * Prev could be a single or a group, so it could be a chain of nodes.
      */
     private Node closure(Node prev) {
+        Node atom;
         int ch = peek();
         switch (ch) {
         case '?':
             ch = next();
             if (ch == '?') {

@@ -3275,43 +3277,42 @@
                 try {
                     do {
                         cmin = Math.addExact(Math.multiplyExact(cmin, 10),
                                              ch - '0');
                     } while (ASCII.isDigit(ch = read()));
+                    cmax = cmin;
                     if (ch == ',') {
                         ch = read();
-                        if (ch == '}') {
-                            unread();
-                            return curly(prev, cmin);
-                        } else {
+                        cmax = MAX_REPS;
+                        if (ch != '}') {
                             cmax = 0;
                             while (ASCII.isDigit(ch)) {
                                 cmax = Math.addExact(Math.multiplyExact(cmax, 10),
                                                      ch - '0');
                                 ch = read();
                             }
                         }
-                    } else {
-                        cmax = cmin;
                     }
                 } catch (ArithmeticException ae) {
                     throw error("Illegal repetition range");
                 }
                 if (ch != '}')
                     throw error("Unclosed counted closure");
                 if (cmax < cmin)
                     throw error("Illegal repetition range");
+                Curly curly;
                 ch = peek();
                 if (ch == '?') {
                     next();
-                    return new Curly(prev, cmin, cmax, Qtype.LAZY);
+                    curly = new Curly(prev, cmin, cmax, Qtype.LAZY);
                 } else if (ch == '+') {
                     next();
-                    return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
+                    curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
                 } else {
-                    return new Curly(prev, cmin, cmax, Qtype.GREEDY);
+                    curly = new Curly(prev, cmin, cmax, Qtype.GREEDY);
                 }
+                return curly;
             } else {
                 throw error("Illegal repetition");
             }
         default:
             return prev;

@@ -3482,14 +3483,18 @@
     /**
      *  Creates a bit vector for matching Latin-1 values. A normal BitClass
      *  never matches values above Latin-1, and a complemented BitClass always
      *  matches values above Latin-1.
      */
-    static final class BitClass implements BmpCharPredicate {
+    static final class BitClass extends BmpCharProperty {
         final boolean[] bits;
         BitClass() {
-            bits = new boolean[256];
+            this(new boolean[256]);
+        }
+        private BitClass(boolean[] bits) {
+            super( ch -> ch < 256 && bits[ch]);
+            this.bits = bits;
         }
         BitClass add(int c, int flags) {
             assert c >= 0 && c <= 255;
             if ((flags & CASE_INSENSITIVE) != 0) {
                 if (ASCII.isAscii(c)) {

@@ -3501,16 +3506,12 @@
                 }
             }
             bits[c] = true;
             return this;
         }
-        public boolean is(int ch) {
-            return ch < 256 && bits[ch];
-        }
     }
 
-
     /**
      *  Utility method for creating a string slice matcher.
      */
     private Node newSlice(int[] buf, int count, boolean hasSupplementary) {
         int[] tmp = new int[count];

@@ -3919,11 +3920,11 @@
     /**
      * Abstract node class to match one character satisfying some
      * boolean property.
      */
     static class CharProperty extends Node {
-        final CharPredicate predicate;
+        CharPredicate predicate;
 
         CharProperty (CharPredicate predicate) {
             this.predicate = predicate;
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {

@@ -3970,20 +3971,11 @@
 
         boolean match(Matcher matcher, int i, CharSequence seq) {
             if (i < matcher.to) {
                 int ch0 = Character.codePointAt(seq, i);
                 int n = Character.charCount(ch0);
-                int j = i + n;
-                // Fast check if it's necessary to call Normalizer;
-                // testing Grapheme.isBoundary is enough for this case
-                while (j < matcher.to) {
-                    int ch1 = Character.codePointAt(seq, j);
-                    if (Grapheme.isBoundary(ch0, ch1))
-                        break;
-                    ch0 = ch1;
-                    j += Character.charCount(ch1);
-                }
+                int j = Grapheme.nextBoundary(seq, i, matcher.to);
                 if (i + n == j) {    // single, assume nfc cp
                     if (predicate.is(ch0))
                         return next.match(matcher, j, seq);
                 } else {
                     while (i + n < j) {

@@ -4263,22 +4255,22 @@
             }
         }
     }
 
     /**
-     * Handles the greedy style repetition with the specified minimum
-     * and the maximum equal to MAX_REPS, for *, + and {N,} quantifiers.
+     * Handles the greedy style repetition with the minimum either be
+     * 0 or 1 and the maximum be MAX_REPS, for * and + quantifier.
      */
     static class CharPropertyGreedy extends Node {
         final CharPredicate predicate;
         final int cmin;
 
         CharPropertyGreedy(CharProperty cp, int cmin) {
             this.predicate = cp.predicate;
             this.cmin = cmin;
         }
-        boolean match(Matcher matcher, int i, CharSequence seq) {
+        boolean match(Matcher matcher, int i,  CharSequence seq) {
             int n = 0;
             int to = matcher.to;
             // greedy, all the way down
             while (i < to) {
                 int ch = Character.codePointAt(seq, i);

@@ -4317,11 +4309,11 @@
 
         BmpCharPropertyGreedy(BmpCharProperty bcp, int cmin) {
             super(bcp, cmin);
         }
 
-        boolean match(Matcher matcher, int i, CharSequence seq) {
+        boolean match(Matcher matcher, int i,  CharSequence seq) {
             int n = 0;
             int to = matcher.to;
             while (i < to && predicate.is(seq.charAt(i))) {
                 i++; n++;
             }

@@ -4694,11 +4686,11 @@
      * "next" but not the "study", so we can collect the TreeInfo
      * of each atom node without including the TreeInfo of the
      * "next".
      */
     static final class BranchConn extends Node {
-        BranchConn() {}
+        BranchConn() {};
         boolean match(Matcher matcher, int i, CharSequence seq) {
             return next.match(matcher, i, seq);
         }
         boolean study(TreeInfo info) {
             return info.deterministic;

@@ -4791,10 +4783,38 @@
             matcher.locals[localIndex] = i;
             boolean ret = next.match(matcher, i, seq);
             matcher.locals[localIndex] = save;
             return ret;
         }
+        boolean matchRef(Matcher matcher, int i, CharSequence seq) {
+            int save = matcher.locals[localIndex];
+            matcher.locals[localIndex] = ~i; // HACK
+            boolean ret = next.match(matcher, i, seq);
+            matcher.locals[localIndex] = save;
+            return ret;
+        }
+    }
+
+    /**
+     * Recursive reference to a group in the regular expression. It calls
+     * matchRef because if the reference fails to match we would not unset
+     * the group.
+     */
+    static final class GroupRef extends Node {
+        GroupHead head;
+        GroupRef(GroupHead head) {
+            this.head = head;
+        }
+        boolean match(Matcher matcher, int i, CharSequence seq) {
+            return head.matchRef(matcher, i, seq)
+                && next.match(matcher, matcher.last, seq);
+        }
+        boolean study(TreeInfo info) {
+            info.maxValid = false;
+            info.deterministic = false;
+            return next.study(info);
+        }
     }
 
     /**
      * The GroupTail handles the setting of group beginning and ending
      * locations when groups are successfully matched. It must also be able to

@@ -4912,11 +4932,11 @@
             }
             return next.match(matcher, i, seq);
         }
         boolean matchInit(Matcher matcher, int i, CharSequence seq) {
             int save = matcher.locals[countIndex];
-            boolean ret;
+            boolean ret = false;
             if (posIndex != -1 && matcher.localsPos[posIndex] == null) {
                 matcher.localsPos[posIndex] = new IntHashSet();
             }
             if (0 < cmin) {
                 matcher.locals[countIndex] = 1;

@@ -5126,21 +5146,56 @@
             info.deterministic = false;
             return next.study(info);
         }
     }
 
+    static final class Conditional extends Node {
+        Node cond, yes, not;
+        Conditional(Node cond, Node yes, Node not) {
+            this.cond = cond;
+            this.yes = yes;
+            this.not = not;
+        }
+        boolean match(Matcher matcher, int i, CharSequence seq) {
+            if (cond.match(matcher, i, seq)) {
+                return yes.match(matcher, i, seq);
+            } else {
+                return not.match(matcher, i, seq);
+            }
+        }
+        boolean study(TreeInfo info) {
+            int minL = info.minLength;
+            int maxL = info.maxLength;
+            boolean maxV = info.maxValid;
+            info.reset();
+            yes.study(info);
+
+            int minL2 = info.minLength;
+            int maxL2 = info.maxLength;
+            boolean maxV2 = info.maxValid;
+            info.reset();
+            not.study(info);
+
+            info.minLength = minL + Math.min(minL2, info.minLength);
+            info.maxLength = maxL + Math.max(maxL2, info.maxLength);
+            info.maxValid = (maxV & maxV2 & info.maxValid);
+            info.deterministic = false;
+            return next.study(info);
+        }
+    }
+
     /**
      * Zero width positive lookahead.
      */
     static final class Pos extends Node {
         Node cond;
         Pos(Node cond) {
             this.cond = cond;
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {
             int savedTo = matcher.to;
-            boolean conditionMatched;
+            boolean conditionMatched = false;
 
             // Relax transparent region boundaries for lookahead
             if (matcher.transparentBounds)
                 matcher.to = matcher.getTextLength();
             try {

@@ -5161,11 +5216,11 @@
         Neg(Node cond) {
             this.cond = cond;
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {
             int savedTo = matcher.to;
-            boolean conditionMatched;
+            boolean conditionMatched = false;
 
             // Relax transparent region boundaries for lookahead
             if (matcher.transparentBounds)
                 matcher.to = matcher.getTextLength();
             try {

@@ -5187,19 +5242,15 @@
 
     /**
      * For use with lookbehinds; matches the position where the lookbehind
      * was encountered.
      */
-    static class LookBehindEndNode extends Node {
-        private LookBehindEndNode() {} // Singleton
-
-        static LookBehindEndNode INSTANCE = new LookBehindEndNode();
-
+    static Node lookbehindEnd = new Node() {
         boolean match(Matcher matcher, int i, CharSequence seq) {
             return i == matcher.lookbehindTo;
         }
-    }
+    };
 
     /**
      * Zero width positive lookbehind.
      */
     static class Behind extends Node {

@@ -5463,11 +5514,11 @@
             // a shift larger than the pattern length cannot
             // be used anyway.
             if (patternLength < 4) {
                 return node;
             }
-            int i, j;
+            int i, j, k;
             int[] lastOcc = new int[128];
             int[] optoSft = new int[patternLength];
             // Precalculate part of the bad character shift
             // It is a table for where in the pattern each
             // lower 7-bit value occurs

@@ -5618,11 +5669,11 @@
     }
 
     static interface BmpCharPredicate extends CharPredicate {
 
         default CharPredicate and(CharPredicate p) {
-            if (p instanceof BmpCharPredicate)
+            if(p instanceof BmpCharPredicate)
                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
             return ch -> is(ch) && p.is(ch);
         }
         default CharPredicate union(CharPredicate p) {
             if (p instanceof BmpCharPredicate)
< prev index next >