< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page

        

*** 2098,2108 **** * Parsing of sequences between alternations. */ private Node sequence(Node end) { Node head = null; Node tail = null; ! Node node; LOOP: for (;;) { int ch = peek(); switch (ch) { case '(': --- 2098,2108 ---- * Parsing of sequences between alternations. */ private Node sequence(Node end) { Node head = null; Node tail = null; ! Node node = null; LOOP: for (;;) { int ch = peek(); switch (ch) { case '(':
*** 2615,2624 **** --- 2615,2625 ---- */ private CharPredicate clazz(boolean consume) { CharPredicate prev = null; CharPredicate curr = null; BitClass bits = new BitClass(); + BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch]; boolean isNeg = false; boolean hasBits = false; int ch = next();
*** 2655,2667 **** ch = peek(); } if (hasBits) { // bits used, union has high precedence if (prev == null) { ! prev = curr = bits; } else { ! prev = prev.union(bits); } hasBits = false; } if (right != null) curr = right; --- 2656,2668 ---- ch = peek(); } if (hasBits) { // bits used, union has high precedence if (prev == null) { ! prev = curr = bitsP; } else { ! prev = prev.union(bitsP); } hasBits = false; } if (right != null) curr = right;
*** 2686,2698 **** case ']': if (prev != null || hasBits) { if (consume) next(); if (prev == null) ! prev = bits; else if (hasBits) ! prev = prev.union(bits); if (isNeg) return prev.negate(); return prev; } break; --- 2687,2699 ---- case ']': if (prev != null || hasBits) { if (consume) next(); if (prev == null) ! prev = bitsP; else if (hasBits) ! prev = prev.union(bitsP); if (isNeg) return prev.negate(); return prev; } break;
*** 2944,2955 **** * the group. Sometimes a double return system is used where the tail is * returned in root. */ private Node group0() { boolean capturingGroup = false; ! Node head; ! Node tail; int save = flags0; int saveTCNCount = topClosureNodes.size(); root = null; int ch = next(); if (ch == '?') { --- 2945,2956 ---- * the group. Sometimes a double return system is used where the tail is * returned in root. */ private Node group0() { boolean capturingGroup = false; ! Node head = null; ! Node tail = null; int save = flags0; int saveTCNCount = topClosureNodes.size(); root = null; int ch = next(); if (ch == '?') {
*** 2994,3004 **** } int start = cursor; head = createGroup(true); tail = root; head.next = expr(tail); ! tail.next = LookBehindEndNode.INSTANCE; TreeInfo info = new TreeInfo(); head.study(info); if (info.maxValid == false) { throw error("Look-behind group does not have " + "an obvious maximum length"); --- 2995,3005 ---- } int start = cursor; head = createGroup(true); tail = root; head.next = expr(tail); ! tail.next = lookbehindEnd; TreeInfo info = new TreeInfo(); head.study(info); if (info.maxValid == false) { throw error("Look-behind group does not have " + "an obvious maximum length");
*** 3250,3259 **** --- 3251,3261 ---- * Processes repetition. If the next character peeked is a quantifier * then new nodes must be appended to handle the repetition. * Prev could be a single or a group, so it could be a chain of nodes. */ private Node closure(Node prev) { + Node atom; int ch = peek(); switch (ch) { case '?': ch = next(); if (ch == '?') {
*** 3275,3317 **** try { do { cmin = Math.addExact(Math.multiplyExact(cmin, 10), ch - '0'); } while (ASCII.isDigit(ch = read())); if (ch == ',') { ch = read(); ! if (ch == '}') { ! unread(); ! return curly(prev, cmin); ! } else { cmax = 0; while (ASCII.isDigit(ch)) { cmax = Math.addExact(Math.multiplyExact(cmax, 10), ch - '0'); ch = read(); } } - } else { - cmax = cmin; } } catch (ArithmeticException ae) { throw error("Illegal repetition range"); } if (ch != '}') throw error("Unclosed counted closure"); if (cmax < cmin) throw error("Illegal repetition range"); ch = peek(); if (ch == '?') { next(); ! return new Curly(prev, cmin, cmax, Qtype.LAZY); } else if (ch == '+') { next(); ! return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE); } else { ! return new Curly(prev, cmin, cmax, Qtype.GREEDY); } } else { throw error("Illegal repetition"); } default: return prev; --- 3277,3318 ---- try { do { cmin = Math.addExact(Math.multiplyExact(cmin, 10), ch - '0'); } while (ASCII.isDigit(ch = read())); + cmax = cmin; if (ch == ',') { ch = read(); ! cmax = MAX_REPS; ! if (ch != '}') { cmax = 0; while (ASCII.isDigit(ch)) { cmax = Math.addExact(Math.multiplyExact(cmax, 10), ch - '0'); ch = read(); } } } } catch (ArithmeticException ae) { throw error("Illegal repetition range"); } if (ch != '}') throw error("Unclosed counted closure"); if (cmax < cmin) throw error("Illegal repetition range"); + Curly curly; ch = peek(); if (ch == '?') { next(); ! curly = new Curly(prev, cmin, cmax, Qtype.LAZY); } else if (ch == '+') { next(); ! curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE); } else { ! curly = new Curly(prev, cmin, cmax, Qtype.GREEDY); } + return curly; } else { throw error("Illegal repetition"); } default: return prev;
*** 3482,3495 **** /** * Creates a bit vector for matching Latin-1 values. A normal BitClass * never matches values above Latin-1, and a complemented BitClass always * matches values above Latin-1. */ ! static final class BitClass implements BmpCharPredicate { final boolean[] bits; BitClass() { ! bits = new boolean[256]; } BitClass add(int c, int flags) { assert c >= 0 && c <= 255; if ((flags & CASE_INSENSITIVE) != 0) { if (ASCII.isAscii(c)) { --- 3483,3500 ---- /** * Creates a bit vector for matching Latin-1 values. A normal BitClass * never matches values above Latin-1, and a complemented BitClass always * matches values above Latin-1. */ ! static final class BitClass extends BmpCharProperty { final boolean[] bits; BitClass() { ! this(new boolean[256]); ! } ! private BitClass(boolean[] bits) { ! super( ch -> ch < 256 && bits[ch]); ! this.bits = bits; } BitClass add(int c, int flags) { assert c >= 0 && c <= 255; if ((flags & CASE_INSENSITIVE) != 0) { if (ASCII.isAscii(c)) {
*** 3501,3516 **** } } bits[c] = true; return this; } - public boolean is(int ch) { - return ch < 256 && bits[ch]; - } } - /** * Utility method for creating a string slice matcher. */ private Node newSlice(int[] buf, int count, boolean hasSupplementary) { int[] tmp = new int[count]; --- 3506,3517 ----
*** 3919,3929 **** /** * Abstract node class to match one character satisfying some * boolean property. */ static class CharProperty extends Node { ! final CharPredicate predicate; CharProperty (CharPredicate predicate) { this.predicate = predicate; } boolean match(Matcher matcher, int i, CharSequence seq) { --- 3920,3930 ---- /** * Abstract node class to match one character satisfying some * boolean property. */ static class CharProperty extends Node { ! CharPredicate predicate; CharProperty (CharPredicate predicate) { this.predicate = predicate; } boolean match(Matcher matcher, int i, CharSequence seq) {
*** 3970,3989 **** boolean match(Matcher matcher, int i, CharSequence seq) { if (i < matcher.to) { int ch0 = Character.codePointAt(seq, i); int n = Character.charCount(ch0); ! int j = i + n; ! // Fast check if it's necessary to call Normalizer; ! // testing Grapheme.isBoundary is enough for this case ! while (j < matcher.to) { ! int ch1 = Character.codePointAt(seq, j); ! if (Grapheme.isBoundary(ch0, ch1)) ! break; ! ch0 = ch1; ! j += Character.charCount(ch1); ! } if (i + n == j) { // single, assume nfc cp if (predicate.is(ch0)) return next.match(matcher, j, seq); } else { while (i + n < j) { --- 3971,3981 ---- boolean match(Matcher matcher, int i, CharSequence seq) { if (i < matcher.to) { int ch0 = Character.codePointAt(seq, i); int n = Character.charCount(ch0); ! int j = Grapheme.nextBoundary(seq, i, matcher.to); if (i + n == j) { // single, assume nfc cp if (predicate.is(ch0)) return next.match(matcher, j, seq); } else { while (i + n < j) {
*** 4263,4284 **** } } } /** ! * Handles the greedy style repetition with the specified minimum ! * and the maximum equal to MAX_REPS, for *, + and {N,} quantifiers. */ static class CharPropertyGreedy extends Node { final CharPredicate predicate; final int cmin; CharPropertyGreedy(CharProperty cp, int cmin) { this.predicate = cp.predicate; this.cmin = cmin; } ! boolean match(Matcher matcher, int i, CharSequence seq) { int n = 0; int to = matcher.to; // greedy, all the way down while (i < to) { int ch = Character.codePointAt(seq, i); --- 4255,4276 ---- } } } /** ! * Handles the greedy style repetition with the minimum either be ! * 0 or 1 and the maximum be MAX_REPS, for * and + quantifier. */ static class CharPropertyGreedy extends Node { final CharPredicate predicate; final int cmin; CharPropertyGreedy(CharProperty cp, int cmin) { this.predicate = cp.predicate; this.cmin = cmin; } ! boolean match(Matcher matcher, int i, CharSequence seq) { int n = 0; int to = matcher.to; // greedy, all the way down while (i < to) { int ch = Character.codePointAt(seq, i);
*** 4317,4327 **** BmpCharPropertyGreedy(BmpCharProperty bcp, int cmin) { super(bcp, cmin); } ! boolean match(Matcher matcher, int i, CharSequence seq) { int n = 0; int to = matcher.to; while (i < to && predicate.is(seq.charAt(i))) { i++; n++; } --- 4309,4319 ---- BmpCharPropertyGreedy(BmpCharProperty bcp, int cmin) { super(bcp, cmin); } ! boolean match(Matcher matcher, int i, CharSequence seq) { int n = 0; int to = matcher.to; while (i < to && predicate.is(seq.charAt(i))) { i++; n++; }
*** 4694,4704 **** * "next" but not the "study", so we can collect the TreeInfo * of each atom node without including the TreeInfo of the * "next". */ static final class BranchConn extends Node { ! BranchConn() {} boolean match(Matcher matcher, int i, CharSequence seq) { return next.match(matcher, i, seq); } boolean study(TreeInfo info) { return info.deterministic; --- 4686,4696 ---- * "next" but not the "study", so we can collect the TreeInfo * of each atom node without including the TreeInfo of the * "next". */ static final class BranchConn extends Node { ! BranchConn() {}; boolean match(Matcher matcher, int i, CharSequence seq) { return next.match(matcher, i, seq); } boolean study(TreeInfo info) { return info.deterministic;
*** 4791,4800 **** --- 4783,4820 ---- matcher.locals[localIndex] = i; boolean ret = next.match(matcher, i, seq); matcher.locals[localIndex] = save; return ret; } + boolean matchRef(Matcher matcher, int i, CharSequence seq) { + int save = matcher.locals[localIndex]; + matcher.locals[localIndex] = ~i; // HACK + boolean ret = next.match(matcher, i, seq); + matcher.locals[localIndex] = save; + return ret; + } + } + + /** + * Recursive reference to a group in the regular expression. It calls + * matchRef because if the reference fails to match we would not unset + * the group. + */ + static final class GroupRef extends Node { + GroupHead head; + GroupRef(GroupHead head) { + this.head = head; + } + boolean match(Matcher matcher, int i, CharSequence seq) { + return head.matchRef(matcher, i, seq) + && next.match(matcher, matcher.last, seq); + } + boolean study(TreeInfo info) { + info.maxValid = false; + info.deterministic = false; + return next.study(info); + } } /** * The GroupTail handles the setting of group beginning and ending * locations when groups are successfully matched. It must also be able to
*** 4912,4922 **** } return next.match(matcher, i, seq); } boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; ! boolean ret; if (posIndex != -1 && matcher.localsPos[posIndex] == null) { matcher.localsPos[posIndex] = new IntHashSet(); } if (0 < cmin) { matcher.locals[countIndex] = 1; --- 4932,4942 ---- } return next.match(matcher, i, seq); } boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; ! boolean ret = false; if (posIndex != -1 && matcher.localsPos[posIndex] == null) { matcher.localsPos[posIndex] = new IntHashSet(); } if (0 < cmin) { matcher.locals[countIndex] = 1;
*** 5126,5146 **** info.deterministic = false; return next.study(info); } } /** * Zero width positive lookahead. */ static final class Pos extends Node { Node cond; Pos(Node cond) { this.cond = cond; } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; ! boolean conditionMatched; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) matcher.to = matcher.getTextLength(); try { --- 5146,5201 ---- info.deterministic = false; return next.study(info); } } + static final class Conditional extends Node { + Node cond, yes, not; + Conditional(Node cond, Node yes, Node not) { + this.cond = cond; + this.yes = yes; + this.not = not; + } + boolean match(Matcher matcher, int i, CharSequence seq) { + if (cond.match(matcher, i, seq)) { + return yes.match(matcher, i, seq); + } else { + return not.match(matcher, i, seq); + } + } + boolean study(TreeInfo info) { + int minL = info.minLength; + int maxL = info.maxLength; + boolean maxV = info.maxValid; + info.reset(); + yes.study(info); + + int minL2 = info.minLength; + int maxL2 = info.maxLength; + boolean maxV2 = info.maxValid; + info.reset(); + not.study(info); + + info.minLength = minL + Math.min(minL2, info.minLength); + info.maxLength = maxL + Math.max(maxL2, info.maxLength); + info.maxValid = (maxV & maxV2 & info.maxValid); + info.deterministic = false; + return next.study(info); + } + } + /** * Zero width positive lookahead. */ static final class Pos extends Node { Node cond; Pos(Node cond) { this.cond = cond; } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; ! boolean conditionMatched = false; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) matcher.to = matcher.getTextLength(); try {
*** 5161,5171 **** Neg(Node cond) { this.cond = cond; } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; ! boolean conditionMatched; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) matcher.to = matcher.getTextLength(); try { --- 5216,5226 ---- Neg(Node cond) { this.cond = cond; } boolean match(Matcher matcher, int i, CharSequence seq) { int savedTo = matcher.to; ! boolean conditionMatched = false; // Relax transparent region boundaries for lookahead if (matcher.transparentBounds) matcher.to = matcher.getTextLength(); try {
*** 5187,5205 **** /** * For use with lookbehinds; matches the position where the lookbehind * was encountered. */ ! static class LookBehindEndNode extends Node { ! private LookBehindEndNode() {} // Singleton ! ! static LookBehindEndNode INSTANCE = new LookBehindEndNode(); ! boolean match(Matcher matcher, int i, CharSequence seq) { return i == matcher.lookbehindTo; } ! } /** * Zero width positive lookbehind. */ static class Behind extends Node { --- 5242,5256 ---- /** * For use with lookbehinds; matches the position where the lookbehind * was encountered. */ ! static Node lookbehindEnd = new Node() { boolean match(Matcher matcher, int i, CharSequence seq) { return i == matcher.lookbehindTo; } ! }; /** * Zero width positive lookbehind. */ static class Behind extends Node {
*** 5463,5473 **** // a shift larger than the pattern length cannot // be used anyway. if (patternLength < 4) { return node; } ! int i, j; int[] lastOcc = new int[128]; int[] optoSft = new int[patternLength]; // Precalculate part of the bad character shift // It is a table for where in the pattern each // lower 7-bit value occurs --- 5514,5524 ---- // a shift larger than the pattern length cannot // be used anyway. if (patternLength < 4) { return node; } ! int i, j, k; int[] lastOcc = new int[128]; int[] optoSft = new int[patternLength]; // Precalculate part of the bad character shift // It is a table for where in the pattern each // lower 7-bit value occurs
*** 5618,5628 **** } static interface BmpCharPredicate extends CharPredicate { default CharPredicate and(CharPredicate p) { ! if (p instanceof BmpCharPredicate) return (BmpCharPredicate)(ch -> is(ch) && p.is(ch)); return ch -> is(ch) && p.is(ch); } default CharPredicate union(CharPredicate p) { if (p instanceof BmpCharPredicate) --- 5669,5679 ---- } static interface BmpCharPredicate extends CharPredicate { default CharPredicate and(CharPredicate p) { ! if(p instanceof BmpCharPredicate) return (BmpCharPredicate)(ch -> is(ch) && p.is(ch)); return ch -> is(ch) && p.is(ch); } default CharPredicate union(CharPredicate p) { if (p instanceof BmpCharPredicate)
< prev index next >