< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
  */
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

@@ -20,11 +20,10 @@
 
 package com.sun.org.apache.xerces.internal.impl ;
 
 import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
 import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
-import com.sun.org.apache.xerces.internal.impl.io.UTF16Reader;
 import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
 import com.sun.org.apache.xerces.internal.util.*;
 import com.sun.org.apache.xerces.internal.util.URI;

@@ -88,11 +87,11 @@
  * @author Andy Clark, IBM
  * @author Arnaud  Le Hors, IBM
  * @author K.Venugopal SUN Microsystems
  * @author Neeraj Bajaj SUN Microsystems
  * @author Sunitha Reddy SUN Microsystems
- * @LastModified: Apr 2019
+ * @LastModified: Nov 2018
  */
 public class XMLEntityManager implements XMLComponent, XMLEntityResolver {
 
     //
     // Constants

@@ -411,10 +410,13 @@
     private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
 
     /** Augmentations for entities. */
     private final Augmentations fEntityAugs = new AugmentationsImpl();
 
+    /** Pool of character buffers. */
+    private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
+
     /** indicate whether Catalog should be used for resolving external resources */
     private boolean fUseCatalog = true;
     CatalogFeatures fCatalogFeatures;
     CatalogResolver fCatalogResolver;
 

@@ -690,58 +692,54 @@
                     }
                 }
             }
 
             // wrap this stream in RewindableInputStream
-            RewindableInputStream rewindableStream = new RewindableInputStream(stream);
-            stream = rewindableStream;
+            stream = new RewindableInputStream(stream);
 
             // perform auto-detect of encoding if necessary
             if (encoding == null) {
                 // read first four bytes and determine encoding
                 final byte[] b4 = new byte[4];
                 int count = 0;
                 for (; count<4; count++ ) {
-                    b4[count] = (byte)rewindableStream.readAndBuffer();
+                    b4[count] = (byte)stream.read();
                 }
                 if (count == 4) {
-                    final EncodingInfo info = getEncodingInfo(b4, count);
-                    encoding = info.autoDetectedEncoding;
-                    final String readerEncoding = info.readerEncoding;
-                    isBigEndian = info.isBigEndian;
+                    Object [] encodingDesc = getEncodingName(b4, count);
+                    encoding = (String)(encodingDesc[0]);
+                    isBigEndian = (Boolean)(encodingDesc[1]);
+
                     stream.reset();
-                    if (info.hasBOM) {
-                        // Special case UTF-8 files with BOM created by Microsoft
-                        // tools. It's more efficient to consume the BOM than make
-                        // the reader perform extra checks. -Ac
-                        if (EncodingInfo.STR_UTF8.equals(readerEncoding)) {
-                            // UTF-8 BOM: 0xEF 0xBB 0xBF
+                    // Special case UTF-8 files with BOM created by Microsoft
+                    // tools. It's more efficient to consume the BOM than make
+                    // the reader perform extra checks. -Ac
+                    if (count > 2 && encoding.equals("UTF-8")) {
+                        int b0 = b4[0] & 0xFF;
+                        int b1 = b4[1] & 0xFF;
+                        int b2 = b4[2] & 0xFF;
+                        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
+                            // ignore first three bytes...
                             stream.skip(3);
                         }
-                        // It's also more efficient to consume the UTF-16 BOM.
-                        else if (EncodingInfo.STR_UTF16.equals(readerEncoding)) {
-                            // UTF-16 BE BOM: 0xFE 0xFF
-                            // UTF-16 LE BOM: 0xFF 0xFE
-                            stream.skip(2);
-                        }
                     }
-                    reader = createReader(stream, readerEncoding, isBigEndian);
+                    reader = createReader(stream, encoding, isBigEndian);
                 } else {
                     reader = createReader(stream, encoding, isBigEndian);
                 }
             }
 
             // use specified encoding
             else {
                 encoding = encoding.toUpperCase(Locale.ENGLISH);
 
                 // If encoding is UTF-8, consume BOM if one is present.
-                if (EncodingInfo.STR_UTF8.equals(encoding)) {
+                if (encoding.equals("UTF-8")) {
                     final int[] b3 = new int[3];
                     int count = 0;
                     for (; count < 3; ++count) {
-                        b3[count] = rewindableStream.readAndBuffer();
+                        b3[count] = stream.read();
                         if (b3[count] == -1)
                             break;
                     }
                     if (count == 3) {
                         if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {

@@ -750,55 +748,60 @@
                         }
                     } else {
                         stream.reset();
                     }
                 }
-                // If encoding is UTF-16, we still need to read the first
-                // four bytes, in order to discover the byte order.
-                else if (EncodingInfo.STR_UTF16.equals(encoding)) {
+                // If encoding is UTF-16, we still need to read the first four bytes
+                // in order to discover the byte order.
+                else if (encoding.equals("UTF-16")) {
                     final int[] b4 = new int[4];
                     int count = 0;
                     for (; count < 4; ++count) {
-                        b4[count] = rewindableStream.readAndBuffer();
+                        b4[count] = stream.read();
                         if (b4[count] == -1)
                             break;
                     }
                     stream.reset();
+
+                    String utf16Encoding = "UTF-16";
                     if (count >= 2) {
                         final int b0 = b4[0];
                         final int b1 = b4[1];
                         if (b0 == 0xFE && b1 == 0xFF) {
                             // UTF-16, big-endian
+                            utf16Encoding = "UTF-16BE";
                             isBigEndian = Boolean.TRUE;
-                            stream.skip(2);
                         }
                         else if (b0 == 0xFF && b1 == 0xFE) {
                             // UTF-16, little-endian
+                            utf16Encoding = "UTF-16LE";
                             isBigEndian = Boolean.FALSE;
-                            stream.skip(2);
                         }
                         else if (count == 4) {
                             final int b2 = b4[2];
                             final int b3 = b4[3];
                             if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                                 // UTF-16, big-endian, no BOM
+                                utf16Encoding = "UTF-16BE";
                                 isBigEndian = Boolean.TRUE;
                             }
                             if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                                 // UTF-16, little-endian, no BOM
+                                utf16Encoding = "UTF-16LE";
                                 isBigEndian = Boolean.FALSE;
                             }
                         }
                     }
+                    reader = createReader(stream, utf16Encoding, isBigEndian);
                 }
                 // If encoding is UCS-4, we still need to read the first four bytes
                 // in order to discover the byte order.
-                else if (EncodingInfo.STR_UCS4.equals(encoding)) {
+                else if (encoding.equals("ISO-10646-UCS-4")) {
                     final int[] b4 = new int[4];
                     int count = 0;
                     for (; count < 4; ++count) {
-                        b4[count] = rewindableStream.readAndBuffer();
+                        b4[count] = stream.read();
                         if (b4[count] == -1)
                             break;
                     }
                     stream.reset();
 

@@ -814,15 +817,15 @@
                         }
                     }
                 }
                 // If encoding is UCS-2, we still need to read the first four bytes
                 // in order to discover the byte order.
-                else if (EncodingInfo.STR_UCS2.equals(encoding)) {
+                else if (encoding.equals("ISO-10646-UCS-2")) {
                     final int[] b4 = new int[4];
                     int count = 0;
                     for (; count < 4; ++count) {
-                        b4[count] = rewindableStream.readAndBuffer();
+                        b4[count] = stream.read();
                         if (b4[count] == -1)
                             break;
                     }
                     stream.reset();
 

@@ -1793,10 +1796,11 @@
                 Integer bufferSize = (Integer)value;
                 if (bufferSize != null &&
                     bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
                     fBufferSize = bufferSize.intValue();
                     fEntityScanner.setBufferSize(fBufferSize);
+                    fBufferPool.setExternalBufferSize(fBufferSize);
                 }
             }
             if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() &&
                 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
                 fSecurityManager = (XMLSecurityManager)value;

@@ -2419,87 +2423,88 @@
      * Returns the IANA encoding name that is auto-detected from
      * the bytes specified, with the endian-ness of that encoding where appropriate.
      *
      * @param b4    The first four bytes of the input.
      * @param count The number of bytes actually read.
-     * @return an instance of EncodingInfo which represents the auto-detected encoding.
+     * @return a 2-element array:  the first element, an IANA-encoding string,
+     *  the second element a Boolean which is true iff the document is big endian, false
+     *  if it's little-endian, and null if the distinction isn't relevant.
      */
-    protected EncodingInfo getEncodingInfo(byte[] b4, int count) {
+    protected Object[] getEncodingName(byte[] b4, int count) {
 
         if (count < 2) {
-            return EncodingInfo.UTF_8;
+            return defaultEncoding;
         }
 
         // UTF-16, with BOM
         int b0 = b4[0] & 0xFF;
         int b1 = b4[1] & 0xFF;
         if (b0 == 0xFE && b1 == 0xFF) {
             // UTF-16, big-endian
-            return EncodingInfo.UTF_16_BIG_ENDIAN_WITH_BOM;
+            return new Object [] {"UTF-16BE", true};
         }
         if (b0 == 0xFF && b1 == 0xFE) {
             // UTF-16, little-endian
-            return EncodingInfo.UTF_16_LITTLE_ENDIAN_WITH_BOM;
+            return new Object [] {"UTF-16LE", false};
         }
 
         // default to UTF-8 if we don't have enough bytes to make a
         // good determination of the encoding
         if (count < 3) {
-            return EncodingInfo.UTF_8;
+            return defaultEncoding;
         }
 
         // UTF-8 with a BOM
         int b2 = b4[2] & 0xFF;
         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
-            return EncodingInfo.UTF_8_WITH_BOM;
+            return defaultEncoding;
         }
 
         // default to UTF-8 if we don't have enough bytes to make a
         // good determination of the encoding
         if (count < 4) {
-            return EncodingInfo.UTF_8;
+            return defaultEncoding;
         }
 
         // other encodings
         int b3 = b4[3] & 0xFF;
         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
             // UCS-4, big endian (1234)
-            return EncodingInfo.UCS_4_BIG_ENDIAN;
+            return new Object [] {"ISO-10646-UCS-4", true};
         }
         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
             // UCS-4, little endian (4321)
-            return EncodingInfo.UCS_4_LITTLE_ENDIAN;
+            return new Object [] {"ISO-10646-UCS-4", false};
         }
         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
             // UCS-4, unusual octet order (2143)
             // REVISIT: What should this be?
-            return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
+            return new Object [] {"ISO-10646-UCS-4", null};
         }
         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
             // UCS-4, unusual octect order (3412)
             // REVISIT: What should this be?
-            return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER;
+            return new Object [] {"ISO-10646-UCS-4", null};
         }
         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
             // UTF-16, big-endian, no BOM
             // (or could turn out to be UCS-2...
             // REVISIT: What should this be?
-            return EncodingInfo.UTF_16_BIG_ENDIAN;
+            return new Object [] {"UTF-16BE", true};
         }
         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
             // UTF-16, little-endian, no BOM
             // (or could turn out to be UCS-2...
-            return EncodingInfo.UTF_16_LITTLE_ENDIAN;
+            return new Object [] {"UTF-16LE", false};
         }
         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
             // EBCDIC
             // a la xerces1, return CP037 instead of EBCDIC here
-            return EncodingInfo.EBCDIC;
+            return new Object [] {"CP037", null};
         }
 
-        // default encoding
-        return EncodingInfo.UTF_8;
+        return defaultEncoding;
 
     } // getEncodingName(byte[],int):Object[]
 
     /**
      * Creates a reader capable of reading the given input stream in

@@ -2510,99 +2515,99 @@
      *                     encoded using. If the user has specified that
      *                     Java encoding names are allowed, then the
      *                     encoding name may be a Java encoding name;
      *                     otherwise, it is an ianaEncoding name.
      * @param isBigEndian   For encodings (like uCS-4), whose names cannot
-     *                      specify a byte order, this tells whether the order
-     *                      is bigEndian.  null if unknown or irrelevant.
+     *                      specify a byte order, this tells whether the order is bigEndian.  null means
+     *                      unknown or not relevant.
      *
      * @return Returns a reader.
      */
     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
-        throws IOException {
+    throws IOException {
 
-        String enc = (encoding != null) ? encoding : EncodingInfo.STR_UTF8;
-        enc = enc.toUpperCase(Locale.ENGLISH);
-        MessageFormatter f = fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
-        Locale l = fErrorReporter.getLocale();
-        switch (enc) {
-            case EncodingInfo.STR_UTF8:
-                return new UTF8Reader(inputStream, fBufferSize, f, l);
-            case EncodingInfo.STR_UTF16:
-                if (isBigEndian != null) {
-                    return new UTF16Reader(inputStream, fBufferSize, isBigEndian, f, l);
-                }
-                break;
-            case EncodingInfo.STR_UTF16BE:
-                return new UTF16Reader(inputStream, fBufferSize, true, f, l);
-            case EncodingInfo.STR_UTF16LE:
-                return new UTF16Reader(inputStream, fBufferSize, false, f, l);
-            case EncodingInfo.STR_UCS4:
-                if(isBigEndian != null) {
-                    if(isBigEndian) {
-                        return new UCSReader(inputStream, UCSReader.UCS4BE);
-                    } else {
-                        return new UCSReader(inputStream, UCSReader.UCS4LE);
-                    }
+        // normalize encoding name
+        if (encoding == null) {
+            encoding = "UTF-8";
+        }
+
+        // try to use an optimized reader
+        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
+        if (ENCODING.equals("UTF-8")) {
+            if (DEBUG_ENCODINGS) {
+                System.out.println("$$$ creating UTF8Reader");
+            }
+            return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
+        }
+        if (ENCODING.equals("US-ASCII")) {
+            if (DEBUG_ENCODINGS) {
+                System.out.println("$$$ creating ASCIIReader");
+            }
+            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
+        }
+        if(ENCODING.equals("ISO-10646-UCS-4")) {
+            if(isBigEndian != null) {
+                boolean isBE = isBigEndian.booleanValue();
+                if(isBE) {
+                    return new UCSReader(inputStream, UCSReader.UCS4BE);
                 } else {
-                    fErrorReporter.reportError(this.getEntityScanner(),
-                            XMLMessageFormatter.XML_DOMAIN,
-                            "EncodingByteOrderUnsupported",
-                            new Object[] { encoding },
-                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
+                    return new UCSReader(inputStream, UCSReader.UCS4LE);
                 }
-                break;
-            case EncodingInfo.STR_UCS2:
-                if(isBigEndian != null) {
-                    if(isBigEndian) {
-                        return new UCSReader(inputStream, UCSReader.UCS2BE);
-                    } else {
-                        return new UCSReader(inputStream, UCSReader.UCS2LE);
-                    }
+            } else {
+                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
+                        "EncodingByteOrderUnsupported",
+                        new Object[] { encoding },
+                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
+            }
+        }
+        if(ENCODING.equals("ISO-10646-UCS-2")) {
+            if(isBigEndian != null) { // should never happen with this encoding...
+                boolean isBE = isBigEndian.booleanValue();
+                if(isBE) {
+                    return new UCSReader(inputStream, UCSReader.UCS2BE);
                 } else {
-                    fErrorReporter.reportError(this.getEntityScanner(),
-                            XMLMessageFormatter.XML_DOMAIN,
-                            "EncodingByteOrderUnsupported",
-                            new Object[] { encoding },
-                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
+                    return new UCSReader(inputStream, UCSReader.UCS2LE);
                 }
-                break;
+            } else {
+                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
+                        "EncodingByteOrderUnsupported",
+                        new Object[] { encoding },
+                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
+            }
         }
 
         // check for valid name
         boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
         boolean validJava = XMLChar.isValidJavaEncoding(encoding);
         if (!validIANA || (fAllowJavaEncodings && !validJava)) {
-            fErrorReporter.reportError(this.getEntityScanner(),
-                    XMLMessageFormatter.XML_DOMAIN,
+            fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                     "EncodingDeclInvalid",
                     new Object[] { encoding },
                     XMLErrorReporter.SEVERITY_FATAL_ERROR);
-            // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
-            //       because every byte is a valid ISO Latin 1 character.
-            //       It may not translate correctly but if we failed on
-            //       the encoding anyway, then we're expecting the content
-            //       of the document to be bad. This will just prevent an
-            //       invalid UTF-8 sequence to be detected. This is only
-            //       important when continue-after-fatal-error is turned
-            //       on. -Ac
+                    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
+                    //       because every byte is a valid ISO Latin 1 character.
+                    //       It may not translate correctly but if we failed on
+                    //       the encoding anyway, then we're expecting the content
+                    //       of the document to be bad. This will just prevent an
+                    //       invalid UTF-8 sequence to be detected. This is only
+                    //       important when continue-after-fatal-error is turned
+                    //       on. -Ac
                     encoding = "ISO-8859-1";
         }
 
         // try to use a Java reader
-        String javaEncoding = EncodingMap.getIANA2JavaMapping(enc);
+        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
         if (javaEncoding == null) {
-            if (fAllowJavaEncodings) {
+            if(fAllowJavaEncodings) {
                 javaEncoding = encoding;
             } else {
-                fErrorReporter.reportError(this.getEntityScanner(),
-                        XMLMessageFormatter.XML_DOMAIN,
+                fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN,
                         "EncodingDeclInvalid",
                         new Object[] { encoding },
                         XMLErrorReporter.SEVERITY_FATAL_ERROR);
-                // see comment above.
-                javaEncoding = "ISO8859_1";
+                        // see comment above.
+                        javaEncoding = "ISO8859_1";
             }
         }
         if (DEBUG_ENCODINGS) {
             System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
             if (javaEncoding == encoding) {

@@ -2891,82 +2896,112 @@
             }
         }
     } // print()
 
     /**
-     * Information about auto-detectable encodings.
+     * Buffer used in entity manager to reuse character arrays instead
+     * of creating new ones every time.
+     *
+     * @xerces.internal
+     *
+     * @author Ankit Pasricha, IBM
+     */
+    private static class CharacterBuffer {
+
+        /** character buffer */
+        private char[] ch;
+
+        /** whether the buffer is for an external or internal scanned entity */
+        private boolean isExternal;
+
+        public CharacterBuffer(boolean isExternal, int size) {
+            this.isExternal = isExternal;
+            ch = new char[size];
+        }
+    }
+
+
+     /**
+     * Stores a number of character buffers and provides them to the entity
+     * manager to use when an entity is seen.
      *
      * @xerces.internal
      *
-     * @author Michael Glavassevich, IBM
+     * @author Ankit Pasricha, IBM
      */
-    private static class EncodingInfo {
-        public static final String STR_UTF8 = "UTF-8";
-        public static final String STR_UTF16 = "UTF-16";
-        public static final String STR_UTF16BE = "UTF-16BE";
-        public static final String STR_UTF16LE = "UTF-16LE";
-        public static final String STR_UCS4 = "ISO-10646-UCS-4";
-        public static final String STR_UCS2 = "ISO-10646-UCS-2";
-        public static final String STR_CP037 = "CP037";
-
-        /** UTF-8 **/
-        public static final EncodingInfo UTF_8 =
-                new EncodingInfo(STR_UTF8, null, false);
-
-        /** UTF-8, with BOM **/
-        public static final EncodingInfo UTF_8_WITH_BOM =
-                new EncodingInfo(STR_UTF8, null, true);
-
-        /** UTF-16, big-endian **/
-        public static final EncodingInfo UTF_16_BIG_ENDIAN =
-                new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, false);
-
-        /** UTF-16, big-endian with BOM **/
-        public static final EncodingInfo UTF_16_BIG_ENDIAN_WITH_BOM =
-                new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, true);
-
-        /** UTF-16, little-endian **/
-        public static final EncodingInfo UTF_16_LITTLE_ENDIAN =
-                new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, false);
-
-        /** UTF-16, little-endian with BOM **/
-        public static final EncodingInfo UTF_16_LITTLE_ENDIAN_WITH_BOM =
-                new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, true);
-
-        /** UCS-4, big-endian **/
-        public static final EncodingInfo UCS_4_BIG_ENDIAN =
-                new EncodingInfo(STR_UCS4, Boolean.TRUE, false);
-
-        /** UCS-4, little-endian **/
-        public static final EncodingInfo UCS_4_LITTLE_ENDIAN =
-                new EncodingInfo(STR_UCS4, Boolean.FALSE, false);
-
-        /** UCS-4, unusual byte-order (2143) or (3412) **/
-        public static final EncodingInfo UCS_4_UNUSUAL_BYTE_ORDER =
-                new EncodingInfo(STR_UCS4, null, false);
-
-        /** EBCDIC **/
-        public static final EncodingInfo EBCDIC = new EncodingInfo(STR_CP037, null, false);
-
-        public final String autoDetectedEncoding;
-        public final String readerEncoding;
-        public final Boolean isBigEndian;
-        public final boolean hasBOM;
-
-        private EncodingInfo(String autoDetectedEncoding, Boolean isBigEndian, boolean hasBOM) {
-            this(autoDetectedEncoding, autoDetectedEncoding, isBigEndian, hasBOM);
-        } // <init>(String,Boolean,boolean)
-
-        private EncodingInfo(String autoDetectedEncoding, String readerEncoding,
-                Boolean isBigEndian, boolean hasBOM) {
-            this.autoDetectedEncoding = autoDetectedEncoding;
-            this.readerEncoding = readerEncoding;
-            this.isBigEndian = isBigEndian;
-            this.hasBOM = hasBOM;
-        } // <init>(String,String,Boolean,boolean)
+    private static class CharacterBufferPool {
+
+        private static final int DEFAULT_POOL_SIZE = 3;
+
+        private CharacterBuffer[] fInternalBufferPool;
+        private CharacterBuffer[] fExternalBufferPool;
 
-    } // class EncodingInfo
+        private int fExternalBufferSize;
+        private int fInternalBufferSize;
+        private int poolSize;
+
+        private int fInternalTop;
+        private int fExternalTop;
+
+        public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
+            this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
+        }
+
+        public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
+            fExternalBufferSize = externalBufferSize;
+            fInternalBufferSize = internalBufferSize;
+            this.poolSize = poolSize;
+            init();
+        }
+
+        /** Initializes buffer pool. **/
+        private void init() {
+            fInternalBufferPool = new CharacterBuffer[poolSize];
+            fExternalBufferPool = new CharacterBuffer[poolSize];
+            fInternalTop = -1;
+            fExternalTop = -1;
+        }
+
+        /** Retrieves buffer from pool. **/
+        public CharacterBuffer getBuffer(boolean external) {
+            if (external) {
+                if (fExternalTop > -1) {
+                    return fExternalBufferPool[fExternalTop--];
+                }
+                else {
+                    return new CharacterBuffer(true, fExternalBufferSize);
+                }
+            }
+            else {
+                if (fInternalTop > -1) {
+                    return fInternalBufferPool[fInternalTop--];
+                }
+                else {
+                    return new CharacterBuffer(false, fInternalBufferSize);
+                }
+            }
+        }
+
+        /** Returns buffer to pool. **/
+        public void returnToPool(CharacterBuffer buffer) {
+            if (buffer.isExternal) {
+                if (fExternalTop < fExternalBufferPool.length - 1) {
+                    fExternalBufferPool[++fExternalTop] = buffer;
+                }
+            }
+            else if (fInternalTop < fInternalBufferPool.length - 1) {
+                fInternalBufferPool[++fInternalTop] = buffer;
+            }
+        }
+
+        /** Sets the size of external buffers and dumps the old pool. **/
+        public void setExternalBufferSize(int bufferSize) {
+            fExternalBufferSize = bufferSize;
+            fExternalBufferPool = new CharacterBuffer[poolSize];
+            fExternalTop = -1;
+        }
+    }
 
     /**
     * This class wraps the byte inputstreams we're presented with.
     * We need it because java.io.InputStreams don't provide
     * functionality to reread processed bytes, and they have a habit

@@ -3015,63 +3050,63 @@
 
         public void rewind() {
             fOffset = fStartOffset;
         }
 
-        public int readAndBuffer() throws IOException {
+        public int read() throws IOException {
+            int b = 0;
+            if (fOffset < fLength) {
+                return fData[fOffset++] & 0xff;
+            }
+            if (fOffset == fEndOffset) {
+                return -1;
+            }
             if (fOffset == fData.length) {
                 byte[] newData = new byte[fOffset << 1];
                 System.arraycopy(fData, 0, newData, 0, fOffset);
                 fData = newData;
             }
-            final int b = fInputStream.read();
+            b = fInputStream.read();
             if (b == -1) {
                 fEndOffset = fOffset;
                 return -1;
             }
             fData[fLength++] = (byte)b;
             fOffset++;
             return b & 0xff;
         }
 
-        public int read() throws IOException {
-            if (fOffset < fLength) {
-                return fData[fOffset++] & 0xff;
-            }
-            if (fOffset == fEndOffset) {
-                return -1;
-            }
-            if (fCurrentEntity.mayReadChunks) {
-                return fInputStream.read();
-            }
-            return readAndBuffer();
-        }
-
         public int read(byte[] b, int off, int len) throws IOException {
-            final int bytesLeft = fLength - fOffset;
+            int bytesLeft = fLength - fOffset;
             if (bytesLeft == 0) {
                 if (fOffset == fEndOffset) {
                     return -1;
                 }
 
-                // read a block of data as requested
+                /**
+                 * //System.out.println("fCurrentEntity = " + fCurrentEntity );
+                 * //System.out.println("fInputStream = " + fInputStream );
+                 * // better get some more for the voracious reader... */
+
                 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) {
 
                     if (!fCurrentEntity.xmlDeclChunkRead)
                     {
                         fCurrentEntity.xmlDeclChunkRead = true;
                         len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE;
                     }
                     return fInputStream.read(b, off, len);
                 }
-                int returnedVal = readAndBuffer();
-                if (returnedVal == -1) {
-                    fEndOffset = fOffset;
-                    return -1;
+
+                int returnedVal = read();
+                if(returnedVal == -1) {
+                  fEndOffset = fOffset;
+                  return -1;
                 }
                 b[off] = (byte)returnedVal;
                 return 1;
+
             }
             if (len < bytesLeft) {
                 if (len <= 0) {
                     return 0;
                 }

@@ -3083,11 +3118,12 @@
             }
             fOffset += len;
             return len;
         }
 
-        public long skip(long n) throws IOException {
+        public long skip(long n)
+        throws IOException {
             int bytesLeft;
             if (n <= 0) {
                 return 0;
             }
             bytesLeft = fLength - fOffset;

@@ -3104,11 +3140,11 @@
             fOffset += bytesLeft;
             if (fOffset == fEndOffset) {
                 return bytesLeft;
             }
             n -= bytesLeft;
-           /*
+            /*
             * In a manner of speaking, when this class isn't permitting more
             * than one byte at a time to be read, it is "blocking".  The
             * available() method should indicate how much can be read without
             * blocking, so while we're in this mode, it should only indicate
             * that bytes in its buffer are available; otherwise, the result of

@@ -3116,27 +3152,28 @@
             */
             return fInputStream.skip(n) + bytesLeft;
         }
 
         public int available() throws IOException {
-            final int bytesLeft = fLength - fOffset;
+            int bytesLeft = fLength - fOffset;
             if (bytesLeft == 0) {
                 if (fOffset == fEndOffset) {
                     return -1;
                 }
                 return fCurrentEntity.mayReadChunks ? fInputStream.available()
-                                                    : 0;
+                : 0;
             }
             return bytesLeft;
         }
 
         public void mark(int howMuch) {
             fMark = fOffset;
         }
 
         public void reset() {
             fOffset = fMark;
+            //test();
         }
 
         public boolean markSupported() {
             return true;
         }
< prev index next >