< prev index next >

src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLEntityManager.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. --- 1,7 ---- /* ! * Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership.
*** 20,30 **** package com.sun.org.apache.xerces.internal.impl ; import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; import com.sun.org.apache.xerces.internal.impl.io.UCSReader; - import com.sun.org.apache.xerces.internal.impl.io.UTF16Reader; import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; import com.sun.org.apache.xerces.internal.util.*; import com.sun.org.apache.xerces.internal.util.URI; --- 20,29 ----
*** 88,98 **** * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author K.Venugopal SUN Microsystems * @author Neeraj Bajaj SUN Microsystems * @author Sunitha Reddy SUN Microsystems ! * @LastModified: Apr 2019 */ public class XMLEntityManager implements XMLComponent, XMLEntityResolver { // // Constants --- 87,97 ---- * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author K.Venugopal SUN Microsystems * @author Neeraj Bajaj SUN Microsystems * @author Sunitha Reddy SUN Microsystems ! * @LastModified: Nov 2018 */ public class XMLEntityManager implements XMLComponent, XMLEntityResolver { // // Constants
*** 411,420 **** --- 410,422 ---- private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); /** Augmentations for entities. */ private final Augmentations fEntityAugs = new AugmentationsImpl(); + /** Pool of character buffers. */ + private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); + /** indicate whether Catalog should be used for resolving external resources */ private boolean fUseCatalog = true; CatalogFeatures fCatalogFeatures; CatalogResolver fCatalogResolver;
*** 690,747 **** } } } // wrap this stream in RewindableInputStream ! RewindableInputStream rewindableStream = new RewindableInputStream(stream); ! stream = rewindableStream; // perform auto-detect of encoding if necessary if (encoding == null) { // read first four bytes and determine encoding final byte[] b4 = new byte[4]; int count = 0; for (; count<4; count++ ) { ! b4[count] = (byte)rewindableStream.readAndBuffer(); } if (count == 4) { ! final EncodingInfo info = getEncodingInfo(b4, count); ! encoding = info.autoDetectedEncoding; ! final String readerEncoding = info.readerEncoding; ! isBigEndian = info.isBigEndian; stream.reset(); ! if (info.hasBOM) { ! // Special case UTF-8 files with BOM created by Microsoft ! // tools. It's more efficient to consume the BOM than make ! // the reader perform extra checks. -Ac ! if (EncodingInfo.STR_UTF8.equals(readerEncoding)) { ! // UTF-8 BOM: 0xEF 0xBB 0xBF stream.skip(3); } - // It's also more efficient to consume the UTF-16 BOM. - else if (EncodingInfo.STR_UTF16.equals(readerEncoding)) { - // UTF-16 BE BOM: 0xFE 0xFF - // UTF-16 LE BOM: 0xFF 0xFE - stream.skip(2); - } } ! reader = createReader(stream, readerEncoding, isBigEndian); } else { reader = createReader(stream, encoding, isBigEndian); } } // use specified encoding else { encoding = encoding.toUpperCase(Locale.ENGLISH); // If encoding is UTF-8, consume BOM if one is present. ! if (EncodingInfo.STR_UTF8.equals(encoding)) { final int[] b3 = new int[3]; int count = 0; for (; count < 3; ++count) { ! b3[count] = rewindableStream.readAndBuffer(); if (b3[count] == -1) break; } if (count == 3) { if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) { --- 692,745 ---- } } } // wrap this stream in RewindableInputStream ! stream = new RewindableInputStream(stream); // perform auto-detect of encoding if necessary if (encoding == null) { // read first four bytes and determine encoding final byte[] b4 = new byte[4]; int count = 0; for (; count<4; count++ ) { ! b4[count] = (byte)stream.read(); } if (count == 4) { ! Object [] encodingDesc = getEncodingName(b4, count); ! encoding = (String)(encodingDesc[0]); ! isBigEndian = (Boolean)(encodingDesc[1]); ! stream.reset(); ! // Special case UTF-8 files with BOM created by Microsoft ! // tools. It's more efficient to consume the BOM than make ! // the reader perform extra checks. -Ac ! if (count > 2 && encoding.equals("UTF-8")) { ! int b0 = b4[0] & 0xFF; ! int b1 = b4[1] & 0xFF; ! int b2 = b4[2] & 0xFF; ! if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! // ignore first three bytes... stream.skip(3); } } ! reader = createReader(stream, encoding, isBigEndian); } else { reader = createReader(stream, encoding, isBigEndian); } } // use specified encoding else { encoding = encoding.toUpperCase(Locale.ENGLISH); // If encoding is UTF-8, consume BOM if one is present. ! if (encoding.equals("UTF-8")) { final int[] b3 = new int[3]; int count = 0; for (; count < 3; ++count) { ! b3[count] = stream.read(); if (b3[count] == -1) break; } if (count == 3) { if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) {
*** 750,804 **** } } else { stream.reset(); } } ! // If encoding is UTF-16, we still need to read the first ! // four bytes, in order to discover the byte order. ! else if (EncodingInfo.STR_UTF16.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset(); if (count >= 2) { final int b0 = b4[0]; final int b1 = b4[1]; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian isBigEndian = Boolean.TRUE; - stream.skip(2); } else if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian isBigEndian = Boolean.FALSE; - stream.skip(2); } else if (count == 4) { final int b2 = b4[2]; final int b3 = b4[3]; if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM isBigEndian = Boolean.TRUE; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM isBigEndian = Boolean.FALSE; } } } } // If encoding is UCS-4, we still need to read the first four bytes // in order to discover the byte order. ! else if (EncodingInfo.STR_UCS4.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset(); --- 748,807 ---- } } else { stream.reset(); } } ! // If encoding is UTF-16, we still need to read the first four bytes ! // in order to discover the byte order. ! else if (encoding.equals("UTF-16")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset(); + + String utf16Encoding = "UTF-16"; if (count >= 2) { final int b0 = b4[0]; final int b1 = b4[1]; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian + utf16Encoding = "UTF-16BE"; isBigEndian = Boolean.TRUE; } else if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian + utf16Encoding = "UTF-16LE"; isBigEndian = Boolean.FALSE; } else if (count == 4) { final int b2 = b4[2]; final int b3 = b4[3]; if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM + utf16Encoding = "UTF-16BE"; isBigEndian = Boolean.TRUE; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM + utf16Encoding = "UTF-16LE"; isBigEndian = Boolean.FALSE; } } } + reader = createReader(stream, utf16Encoding, isBigEndian); } // If encoding is UCS-4, we still need to read the first four bytes // in order to discover the byte order. ! else if (encoding.equals("ISO-10646-UCS-4")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset();
*** 814,828 **** } } } // If encoding is UCS-2, we still need to read the first four bytes // in order to discover the byte order. ! else if (EncodingInfo.STR_UCS2.equals(encoding)) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = rewindableStream.readAndBuffer(); if (b4[count] == -1) break; } stream.reset(); --- 817,831 ---- } } } // If encoding is UCS-2, we still need to read the first four bytes // in order to discover the byte order. ! else if (encoding.equals("ISO-10646-UCS-2")) { final int[] b4 = new int[4]; int count = 0; for (; count < 4; ++count) { ! b4[count] = stream.read(); if (b4[count] == -1) break; } stream.reset();
*** 1793,1802 **** --- 1796,1806 ---- Integer bufferSize = (Integer)value; if (bufferSize != null && bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { fBufferSize = bufferSize.intValue(); fEntityScanner.setBufferSize(fBufferSize); + fBufferPool.setExternalBufferSize(fBufferSize); } } if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { fSecurityManager = (XMLSecurityManager)value;
*** 2419,2505 **** * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. ! * @return an instance of EncodingInfo which represents the auto-detected encoding. */ ! protected EncodingInfo getEncodingInfo(byte[] b4, int count) { if (count < 2) { ! return EncodingInfo.UTF_8; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian ! return EncodingInfo.UTF_16_BIG_ENDIAN_WITH_BOM; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian ! return EncodingInfo.UTF_16_LITTLE_ENDIAN_WITH_BOM; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { ! return EncodingInfo.UTF_8; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! return EncodingInfo.UTF_8_WITH_BOM; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { ! return EncodingInfo.UTF_8; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) ! return EncodingInfo.UCS_4_BIG_ENDIAN; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) ! return EncodingInfo.UCS_4_LITTLE_ENDIAN; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? ! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? ! return EncodingInfo.UCS_4_UNUSUAL_BYTE_ORDER; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? ! return EncodingInfo.UTF_16_BIG_ENDIAN; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... ! return EncodingInfo.UTF_16_LITTLE_ENDIAN; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here ! return EncodingInfo.EBCDIC; } ! // default encoding ! return EncodingInfo.UTF_8; } // getEncodingName(byte[],int):Object[] /** * Creates a reader capable of reading the given input stream in --- 2423,2510 ---- * Returns the IANA encoding name that is auto-detected from * the bytes specified, with the endian-ness of that encoding where appropriate. * * @param b4 The first four bytes of the input. * @param count The number of bytes actually read. ! * @return a 2-element array: the first element, an IANA-encoding string, ! * the second element a Boolean which is true iff the document is big endian, false ! * if it's little-endian, and null if the distinction isn't relevant. */ ! protected Object[] getEncodingName(byte[] b4, int count) { if (count < 2) { ! return defaultEncoding; } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian ! return new Object [] {"UTF-16BE", true}; } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian ! return new Object [] {"UTF-16LE", false}; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 3) { ! return defaultEncoding; } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { ! return defaultEncoding; } // default to UTF-8 if we don't have enough bytes to make a // good determination of the encoding if (count < 4) { ! return defaultEncoding; } // other encodings int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) ! return new Object [] {"ISO-10646-UCS-4", true}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) ! return new Object [] {"ISO-10646-UCS-4", false}; } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? ! return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octect order (3412) // REVISIT: What should this be? ! return new Object [] {"ISO-10646-UCS-4", null}; } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? ! return new Object [] {"UTF-16BE", true}; } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... ! return new Object [] {"UTF-16LE", false}; } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here ! return new Object [] {"CP037", null}; } ! return defaultEncoding; } // getEncodingName(byte[],int):Object[] /** * Creates a reader capable of reading the given input stream in
*** 2510,2608 **** * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot ! * specify a byte order, this tells whether the order ! * is bigEndian. null if unknown or irrelevant. * * @return Returns a reader. */ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) ! throws IOException { ! String enc = (encoding != null) ? encoding : EncodingInfo.STR_UTF8; ! enc = enc.toUpperCase(Locale.ENGLISH); ! MessageFormatter f = fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN); ! Locale l = fErrorReporter.getLocale(); ! switch (enc) { ! case EncodingInfo.STR_UTF8: ! return new UTF8Reader(inputStream, fBufferSize, f, l); ! case EncodingInfo.STR_UTF16: ! if (isBigEndian != null) { ! return new UTF16Reader(inputStream, fBufferSize, isBigEndian, f, l); ! } ! break; ! case EncodingInfo.STR_UTF16BE: ! return new UTF16Reader(inputStream, fBufferSize, true, f, l); ! case EncodingInfo.STR_UTF16LE: ! return new UTF16Reader(inputStream, fBufferSize, false, f, l); ! case EncodingInfo.STR_UCS4: ! if(isBigEndian != null) { ! if(isBigEndian) { ! return new UCSReader(inputStream, UCSReader.UCS4BE); ! } else { ! return new UCSReader(inputStream, UCSReader.UCS4LE); ! } } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, ! "EncodingByteOrderUnsupported", ! new Object[] { encoding }, ! XMLErrorReporter.SEVERITY_FATAL_ERROR); } ! break; ! case EncodingInfo.STR_UCS2: ! if(isBigEndian != null) { ! if(isBigEndian) { ! return new UCSReader(inputStream, UCSReader.UCS2BE); ! } else { ! return new UCSReader(inputStream, UCSReader.UCS2LE); ! } } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, ! "EncodingByteOrderUnsupported", ! new Object[] { encoding }, ! XMLErrorReporter.SEVERITY_FATAL_ERROR); } ! break; } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); ! // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 ! // because every byte is a valid ISO Latin 1 character. ! // It may not translate correctly but if we failed on ! // the encoding anyway, then we're expecting the content ! // of the document to be bad. This will just prevent an ! // invalid UTF-8 sequence to be detected. This is only ! // important when continue-after-fatal-error is turned ! // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader ! String javaEncoding = EncodingMap.getIANA2JavaMapping(enc); if (javaEncoding == null) { ! if (fAllowJavaEncodings) { javaEncoding = encoding; } else { ! fErrorReporter.reportError(this.getEntityScanner(), ! XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); ! // see comment above. ! javaEncoding = "ISO8859_1"; } } if (DEBUG_ENCODINGS) { System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); if (javaEncoding == encoding) { --- 2515,2613 ---- * encoded using. If the user has specified that * Java encoding names are allowed, then the * encoding name may be a Java encoding name; * otherwise, it is an ianaEncoding name. * @param isBigEndian For encodings (like uCS-4), whose names cannot ! * specify a byte order, this tells whether the order is bigEndian. null menas ! * unknown or not relevant. * * @return Returns a reader. */ protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) ! throws IOException { ! // normalize encoding name ! if (encoding == null) { ! encoding = "UTF-8"; ! } ! ! // try to use an optimized reader ! String ENCODING = encoding.toUpperCase(Locale.ENGLISH); ! if (ENCODING.equals("UTF-8")) { ! if (DEBUG_ENCODINGS) { ! System.out.println("$$$ creating UTF8Reader"); ! } ! return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); ! } ! if (ENCODING.equals("US-ASCII")) { ! if (DEBUG_ENCODINGS) { ! System.out.println("$$$ creating ASCIIReader"); ! } ! return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); ! } ! if(ENCODING.equals("ISO-10646-UCS-4")) { ! if(isBigEndian != null) { ! boolean isBE = isBigEndian.booleanValue(); ! if(isBE) { ! return new UCSReader(inputStream, UCSReader.UCS4BE); } else { ! return new UCSReader(inputStream, UCSReader.UCS4LE); } ! } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, ! "EncodingByteOrderUnsupported", ! new Object[] { encoding }, ! XMLErrorReporter.SEVERITY_FATAL_ERROR); ! } ! } ! if(ENCODING.equals("ISO-10646-UCS-2")) { ! if(isBigEndian != null) { // sould never happen with this encoding... ! boolean isBE = isBigEndian.booleanValue(); ! if(isBE) { ! return new UCSReader(inputStream, UCSReader.UCS2BE); } else { ! return new UCSReader(inputStream, UCSReader.UCS2LE); } ! } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, ! "EncodingByteOrderUnsupported", ! new Object[] { encoding }, ! XMLErrorReporter.SEVERITY_FATAL_ERROR); ! } } // check for valid name boolean validIANA = XMLChar.isValidIANAEncoding(encoding); boolean validJava = XMLChar.isValidJavaEncoding(encoding); if (!validIANA || (fAllowJavaEncodings && !validJava)) { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); ! // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 ! // because every byte is a valid ISO Latin 1 character. ! // It may not translate correctly but if we failed on ! // the encoding anyway, then we're expecting the content ! // of the document to be bad. This will just prevent an ! // invalid UTF-8 sequence to be detected. This is only ! // important when continue-after-fatal-error is turned ! // on. -Ac encoding = "ISO-8859-1"; } // try to use a Java reader ! String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); if (javaEncoding == null) { ! if(fAllowJavaEncodings) { javaEncoding = encoding; } else { ! fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, "EncodingDeclInvalid", new Object[] { encoding }, XMLErrorReporter.SEVERITY_FATAL_ERROR); ! // see comment above. ! javaEncoding = "ISO8859_1"; } } if (DEBUG_ENCODINGS) { System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); if (javaEncoding == encoding) {
*** 2891,2972 **** } } } // print() /** ! * Information about auto-detectable encodings. * * @xerces.internal * ! * @author Michael Glavassevich, IBM */ ! private static class EncodingInfo { ! public static final String STR_UTF8 = "UTF-8"; ! public static final String STR_UTF16 = "UTF-16"; ! public static final String STR_UTF16BE = "UTF-16BE"; ! public static final String STR_UTF16LE = "UTF-16LE"; ! public static final String STR_UCS4 = "ISO-10646-UCS-4"; ! public static final String STR_UCS2 = "ISO-10646-UCS-2"; ! public static final String STR_CP037 = "CP037"; ! ! /** UTF-8 **/ ! public static final EncodingInfo UTF_8 = ! new EncodingInfo(STR_UTF8, null, false); ! ! /** UTF-8, with BOM **/ ! public static final EncodingInfo UTF_8_WITH_BOM = ! new EncodingInfo(STR_UTF8, null, true); ! ! /** UTF-16, big-endian **/ ! public static final EncodingInfo UTF_16_BIG_ENDIAN = ! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, false); ! ! /** UTF-16, big-endian with BOM **/ ! public static final EncodingInfo UTF_16_BIG_ENDIAN_WITH_BOM = ! new EncodingInfo(STR_UTF16BE, STR_UTF16, Boolean.TRUE, true); ! ! /** UTF-16, little-endian **/ ! public static final EncodingInfo UTF_16_LITTLE_ENDIAN = ! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, false); ! ! /** UTF-16, little-endian with BOM **/ ! public static final EncodingInfo UTF_16_LITTLE_ENDIAN_WITH_BOM = ! new EncodingInfo(STR_UTF16LE, STR_UTF16, Boolean.FALSE, true); ! ! /** UCS-4, big-endian **/ ! public static final EncodingInfo UCS_4_BIG_ENDIAN = ! new EncodingInfo(STR_UCS4, Boolean.TRUE, false); ! ! /** UCS-4, little-endian **/ ! public static final EncodingInfo UCS_4_LITTLE_ENDIAN = ! new EncodingInfo(STR_UCS4, Boolean.FALSE, false); ! ! /** UCS-4, unusual byte-order (2143) or (3412) **/ ! public static final EncodingInfo UCS_4_UNUSUAL_BYTE_ORDER = ! new EncodingInfo(STR_UCS4, null, false); ! ! /** EBCDIC **/ ! public static final EncodingInfo EBCDIC = new EncodingInfo(STR_CP037, null, false); ! ! public final String autoDetectedEncoding; ! public final String readerEncoding; ! public final Boolean isBigEndian; ! public final boolean hasBOM; ! ! private EncodingInfo(String autoDetectedEncoding, Boolean isBigEndian, boolean hasBOM) { ! this(autoDetectedEncoding, autoDetectedEncoding, isBigEndian, hasBOM); ! } // <init>(String,Boolean,boolean) ! ! private EncodingInfo(String autoDetectedEncoding, String readerEncoding, ! Boolean isBigEndian, boolean hasBOM) { ! this.autoDetectedEncoding = autoDetectedEncoding; ! this.readerEncoding = readerEncoding; ! this.isBigEndian = isBigEndian; ! this.hasBOM = hasBOM; ! } // <init>(String,String,Boolean,boolean) ! } // class EncodingInfo /** * This class wraps the byte inputstreams we're presented with. * We need it because java.io.InputStreams don't provide * functionality to reread processed bytes, and they have a habit --- 2896,3007 ---- } } } // print() /** ! * Buffer used in entity manager to reuse character arrays instead ! * of creating new ones every time. ! * ! * @xerces.internal ! * ! * @author Ankit Pasricha, IBM ! */ ! private static class CharacterBuffer { ! ! /** character buffer */ ! private char[] ch; ! ! /** whether the buffer is for an external or internal scanned entity */ ! private boolean isExternal; ! ! public CharacterBuffer(boolean isExternal, int size) { ! this.isExternal = isExternal; ! ch = new char[size]; ! } ! } ! ! ! /** ! * Stores a number of character buffers and provides it to the entity ! * manager to use when an entity is seen. * * @xerces.internal * ! * @author Ankit Pasricha, IBM */ ! private static class CharacterBufferPool { ! ! private static final int DEFAULT_POOL_SIZE = 3; ! ! private CharacterBuffer[] fInternalBufferPool; ! private CharacterBuffer[] fExternalBufferPool; ! private int fExternalBufferSize; ! private int fInternalBufferSize; ! private int poolSize; ! ! private int fInternalTop; ! private int fExternalTop; ! ! public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { ! this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); ! } ! ! public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { ! fExternalBufferSize = externalBufferSize; ! fInternalBufferSize = internalBufferSize; ! this.poolSize = poolSize; ! init(); ! } ! ! /** Initializes buffer pool. **/ ! private void init() { ! fInternalBufferPool = new CharacterBuffer[poolSize]; ! fExternalBufferPool = new CharacterBuffer[poolSize]; ! fInternalTop = -1; ! fExternalTop = -1; ! } ! ! /** Retrieves buffer from pool. **/ ! public CharacterBuffer getBuffer(boolean external) { ! if (external) { ! if (fExternalTop > -1) { ! return fExternalBufferPool[fExternalTop--]; ! } ! else { ! return new CharacterBuffer(true, fExternalBufferSize); ! } ! } ! else { ! if (fInternalTop > -1) { ! return fInternalBufferPool[fInternalTop--]; ! } ! else { ! return new CharacterBuffer(false, fInternalBufferSize); ! } ! } ! } ! ! /** Returns buffer to pool. **/ ! public void returnToPool(CharacterBuffer buffer) { ! if (buffer.isExternal) { ! if (fExternalTop < fExternalBufferPool.length - 1) { ! fExternalBufferPool[++fExternalTop] = buffer; ! } ! } ! else if (fInternalTop < fInternalBufferPool.length - 1) { ! fInternalBufferPool[++fInternalTop] = buffer; ! } ! } ! ! /** Sets the size of external buffers and dumps the old pool. **/ ! public void setExternalBufferSize(int bufferSize) { ! fExternalBufferSize = bufferSize; ! fExternalBufferPool = new CharacterBuffer[poolSize]; ! fExternalTop = -1; ! } ! } /** * This class wraps the byte inputstreams we're presented with. * We need it because java.io.InputStreams don't provide * functionality to reread processed bytes, and they have a habit
*** 3015,3077 **** public void rewind() { fOffset = fStartOffset; } ! public int readAndBuffer() throws IOException { if (fOffset == fData.length) { byte[] newData = new byte[fOffset << 1]; System.arraycopy(fData, 0, newData, 0, fOffset); fData = newData; } ! final int b = fInputStream.read(); if (b == -1) { fEndOffset = fOffset; return -1; } fData[fLength++] = (byte)b; fOffset++; return b & 0xff; } - public int read() throws IOException { - if (fOffset < fLength) { - return fData[fOffset++] & 0xff; - } - if (fOffset == fEndOffset) { - return -1; - } - if (fCurrentEntity.mayReadChunks) { - return fInputStream.read(); - } - return readAndBuffer(); - } - public int read(byte[] b, int off, int len) throws IOException { ! final int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } ! // read a block of data as requested if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { if (!fCurrentEntity.xmlDeclChunkRead) { fCurrentEntity.xmlDeclChunkRead = true; len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; } return fInputStream.read(b, off, len); } ! int returnedVal = readAndBuffer(); ! if (returnedVal == -1) { ! fEndOffset = fOffset; ! return -1; } b[off] = (byte)returnedVal; return 1; } if (len < bytesLeft) { if (len <= 0) { return 0; } --- 3050,3112 ---- public void rewind() { fOffset = fStartOffset; } ! public int read() throws IOException { ! int b = 0; ! if (fOffset < fLength) { ! return fData[fOffset++] & 0xff; ! } ! if (fOffset == fEndOffset) { ! return -1; ! } if (fOffset == fData.length) { byte[] newData = new byte[fOffset << 1]; System.arraycopy(fData, 0, newData, 0, fOffset); fData = newData; } ! b = fInputStream.read(); if (b == -1) { fEndOffset = fOffset; return -1; } fData[fLength++] = (byte)b; fOffset++; return b & 0xff; } public int read(byte[] b, int off, int len) throws IOException { ! int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } ! /** ! * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); ! * //System.out.println("fInputStream = " + fInputStream ); ! * // better get some more for the voracious reader... */ ! if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { if (!fCurrentEntity.xmlDeclChunkRead) { fCurrentEntity.xmlDeclChunkRead = true; len = Entity.ScannedEntity.DEFAULT_XMLDECL_BUFFER_SIZE; } return fInputStream.read(b, off, len); } ! ! int returnedVal = read(); ! if(returnedVal == -1) { ! fEndOffset = fOffset; ! return -1; } b[off] = (byte)returnedVal; return 1; + } if (len < bytesLeft) { if (len <= 0) { return 0; }
*** 3083,3093 **** } fOffset += len; return len; } ! public long skip(long n) throws IOException { int bytesLeft; if (n <= 0) { return 0; } bytesLeft = fLength - fOffset; --- 3118,3129 ---- } fOffset += len; return len; } ! public long skip(long n) ! throws IOException { int bytesLeft; if (n <= 0) { return 0; } bytesLeft = fLength - fOffset;
*** 3104,3114 **** fOffset += bytesLeft; if (fOffset == fEndOffset) { return bytesLeft; } n -= bytesLeft; ! /* * In a manner of speaking, when this class isn't permitting more * than one byte at a time to be read, it is "blocking". The * available() method should indicate how much can be read without * blocking, so while we're in this mode, it should only indicate * that bytes in its buffer are available; otherwise, the result of --- 3140,3150 ---- fOffset += bytesLeft; if (fOffset == fEndOffset) { return bytesLeft; } n -= bytesLeft; ! /* * In a manner of speaking, when this class isn't permitting more * than one byte at a time to be read, it is "blocking". The * available() method should indicate how much can be read without * blocking, so while we're in this mode, it should only indicate * that bytes in its buffer are available; otherwise, the result of
*** 3116,3142 **** */ return fInputStream.skip(n) + bytesLeft; } public int available() throws IOException { ! final int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fCurrentEntity.mayReadChunks ? fInputStream.available() ! : 0; } return bytesLeft; } public void mark(int howMuch) { fMark = fOffset; } public void reset() { fOffset = fMark; } public boolean markSupported() { return true; } --- 3152,3179 ---- */ return fInputStream.skip(n) + bytesLeft; } public int available() throws IOException { ! int bytesLeft = fLength - fOffset; if (bytesLeft == 0) { if (fOffset == fEndOffset) { return -1; } return fCurrentEntity.mayReadChunks ? fInputStream.available() ! : 0; } return bytesLeft; } public void mark(int howMuch) { fMark = fOffset; } public void reset() { fOffset = fMark; + //test(); } public boolean markSupported() { return true; }
< prev index next >