1 /*
  2  * Copyright (c) 2015, 2026, Oracle and/or its affiliates. All rights reserved.
  3  *
  4  * Redistribution and use in source and binary forms, with or without
  5  * modification, are permitted provided that the following conditions
  6  * are met:
  7  *
  8  *   - Redistributions of source code must retain the above copyright
  9  *     notice, this list of conditions and the following disclaimer.
 10  *
 11  *   - Redistributions in binary form must reproduce the above copyright
 12  *     notice, this list of conditions and the following disclaimer in the
 13  *     documentation and/or other materials provided with the distribution.
 14  *
 15  *   - Neither the name of Oracle nor the names of its
 16  *     contributors may be used to endorse or promote products derived
 17  *     from this software without specific prior written permission.
 18  *
 19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  */
 31 
 32 #include <assert.h>
 33 #include <string.h>
 34 #include <stdlib.h>
 35 
 36 #include "endian.hpp"
 37 #include "imageDecompressor.hpp"
 38 #include "imageFile.hpp"
 39 #include "inttypes.hpp"
 40 #include "jni.h"
 41 #include "osSupport.hpp"
 42 
 43 // Map the full jimage, only with 64 bit addressing.
 44 bool ImageFileReader::memory_map_image = sizeof(void *) == 8;
 45 
 46 #ifdef WIN32
 47 const char FileSeparator = '\\';
 48 #else
 49 const char FileSeparator = '/';
 50 #endif
 51 
// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
//
// (More detailed notes in the header.)
//
 57 
 58 // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
 59 s4 ImageStrings::hash_code(const char* string, s4 seed) {
 60     assert(seed > 0 && "invariant");
 61     // Access bytes as unsigned.
 62     u1* bytes = (u1*)string;
 63     u4 useed = (u4)seed;
 64     // Compute hash code.
 65     for (u1 byte = *bytes++; byte; byte = *bytes++) {
 66         useed = (useed * HASH_MULTIPLIER) ^ byte;
 67     }
 68     // Ensure the result is not signed.
 69     return (s4)(useed & 0x7FFFFFFF);
 70 }
 71 
 72 // Match up a string in a perfect hash table.
 73 // Returns the index where the name should be.
 74 // Result still needs validation for precise match (false positive.)
 75 s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
 76     // If the table is empty, then short cut.
 77     if (!redirect || !length) {
 78         return NOT_FOUND;
 79     }
 80     // Compute the basic perfect hash for name.
 81     s4 hash_code = ImageStrings::hash_code(name);
 82     // Modulo table size.
 83     s4 index = hash_code % length;
 84     // Get redirect entry.
 85     //   value == 0 then not found
 86     //   value < 0 then -1 - value is true index
 87     //   value > 0 then value is seed for recomputing hash.
 88     s4 value = endian->get(redirect[index]);
 89     // if recompute is required.
 90     if (value > 0 ) {
 91         // Entry collision value, need to recompute hash.
 92         hash_code = ImageStrings::hash_code(name, value);
 93         // Modulo table size.
 94         return hash_code % length;
 95     } else if (value < 0) {
 96         // Compute direct index.
 97         return -1 - value;
 98     }
 99     // No entry found.
100     return NOT_FOUND;
101 }
102 
103 // Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
104 // return non-NULL address of remaining portion of string.  Otherwise, return
105 // NULL.    Used to test sections of a path without copying from image string
106 // table.
107 const char* ImageStrings::starts_with(const char* string, const char* start) {
108     char ch1, ch2;
109     // Match up the strings the best we can.
110     while ((ch1 = *string) && (ch2 = *start)) {
111         if (ch1 != ch2) {
112             // Mismatch, return NULL.
113             return NULL;
114         }
115         // Next characters.
116         string++, start++;
117     }
118     // Return remainder of string.
119     return string;
120 }
121 
122 // Inflates the attribute stream into individual values stored in the long
123 // array _attributes. This allows an attribute value to be quickly accessed by
124 // direct indexing.  Unspecified values default to zero (from constructor.)
125 void ImageLocation::set_data(u1* data) {
126     // Deflate the attribute stream into an array of attributes.
127     u1 byte;
128     // Repeat until end header is found.
129     while ((data != NULL) && (byte = *data)) {
130         // Extract kind from header byte.
131         u1 kind = attribute_kind(byte);
132         assert(kind < ATTRIBUTE_COUNT && "invalid image location attribute");
133         if (kind == ATTRIBUTE_END) {
134             break;
135         }
136         // Extract length of data (in bytes).
137         u1 n = attribute_length(byte);
138         // Read value (most significant first.)
139         _attributes[kind] = attribute_value(data + 1, n);
140         // Position to next attribute by skipping attribute header and data bytes.
141         data += n + 1;
142     }
143 }
144 
145 // Zero all attribute values.
146 void ImageLocation::clear_data() {
147     // Set defaults to zero.
148     memset(_attributes, 0, sizeof(_attributes));
149 }
150 
151 // Manage a table of open image files.  This table allows multiple access points
152 // to share an open image.
153 ImageFileReaderTable::ImageFileReaderTable() : _count(0), _max(_growth) {
154     _table = static_cast<ImageFileReader**>(calloc(_max, sizeof(ImageFileReader*)));
155     assert(_table != NULL && "allocation failed");
156 }
157 
158 // Add a new image entry to the table.
159 void ImageFileReaderTable::add(ImageFileReader* image) {
160     if (_count == _max) {
161         _max += _growth;
162         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
163     }
164     _table[_count++] = image;
165 }
166 
167 // Remove an image entry from the table.
168 void ImageFileReaderTable::remove(ImageFileReader* image) {
169     for (u4 i = 0; i < _count; i++) {
170         if (_table[i] == image) {
171             // Swap the last element into the found slot
172             _table[i] = _table[--_count];
173             break;
174         }
175     }
176 
177     if (_count != 0 && _count == _max - _growth) {
178         _max -= _growth;
179         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
180     }
181 }
182 
183 // Table to manage multiple opens of an image file.
184 ImageFileReaderTable ImageFileReader::_reader_table;
185 
186 SimpleCriticalSection _reader_table_lock;
187 
188 // Locate an image if file already open.
189 ImageFileReader* ImageFileReader::find_image(const char* name) {
190     // Lock out _reader_table.
191     SimpleCriticalSectionLock cs(&_reader_table_lock);
192     // Search for an exist image file.
193     for (u4 i = 0; i < _reader_table.count(); i++) {
194         // Retrieve table entry.
195         ImageFileReader* reader = _reader_table.get(i);
196         // If name matches, then reuse (bump up use count.)
197         assert(reader->name() != NULL && "reader->name must not be null");
198         if (strcmp(reader->name(), name) == 0) {
199             reader->inc_use();
200             return reader;
201         }
202     }
203 
204     return NULL;
205 }
206 
207 // Open an image file, reuse structure if file already open.
208 ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
209     ImageFileReader* reader = find_image(name);
210     if (reader != NULL) {
211         return reader;
212     }
213 
214     // Need a new image reader.
215     reader = new ImageFileReader(name, big_endian);
216     if (reader == NULL || !reader->open()) {
217         // Failed to open.
218         delete reader;
219         return NULL;
220     }
221 
222     // Lock to update
223     SimpleCriticalSectionLock cs(&_reader_table_lock);
224     // Search for an existing image file.
225     for (u4 i = 0; i < _reader_table.count(); i++) {
226         // Retrieve table entry.
227         ImageFileReader* existing_reader = _reader_table.get(i);
228         // If name matches, then reuse (bump up use count.)
229         assert(reader->name() != NULL && "reader->name still must not be null");
230         if (strcmp(existing_reader->name(), name) == 0) {
231             existing_reader->inc_use();
232             reader->close();
233             delete reader;
234             return existing_reader;
235         }
236     }
237     // Bump use count and add to table.
238     reader->inc_use();
239     _reader_table.add(reader);
240     return reader;
241 }
242 
243 // Close an image file if the file is not in use elsewhere.
244 void ImageFileReader::close(ImageFileReader *reader) {
245     // Lock out _reader_table.
246     SimpleCriticalSectionLock cs(&_reader_table_lock);
247     // If last use then remove from table and then close.
248     if (reader->dec_use()) {
249         _reader_table.remove(reader);
250         delete reader;
251     }
252 }
253 
254 // Constructor initializes to a closed state.
255 ImageFileReader::ImageFileReader(const char* name, bool big_endian) {
256     // Copy the image file name.
257      int len = (int) strlen(name) + 1;
258     _name = new char[len];
259     assert(_name != NULL  && "allocation failed");
260     strncpy(_name, name, len);
261     // Initialize for a closed file.
262     _fd = -1;
263     _endian = Endian::get_handler(big_endian);
264     _index_data = NULL;
265 }
266 
267 // Close image and free up data structures.
268 ImageFileReader::~ImageFileReader() {
269     // Ensure file is closed.
270     close();
271     // Free up name.
272     if (_name) {
273         delete[] _name;
274         _name = NULL;
275     }
276 }
277 
278 // Open image file for read access.
279 bool ImageFileReader::open() {
280     // If file exists open for reading.
281     _fd = osSupport::openReadOnly(_name);
282     if (_fd == -1) {
283         return false;
284     }
285     // Retrieve the file size.
286     _file_size = osSupport::size(_name);
287     // Read image file header and verify it has a valid header.
288     size_t header_size = sizeof(ImageHeader);
289     if (_file_size < header_size ||
290         !read_at((u1*)&_header, header_size, 0) ||
291         _header.magic(_endian) != IMAGE_MAGIC ||
292         _header.major_version(_endian) != MAJOR_VERSION ||
293         _header.minor_version(_endian) != MINOR_VERSION) {
294         close();
295         return false;
296     }
297     // Size of image index.
298     _index_size = index_size();
299     // Make sure file is large enough to contain the index.
300     if (_file_size < _index_size) {
301         return false;
302     }
303     // Memory map image (minimally the index.)
304     _index_data = (u1*)osSupport::map_memory(_fd, _name, 0, (size_t)map_size());
305     assert(_index_data && "image file not memory mapped");
306     // Retrieve length of index perfect hash table.
307     u4 length = table_length();
308     // Compute offset of the perfect hash table redirect table.
309     u4 redirect_table_offset = (u4)header_size;
310     // Compute offset of index attribute offsets.
311     u4 offsets_table_offset = redirect_table_offset + length * (u4)sizeof(s4);
312     // Compute offset of index location attribute data.
313     u4 location_bytes_offset = offsets_table_offset + length * (u4)sizeof(u4);
314     // Compute offset of index string table.
315     u4 string_bytes_offset = location_bytes_offset + locations_size();
316     // Compute address of the perfect hash table redirect table.
317     _redirect_table = (s4*)(_index_data + redirect_table_offset);
318     // Compute address of index attribute offsets.
319     _offsets_table = (u4*)(_index_data + offsets_table_offset);
320     // Compute address of index location attribute data.
321     _location_bytes = _index_data + location_bytes_offset;
322     // Compute address of index string table.
323     _string_bytes = _index_data + string_bytes_offset;
324     return true;
325 }
326 
327 // Close image file.
328 void ImageFileReader::close() {
329     // Deallocate the index.
330     if (_index_data) {
331         osSupport::unmap_memory((char*)_index_data, (size_t)map_size());
332         _index_data = NULL;
333     }
334     // Close file.
335     if (_fd != -1) {
336         osSupport::close(_fd);
337         _fd = -1;
338     }
339 }
340 
341 // Read directly from the file.
342 bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
343     return (u8)osSupport::read(_fd, (char*)data, size, offset) == size;
344 }
345 
346 // Find the location index and size associated with the path.
347 // Returns the location index and size if the location is found, 0 otherwise.
348 u4 ImageFileReader::find_location_index(const char* path, u8 *size) const {
349     // Locate the entry in the index perfect hash table.
350     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
351     // If found.
352     if (index != ImageStrings::NOT_FOUND) {
353         // Get address of first byte of location attribute stream.
354         u4 offset = get_location_offset(index);
355         u1* data = get_location_offset_data(offset);
356         // Expand location attributes.
357         ImageLocation location(data);
358         // Make sure result is not a false positive.
359         if (verify_location(location, path)) {
360                 *size = (jlong)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
361                 return offset;
362         }
363     }
364     return 0;            // not found
365 }
366 
367 // Verify that a found location matches the supplied path (without copying.)
368 bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
369     // Manage the image string table.
370     ImageStrings strings(_string_bytes, _header.strings_size(_endian));
371     // Position to first character of the path string.
372     const char* next = path;
373     // Get module name string.
374     const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
375     // If module string is not empty.
376     if (*module != '\0') {
377         // Compare '/module/' .
378         if (*next++ != '/') return false;
379         if (!(next = ImageStrings::starts_with(next, module))) return false;
380         if (*next++ != '/') return false;
381     }
382     // Get parent (package) string
383     const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
384     // If parent string is not empty string.
385     if (*parent != '\0') {
386         // Compare 'parent/' .
387         if (!(next = ImageStrings::starts_with(next, parent))) return false;
388         if (*next++ != '/') return false;
389     }
390     // Get base name string.
391     const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
392     // Compare with basne name.
393     if (!(next = ImageStrings::starts_with(next, base))) return false;
394     // Get extension string.
395     const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
396     // If extension is not empty.
397     if (*extension != '\0') {
398         // Compare '.extension' .
399         if (*next++ != '.') return false;
400         if (!(next = ImageStrings::starts_with(next, extension))) return false;
401     }
402     // True only if complete match and no more characters.
403     return *next == '\0';
404 }
405 
406 // Return the resource for the supplied location offset.
407 void ImageFileReader::get_resource(u4 offset, u1* uncompressed_data) const {
408         // Get address of first byte of location attribute stream.
409         u1* data = get_location_offset_data(offset);
410         // Expand location attributes.
411         ImageLocation location(data);
412         // Read the data
413         get_resource(location, uncompressed_data);
414 }
415 
416 // Return the resource for the supplied location.
417 void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
418     // Retrieve the byte offset and size of the resource.
419     u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
420     u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
421     u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
422     // If the resource is compressed.
423     if (compressed_size != 0) {
424         u1* compressed_data;
425         // If not memory mapped read in bytes.
426         if (!memory_map_image) {
427             // Allocate buffer for compression.
428             compressed_data = new u1[(size_t)compressed_size];
429             assert(compressed_data != NULL && "allocation failed");
430             // Read bytes from offset beyond the image index.
431             bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
432             assert(is_read && "error reading from image or short read");
433         } else {
434             compressed_data = get_data_address() + offset;
435         }
436         // Get image string table.
437         const ImageStrings strings = get_strings();
438         // Decompress resource.
439         ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
440                         &strings, _endian);
441         // If not memory mapped then release temporary buffer.
442         if (!memory_map_image) {
443                 delete[] compressed_data;
444         }
445     } else {
446         // Read bytes from offset beyond the image index.
447         bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
448         assert(is_read && "error reading from image or short read");
449     }
450 }