1 /*
  2  * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
  3  *
  4  * Redistribution and use in source and binary forms, with or without
  5  * modification, are permitted provided that the following conditions
  6  * are met:
  7  *
  8  *   - Redistributions of source code must retain the above copyright
  9  *     notice, this list of conditions and the following disclaimer.
 10  *
 11  *   - Redistributions in binary form must reproduce the above copyright
 12  *     notice, this list of conditions and the following disclaimer in the
 13  *     documentation and/or other materials provided with the distribution.
 14  *
 15  *   - Neither the name of Oracle nor the names of its
 16  *     contributors may be used to endorse or promote products derived
 17  *     from this software without specific prior written permission.
 18  *
 19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  */
 31 
 32 #include <assert.h>
 33 #include <string.h>
 34 #include <stdlib.h>
 35 
 36 #include "endian.hpp"
 37 #include "imageDecompressor.hpp"
 38 #include "imageFile.hpp"
 39 #include "inttypes.hpp"
 40 #include "jni.h"
 41 #include "osSupport.hpp"
 42 
 43 // Map the full jimage, only with 64 bit addressing.
 44 bool ImageFileReader::memory_map_image = sizeof(void *) == 8;
 45 
 46 #ifdef WIN32
 47 const char FileSeparator = '\\';
 48 #else
 49 const char FileSeparator = '/';
 50 #endif
 51 
 52 // Image files are an alternate file format for storing classes and resources. The
 53 // goal is to supply file access which is faster and smaller than the jar format.
 54 //
// (More detailed notes in the header.)
 56 //
 57 
 58 // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
 59 s4 ImageStrings::hash_code(const char* string, s4 seed) {
 60     assert(seed > 0 && "invariant");
 61     // Access bytes as unsigned.
 62     u1* bytes = (u1*)string;
 63     u4 useed = (u4)seed;
 64     // Compute hash code.
 65     for (u1 byte = *bytes++; byte; byte = *bytes++) {
 66         useed = (useed * HASH_MULTIPLIER) ^ byte;
 67     }
 68     // Ensure the result is not signed.
 69     return (s4)(useed & 0x7FFFFFFF);
 70 }
 71 
 72 // Match up a string in a perfect hash table.
 73 // Returns the index where the name should be.
 74 // Result still needs validation for precise match (false positive.)
 75 s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
 76     // If the table is empty, then short cut.
 77     if (!redirect || !length) {
 78         return NOT_FOUND;
 79     }
 80     // Compute the basic perfect hash for name.
 81     s4 hash_code = ImageStrings::hash_code(name);
 82     // Modulo table size.
 83     s4 index = hash_code % length;
 84     // Get redirect entry.
 85     //   value == 0 then not found
 86     //   value < 0 then -1 - value is true index
 87     //   value > 0 then value is seed for recomputing hash.
 88     s4 value = endian->get(redirect[index]);
 89     // if recompute is required.
 90     if (value > 0 ) {
 91         // Entry collision value, need to recompute hash.
 92         hash_code = ImageStrings::hash_code(name, value);
 93         // Modulo table size.
 94         return hash_code % length;
 95     } else if (value < 0) {
 96         // Compute direct index.
 97         return -1 - value;
 98     }
 99     // No entry found.
100     return NOT_FOUND;
101 }
102 
103 // Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
104 // return non-NULL address of remaining portion of string.  Otherwise, return
105 // NULL.    Used to test sections of a path without copying from image string
106 // table.
107 const char* ImageStrings::starts_with(const char* string, const char* start) {
108     char ch1, ch2;
109     // Match up the strings the best we can.
110     while ((ch1 = *string) && (ch2 = *start)) {
111         if (ch1 != ch2) {
112             // Mismatch, return NULL.
113             return NULL;
114         }
115         // Next characters.
116         string++, start++;
117     }
118     // Return remainder of string.
119     return string;
120 }
121 
122 // Inflates the attribute stream into individual values stored in the long
123 // array _attributes. This allows an attribute value to be quickly accessed by
124 // direct indexing.  Unspecified values default to zero (from constructor.)
125 void ImageLocation::set_data(u1* data) {
126     // Deflate the attribute stream into an array of attributes.
127     u1 byte;
128     // Repeat until end header is found.
129     while ((data != NULL) && (byte = *data)) {
130         // Extract kind from header byte.
131         u1 kind = attribute_kind(byte);
132         assert(kind < ATTRIBUTE_COUNT && "invalid image location attribute");
133         if (kind == ATTRIBUTE_END) {
134             break;
135         }
136         // Extract length of data (in bytes).
137         u1 n = attribute_length(byte);
138         // Read value (most significant first.)
139         _attributes[kind] = attribute_value(data + 1, n);
140         // Position to next attribute by skipping attribute header and data bytes.
141         data += n + 1;
142     }
143 }
144 
145 // Zero all attribute values.
146 void ImageLocation::clear_data() {
147     // Set defaults to zero.
148     memset(_attributes, 0, sizeof(_attributes));
149 }
150 
151 // Manage a table of open image files.  This table allows multiple access points
152 // to share an open image.
153 ImageFileReaderTable::ImageFileReaderTable() : _count(0), _max(_growth) {
154     _table = static_cast<ImageFileReader**>(calloc(_max, sizeof(ImageFileReader*)));
155     assert(_table != NULL && "allocation failed");
156 }
157 
158 // Add a new image entry to the table.
159 void ImageFileReaderTable::add(ImageFileReader* image) {
160     if (_count == _max) {
161         _max += _growth;
162         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
163     }
164     _table[_count++] = image;
165 }
166 
167 // Remove an image entry from the table.
168 void ImageFileReaderTable::remove(ImageFileReader* image) {
169     for (u4 i = 0; i < _count; i++) {
170         if (_table[i] == image) {
171             // Swap the last element into the found slot
172             _table[i] = _table[--_count];
173             break;
174         }
175     }
176 
177     if (_count != 0 && _count == _max - _growth) {
178         _max -= _growth;
179         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
180     }
181 }
182 
183 // Determine if image entry is in table.
184 bool ImageFileReaderTable::contains(ImageFileReader* image) {
185     for (u4 i = 0; i < _count; i++) {
186         if (_table[i] == image) {
187             return true;
188         }
189     }
190     return false;
191 }
192 
// Table to manage multiple opens of an image file.  The functions in this
// file that read or modify it (find_image, open, close, id_check) do so while
// holding _reader_table_lock.
ImageFileReaderTable ImageFileReader::_reader_table;

// Critical section guarding access to ImageFileReader::_reader_table.
SimpleCriticalSection _reader_table_lock;
197 
198 // Locate an image if file already open.
199 ImageFileReader* ImageFileReader::find_image(const char* name) {
200     // Lock out _reader_table.
201     SimpleCriticalSectionLock cs(&_reader_table_lock);
202     // Search for an exist image file.
203     for (u4 i = 0; i < _reader_table.count(); i++) {
204         // Retrieve table entry.
205         ImageFileReader* reader = _reader_table.get(i);
206         // If name matches, then reuse (bump up use count.)
207         assert(reader->name() != NULL && "reader->name must not be null");
208         if (strcmp(reader->name(), name) == 0) {
209             reader->inc_use();
210             return reader;
211         }
212     }
213 
214     return NULL;
215 }
216 
217 // Open an image file, reuse structure if file already open.
218 ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
219     ImageFileReader* reader = find_image(name);
220     if (reader != NULL) {
221         return reader;
222     }
223 
224     // Need a new image reader.
225     reader = new ImageFileReader(name, big_endian);
226     if (reader == NULL || !reader->open()) {
227         // Failed to open.
228         delete reader;
229         return NULL;
230     }
231 
232     // Lock to update
233     SimpleCriticalSectionLock cs(&_reader_table_lock);
234     // Search for an existing image file.
235     for (u4 i = 0; i < _reader_table.count(); i++) {
236         // Retrieve table entry.
237         ImageFileReader* existing_reader = _reader_table.get(i);
238         // If name matches, then reuse (bump up use count.)
239         assert(reader->name() != NULL && "reader->name still must not be null");
240         if (strcmp(existing_reader->name(), name) == 0) {
241             existing_reader->inc_use();
242             reader->close();
243             delete reader;
244             return existing_reader;
245         }
246     }
247     // Bump use count and add to table.
248     reader->inc_use();
249     _reader_table.add(reader);
250     return reader;
251 }
252 
253 // Close an image file if the file is not in use elsewhere.
254 void ImageFileReader::close(ImageFileReader *reader) {
255     // Lock out _reader_table.
256     SimpleCriticalSectionLock cs(&_reader_table_lock);
257     // If last use then remove from table and then close.
258     if (reader->dec_use()) {
259         _reader_table.remove(reader);
260         delete reader;
261     }
262 }
263 
264 // Return an id for the specified ImageFileReader.
265 u8 ImageFileReader::reader_to_ID(ImageFileReader *reader) {
266     // ID is just the cloaked reader address.
267     return (u8)reader;
268 }
269 
270 // Validate the image id.
271 bool ImageFileReader::id_check(u8 id) {
272     // Make sure the ID is a managed (_reader_table) reader.
273     SimpleCriticalSectionLock cs(&_reader_table_lock);
274     return _reader_table.contains((ImageFileReader*)id);
275 }
276 
277 // Return an id for the specified ImageFileReader.
278 ImageFileReader* ImageFileReader::id_to_reader(u8 id) {
279     assert(id_check(id) && "invalid image id");
280     return (ImageFileReader*)id;
281 }
282 
283 // Constructor initializes to a closed state.
284 ImageFileReader::ImageFileReader(const char* name, bool big_endian) {
285     // Copy the image file name.
286      int len = (int) strlen(name) + 1;
287     _name = new char[len];
288     assert(_name != NULL  && "allocation failed");
289     strncpy(_name, name, len);
290     // Initialize for a closed file.
291     _fd = -1;
292     _endian = Endian::get_handler(big_endian);
293     _index_data = NULL;
294 }
295 
296 // Close image and free up data structures.
297 ImageFileReader::~ImageFileReader() {
298     // Ensure file is closed.
299     close();
300     // Free up name.
301     if (_name) {
302         delete[] _name;
303         _name = NULL;
304     }
305 }
306 
307 // Open image file for read access.
308 bool ImageFileReader::open() {
309     // If file exists open for reading.
310     _fd = osSupport::openReadOnly(_name);
311     if (_fd == -1) {
312         return false;
313     }
314     // Retrieve the file size.
315     _file_size = osSupport::size(_name);
316     // Read image file header and verify it has a valid header.
317     size_t header_size = sizeof(ImageHeader);
318     if (_file_size < header_size ||
319         !read_at((u1*)&_header, header_size, 0) ||
320         _header.magic(_endian) != IMAGE_MAGIC ||
321         _header.major_version(_endian) != MAJOR_VERSION ||
322         _header.minor_version(_endian) != MINOR_VERSION) {
323         close();
324         return false;
325     }
326     // Size of image index.
327     _index_size = index_size();
328     // Make sure file is large enough to contain the index.
329     if (_file_size < _index_size) {
330         return false;
331     }
332     // Memory map image (minimally the index.)
333     _index_data = (u1*)osSupport::map_memory(_fd, _name, 0, (size_t)map_size());
334     assert(_index_data && "image file not memory mapped");
335     // Retrieve length of index perfect hash table.
336     u4 length = table_length();
337     // Compute offset of the perfect hash table redirect table.
338     u4 redirect_table_offset = (u4)header_size;
339     // Compute offset of index attribute offsets.
340     u4 offsets_table_offset = redirect_table_offset + length * (u4)sizeof(s4);
341     // Compute offset of index location attribute data.
342     u4 location_bytes_offset = offsets_table_offset + length * (u4)sizeof(u4);
343     // Compute offset of index string table.
344     u4 string_bytes_offset = location_bytes_offset + locations_size();
345     // Compute address of the perfect hash table redirect table.
346     _redirect_table = (s4*)(_index_data + redirect_table_offset);
347     // Compute address of index attribute offsets.
348     _offsets_table = (u4*)(_index_data + offsets_table_offset);
349     // Compute address of index location attribute data.
350     _location_bytes = _index_data + location_bytes_offset;
351     // Compute address of index string table.
352     _string_bytes = _index_data + string_bytes_offset;
353     return true;
354 }
355 
356 // Close image file.
357 void ImageFileReader::close() {
358     // Deallocate the index.
359     if (_index_data) {
360         osSupport::unmap_memory((char*)_index_data, (size_t)map_size());
361         _index_data = NULL;
362     }
363     // Close file.
364     if (_fd != -1) {
365         osSupport::close(_fd);
366         _fd = -1;
367     }
368 }
369 
370 // Read directly from the file.
371 bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
372     return (u8)osSupport::read(_fd, (char*)data, size, offset) == size;
373 }
374 
375 // Find the location attributes associated with the path.    Returns true if
376 // the location is found, false otherwise.
377 bool ImageFileReader::find_location(const char* path, ImageLocation& location) const {
378     // Locate the entry in the index perfect hash table.
379     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
380     // If is found.
381     if (index != ImageStrings::NOT_FOUND) {
382         // Get address of first byte of location attribute stream.
383         u1* data = get_location_data(index);
384         // Expand location attributes.
385         location.set_data(data);
386         // Make sure result is not a false positive.
387         return verify_location(location, path);
388     }
389     return false;
390 }
391 
392 // Find the location index and size associated with the path.
393 // Returns the location index and size if the location is found, 0 otherwise.
394 u4 ImageFileReader::find_location_index(const char* path, u8 *size) const {
395     // Locate the entry in the index perfect hash table.
396     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
397     // If found.
398     if (index != ImageStrings::NOT_FOUND) {
399         // Get address of first byte of location attribute stream.
400         u4 offset = get_location_offset(index);
401         u1* data = get_location_offset_data(offset);
402         // Expand location attributes.
403         ImageLocation location(data);
404         // Make sure result is not a false positive.
405         if (verify_location(location, path)) {
406                 *size = (jlong)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
407                 return offset;
408         }
409     }
410     return 0;            // not found
411 }
412 
413 // Verify that a found location matches the supplied path (without copying.)
414 bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
415     // Manage the image string table.
416     ImageStrings strings(_string_bytes, _header.strings_size(_endian));
417     // Position to first character of the path string.
418     const char* next = path;
419     // Get module name string.
420     const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
421     // If module string is not empty.
422     if (*module != '\0') {
423         // Compare '/module/' .
424         if (*next++ != '/') return false;
425         if (!(next = ImageStrings::starts_with(next, module))) return false;
426         if (*next++ != '/') return false;
427     }
428     // Get parent (package) string
429     const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
430     // If parent string is not empty string.
431     if (*parent != '\0') {
432         // Compare 'parent/' .
433         if (!(next = ImageStrings::starts_with(next, parent))) return false;
434         if (*next++ != '/') return false;
435     }
436     // Get base name string.
437     const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
438     // Compare with basne name.
439     if (!(next = ImageStrings::starts_with(next, base))) return false;
440     // Get extension string.
441     const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
442     // If extension is not empty.
443     if (*extension != '\0') {
444         // Compare '.extension' .
445         if (*next++ != '.') return false;
446         if (!(next = ImageStrings::starts_with(next, extension))) return false;
447     }
448     // True only if complete match and no more characters.
449     return *next == '\0';
450 }
451 
452 // Return the resource for the supplied location offset.
453 void ImageFileReader::get_resource(u4 offset, u1* uncompressed_data) const {
454         // Get address of first byte of location attribute stream.
455         u1* data = get_location_offset_data(offset);
456         // Expand location attributes.
457         ImageLocation location(data);
458         // Read the data
459         get_resource(location, uncompressed_data);
460 }
461 
462 // Return the resource for the supplied location.
463 void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
464     // Retrieve the byte offset and size of the resource.
465     u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
466     u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
467     u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
468     // If the resource is compressed.
469     if (compressed_size != 0) {
470         u1* compressed_data;
471         // If not memory mapped read in bytes.
472         if (!memory_map_image) {
473             // Allocate buffer for compression.
474             compressed_data = new u1[(size_t)compressed_size];
475             assert(compressed_data != NULL && "allocation failed");
476             // Read bytes from offset beyond the image index.
477             bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
478             assert(is_read && "error reading from image or short read");
479         } else {
480             compressed_data = get_data_address() + offset;
481         }
482         // Get image string table.
483         const ImageStrings strings = get_strings();
484         // Decompress resource.
485         ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
486                         &strings, _endian);
487         // If not memory mapped then release temporary buffer.
488         if (!memory_map_image) {
489                 delete[] compressed_data;
490         }
491     } else {
492         // Read bytes from offset beyond the image index.
493         bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
494         assert(is_read && "error reading from image or short read");
495     }
496 }