1 /*
  2  * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
  3  *
  4  * Redistribution and use in source and binary forms, with or without
  5  * modification, are permitted provided that the following conditions
  6  * are met:
  7  *
  8  *   - Redistributions of source code must retain the above copyright
  9  *     notice, this list of conditions and the following disclaimer.
 10  *
 11  *   - Redistributions in binary form must reproduce the above copyright
 12  *     notice, this list of conditions and the following disclaimer in the
 13  *     documentation and/or other materials provided with the distribution.
 14  *
 15  *   - Neither the name of Oracle nor the names of its
 16  *     contributors may be used to endorse or promote products derived
 17  *     from this software without specific prior written permission.
 18  *
 19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30  */
 31 
 32 #include <assert.h>
 33 #include <string.h>
 34 #include <stdlib.h>
 35 
 36 #include "endian.hpp"
 37 #include "imageDecompressor.hpp"
 38 #include "imageFile.hpp"
 39 #include "inttypes.hpp"
 40 #include "jni.h"
 41 #include "osSupport.hpp"
 42 
 43 // Map the full jimage, only with 64 bit addressing.
 44 bool ImageFileReader::memory_map_image = sizeof(void *) == 8;
 45 
 46 #ifdef WIN32
 47 const char FileSeparator = '\\';
 48 #else
 49 const char FileSeparator = '/';
 50 #endif
 51 
 52 // Image files are an alternate file format for storing classes and resources. The
 53 // goal is to supply file access which is faster and smaller than the jar format.
 54 //
// (More detailed notes in the header.)
 56 //
 57 
 58 // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
 59 s4 ImageStrings::hash_code(const char* string, s4 seed) {
 60     assert(seed > 0 && "invariant");
 61     // Access bytes as unsigned.
 62     u1* bytes = (u1*)string;
 63     u4 useed = (u4)seed;
 64     // Compute hash code.
 65     for (u1 byte = *bytes++; byte; byte = *bytes++) {
 66         useed = (useed * HASH_MULTIPLIER) ^ byte;
 67     }
 68     // Ensure the result is not signed.
 69     return (s4)(useed & 0x7FFFFFFF);
 70 }
 71 
 72 // Match up a string in a perfect hash table.
 73 // Returns the index where the name should be.
 74 // Result still needs validation for precise match (false positive.)
 75 s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
 76     // If the table is empty, then short cut.
 77     if (!redirect || !length) {
 78         return NOT_FOUND;
 79     }
 80     // Compute the basic perfect hash for name.
 81     s4 hash_code = ImageStrings::hash_code(name);
 82     // Modulo table size.
 83     s4 index = hash_code % length;
 84     // Get redirect entry.
 85     //   value == 0 then not found
 86     //   value < 0 then -1 - value is true index
 87     //   value > 0 then value is seed for recomputing hash.
 88     s4 value = endian->get(redirect[index]);
 89     // if recompute is required.
 90     if (value > 0 ) {
 91         // Entry collision value, need to recompute hash.
 92         hash_code = ImageStrings::hash_code(name, value);
 93         // Modulo table size.
 94         return hash_code % length;
 95     } else if (value < 0) {
 96         // Compute direct index.
 97         return -1 - value;
 98     }
 99     // No entry found.
100     return NOT_FOUND;
101 }
102 
103 // Test to see if UTF-8 string begins with the start UTF-8 string.  If so,
104 // return non-NULL address of remaining portion of string.  Otherwise, return
105 // NULL.    Used to test sections of a path without copying from image string
106 // table.
107 const char* ImageStrings::starts_with(const char* string, const char* start) {
108     char ch1, ch2;
109     // Match up the strings the best we can.
110     while ((ch1 = *string) && (ch2 = *start)) {
111         if (ch1 != ch2) {
112             // Mismatch, return NULL.
113             return NULL;
114         }
115         // Next characters.
116         string++, start++;
117     }
118     // Return remainder of string.
119     return string;
120 }
121 
122 // Inflates the attribute stream into individual values stored in the long
123 // array _attributes. This allows an attribute value to be quickly accessed by
124 // direct indexing.  Unspecified values default to zero (from constructor.)
125 void ImageLocation::set_data(u1* data) {
126     // Deflate the attribute stream into an array of attributes.
127     u1 byte;
128     // Repeat until end header is found.
129     while ((data != NULL) && (byte = *data)) {
130         // Extract kind from header byte.
131         u1 kind = attribute_kind(byte);
132         assert(kind < ATTRIBUTE_COUNT && "invalid image location attribute");
133         if (kind == ATTRIBUTE_END) {
134             break;
135         }
136         // Extract length of data (in bytes).
137         u1 n = attribute_length(byte);
138         // Read value (most significant first.)
139         _attributes[kind] = attribute_value(data + 1, n);
140         // Position to next attribute by skipping attribute header and data bytes.
141         data += n + 1;
142     }
143 }
144 
145 // Zero all attribute values.
146 void ImageLocation::clear_data() {
147     // Set defaults to zero.
148     memset(_attributes, 0, sizeof(_attributes));
149 }
150 
151 // ImageModuleData constructor maps out sub-tables for faster access.
152 ImageModuleData::ImageModuleData(const ImageFileReader* image_file) :
153         _image_file(image_file),
154         _endian(image_file->endian()) {
155 }
156 
157 // Release module data resource.
158 ImageModuleData::~ImageModuleData() {
159 }
160 
161 
162 // Return the module in which a package resides.    Returns NULL if not found.
163 const char* ImageModuleData::package_to_module(const char* package_name) {
164     // replace all '/' by '.'
165     char* replaced = new char[(int) strlen(package_name) + 1];
166     assert(replaced != NULL && "allocation failed");
167     int i;
168     for (i = 0; package_name[i] != '\0'; i++) {
169       replaced[i] = package_name[i] == '/' ? '.' : package_name[i];
170     }
171     replaced[i] = '\0';
172 
173     // build path /packages/<package_name>
174     const char* radical = "/packages/";
175     char* path = new char[(int) strlen(radical) + (int) strlen(package_name) + 1];
176     assert(path != NULL && "allocation failed");
177     strcpy(path, radical);
178     strcat(path, replaced);
179     delete[] replaced;
180 
181     // retrieve package location
182     ImageLocation location;
183     bool found = _image_file->find_location(path, location);
184     delete[] path;
185     if (!found) {
186         return NULL;
187     }
188 
189     // retrieve offsets to module name
190     int size = (int)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
191     u1* content = new u1[size];
192     assert(content != NULL && "allocation failed");
193     _image_file->get_resource(location, content);
194     u1* ptr = content;
195     // sequence of sizeof(8) isEmpty|offset. Use the first module that is not empty.
196     u4 offset = 0;
197     for (i = 0; i < size; i+=8) {
198         u4 isEmpty = _endian->get(*((u4*)ptr));
199         ptr += 4;
200         if (!isEmpty) {
201             offset = _endian->get(*((u4*)ptr));
202             break;
203         }
204         ptr += 4;
205     }
206     delete[] content;
207     return _image_file->get_strings().get(offset);
208 }
209 
210 // Manage a table of open image files.  This table allows multiple access points
211 // to share an open image.
212 ImageFileReaderTable::ImageFileReaderTable() : _count(0), _max(_growth) {
213     _table = static_cast<ImageFileReader**>(calloc(_max, sizeof(ImageFileReader*)));
214     assert(_table != NULL && "allocation failed");
215 }
216 
217 // Add a new image entry to the table.
218 void ImageFileReaderTable::add(ImageFileReader* image) {
219     if (_count == _max) {
220         _max += _growth;
221         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
222     }
223     _table[_count++] = image;
224 }
225 
226 // Remove an image entry from the table.
227 void ImageFileReaderTable::remove(ImageFileReader* image) {
228     for (u4 i = 0; i < _count; i++) {
229         if (_table[i] == image) {
230             // Swap the last element into the found slot
231             _table[i] = _table[--_count];
232             break;
233         }
234     }
235 
236     if (_count != 0 && _count == _max - _growth) {
237         _max -= _growth;
238         _table = static_cast<ImageFileReader**>(realloc(_table, _max * sizeof(ImageFileReader*)));
239     }
240 }
241 
242 // Determine if image entry is in table.
243 bool ImageFileReaderTable::contains(ImageFileReader* image) {
244     for (u4 i = 0; i < _count; i++) {
245         if (_table[i] == image) {
246             return true;
247         }
248     }
249     return false;
250 }
251 
// Table to manage multiple opens of an image file.
ImageFileReaderTable ImageFileReader::_reader_table;

// Critical section guarding _reader_table.  The functions in this file that
// search or modify the table first acquire it through a
// SimpleCriticalSectionLock.
SimpleCriticalSection _reader_table_lock;
256 
257 // Locate an image if file already open.
258 ImageFileReader* ImageFileReader::find_image(const char* name) {
259     // Lock out _reader_table.
260     SimpleCriticalSectionLock cs(&_reader_table_lock);
261     // Search for an exist image file.
262     for (u4 i = 0; i < _reader_table.count(); i++) {
263         // Retrieve table entry.
264         ImageFileReader* reader = _reader_table.get(i);
265         // If name matches, then reuse (bump up use count.)
266         assert(reader->name() != NULL && "reader->name must not be null");
267         if (strcmp(reader->name(), name) == 0) {
268             reader->inc_use();
269             return reader;
270         }
271     }
272 
273     return NULL;
274 }
275 
276 // Open an image file, reuse structure if file already open.
277 ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
278     ImageFileReader* reader = find_image(name);
279     if (reader != NULL) {
280         return reader;
281     }
282 
283     // Need a new image reader.
284     reader = new ImageFileReader(name, big_endian);
285     if (reader == NULL || !reader->open()) {
286         // Failed to open.
287         delete reader;
288         return NULL;
289     }
290 
291     // Lock to update
292     SimpleCriticalSectionLock cs(&_reader_table_lock);
293     // Search for an existing image file.
294     for (u4 i = 0; i < _reader_table.count(); i++) {
295         // Retrieve table entry.
296         ImageFileReader* existing_reader = _reader_table.get(i);
297         // If name matches, then reuse (bump up use count.)
298         assert(reader->name() != NULL && "reader->name still must not be null");
299         if (strcmp(existing_reader->name(), name) == 0) {
300             existing_reader->inc_use();
301             reader->close();
302             delete reader;
303             return existing_reader;
304         }
305     }
306     // Bump use count and add to table.
307     reader->inc_use();
308     _reader_table.add(reader);
309     return reader;
310 }
311 
312 // Close an image file if the file is not in use elsewhere.
313 void ImageFileReader::close(ImageFileReader *reader) {
314     // Lock out _reader_table.
315     SimpleCriticalSectionLock cs(&_reader_table_lock);
316     // If last use then remove from table and then close.
317     if (reader->dec_use()) {
318         _reader_table.remove(reader);
319         delete reader;
320     }
321 }
322 
323 // Return an id for the specified ImageFileReader.
324 u8 ImageFileReader::reader_to_ID(ImageFileReader *reader) {
325     // ID is just the cloaked reader address.
326     return (u8)reader;
327 }
328 
329 // Validate the image id.
330 bool ImageFileReader::id_check(u8 id) {
331     // Make sure the ID is a managed (_reader_table) reader.
332     SimpleCriticalSectionLock cs(&_reader_table_lock);
333     return _reader_table.contains((ImageFileReader*)id);
334 }
335 
336 // Return an id for the specified ImageFileReader.
337 ImageFileReader* ImageFileReader::id_to_reader(u8 id) {
338     assert(id_check(id) && "invalid image id");
339     return (ImageFileReader*)id;
340 }
341 
342 // Constructor initializes to a closed state.
343 ImageFileReader::ImageFileReader(const char* name, bool big_endian) :
344     _module_data(NULL) {
345     // Copy the image file name.
346      int len = (int) strlen(name) + 1;
347     _name = new char[len];
348     assert(_name != NULL  && "allocation failed");
349     strncpy(_name, name, len);
350     // Initialize for a closed file.
351     _fd = -1;
352     _endian = Endian::get_handler(big_endian);
353     _index_data = NULL;
354 }
355 
356 // Close image and free up data structures.
357 ImageFileReader::~ImageFileReader() {
358     // Ensure file is closed.
359     close();
360     // Free up name.
361     if (_name) {
362         delete[] _name;
363         _name = NULL;
364     }
365 
366     if (_module_data != NULL) {
367         delete _module_data;
368     }
369 }
370 
371 // Open image file for read access.
372 bool ImageFileReader::open() {
373     // If file exists open for reading.
374     _fd = osSupport::openReadOnly(_name);
375     if (_fd == -1) {
376         return false;
377     }
378     // Retrieve the file size.
379     _file_size = osSupport::size(_name);
380     // Read image file header and verify it has a valid header.
381     size_t header_size = sizeof(ImageHeader);
382     if (_file_size < header_size ||
383         !read_at((u1*)&_header, header_size, 0) ||
384         _header.magic(_endian) != IMAGE_MAGIC ||
385         _header.major_version(_endian) != MAJOR_VERSION ||
386         _header.minor_version(_endian) != MINOR_VERSION) {
387         close();
388         return false;
389     }
390     // Size of image index.
391     _index_size = index_size();
392     // Make sure file is large enough to contain the index.
393     if (_file_size < _index_size) {
394         return false;
395     }
396     // Memory map image (minimally the index.)
397     _index_data = (u1*)osSupport::map_memory(_fd, _name, 0, (size_t)map_size());
398     assert(_index_data && "image file not memory mapped");
399     // Retrieve length of index perfect hash table.
400     u4 length = table_length();
401     // Compute offset of the perfect hash table redirect table.
402     u4 redirect_table_offset = (u4)header_size;
403     // Compute offset of index attribute offsets.
404     u4 offsets_table_offset = redirect_table_offset + length * (u4)sizeof(s4);
405     // Compute offset of index location attribute data.
406     u4 location_bytes_offset = offsets_table_offset + length * (u4)sizeof(u4);
407     // Compute offset of index string table.
408     u4 string_bytes_offset = location_bytes_offset + locations_size();
409     // Compute address of the perfect hash table redirect table.
410     _redirect_table = (s4*)(_index_data + redirect_table_offset);
411     // Compute address of index attribute offsets.
412     _offsets_table = (u4*)(_index_data + offsets_table_offset);
413     // Compute address of index location attribute data.
414     _location_bytes = _index_data + location_bytes_offset;
415     // Compute address of index string table.
416     _string_bytes = _index_data + string_bytes_offset;
417 
418     // Initialize the module data
419     _module_data = new ImageModuleData(this);
420     // Successful open (if memory allocation succeeded).
421     return _module_data != NULL;
422 }
423 
424 // Close image file.
425 void ImageFileReader::close() {
426     // Deallocate the index.
427     if (_index_data) {
428         osSupport::unmap_memory((char*)_index_data, (size_t)map_size());
429         _index_data = NULL;
430     }
431     // Close file.
432     if (_fd != -1) {
433         osSupport::close(_fd);
434         _fd = -1;
435     }
436 
437     if (_module_data != NULL) {
438         delete _module_data;
439         _module_data = NULL;
440     }
441 }
442 
443 // Read directly from the file.
444 bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
445     return (u8)osSupport::read(_fd, (char*)data, size, offset) == size;
446 }
447 
448 // Find the location attributes associated with the path.    Returns true if
449 // the location is found, false otherwise.
450 bool ImageFileReader::find_location(const char* path, ImageLocation& location) const {
451     // Locate the entry in the index perfect hash table.
452     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
453     // If is found.
454     if (index != ImageStrings::NOT_FOUND) {
455         // Get address of first byte of location attribute stream.
456         u1* data = get_location_data(index);
457         // Expand location attributes.
458         location.set_data(data);
459         // Make sure result is not a false positive.
460         return verify_location(location, path);
461     }
462     return false;
463 }
464 
465 // Find the location index and size associated with the path.
466 // Returns the location index and size if the location is found, 0 otherwise.
467 u4 ImageFileReader::find_location_index(const char* path, u8 *size) const {
468     // Locate the entry in the index perfect hash table.
469     s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
470     // If found.
471     if (index != ImageStrings::NOT_FOUND) {
472         // Get address of first byte of location attribute stream.
473         u4 offset = get_location_offset(index);
474         u1* data = get_location_offset_data(offset);
475         // Expand location attributes.
476         ImageLocation location(data);
477         // Make sure result is not a false positive.
478         if (verify_location(location, path)) {
479                 *size = (jlong)location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
480                 return offset;
481         }
482     }
483     return 0;            // not found
484 }
485 
486 // Verify that a found location matches the supplied path (without copying.)
487 bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
488     // Manage the image string table.
489     ImageStrings strings(_string_bytes, _header.strings_size(_endian));
490     // Position to first character of the path string.
491     const char* next = path;
492     // Get module name string.
493     const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
494     // If module string is not empty.
495     if (*module != '\0') {
496         // Compare '/module/' .
497         if (*next++ != '/') return false;
498         if (!(next = ImageStrings::starts_with(next, module))) return false;
499         if (*next++ != '/') return false;
500     }
501     // Get parent (package) string
502     const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
503     // If parent string is not empty string.
504     if (*parent != '\0') {
505         // Compare 'parent/' .
506         if (!(next = ImageStrings::starts_with(next, parent))) return false;
507         if (*next++ != '/') return false;
508     }
509     // Get base name string.
510     const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
511     // Compare with basne name.
512     if (!(next = ImageStrings::starts_with(next, base))) return false;
513     // Get extension string.
514     const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
515     // If extension is not empty.
516     if (*extension != '\0') {
517         // Compare '.extension' .
518         if (*next++ != '.') return false;
519         if (!(next = ImageStrings::starts_with(next, extension))) return false;
520     }
521     // True only if complete match and no more characters.
522     return *next == '\0';
523 }
524 
525 // Return the resource for the supplied location offset.
526 void ImageFileReader::get_resource(u4 offset, u1* uncompressed_data) const {
527         // Get address of first byte of location attribute stream.
528         u1* data = get_location_offset_data(offset);
529         // Expand location attributes.
530         ImageLocation location(data);
531         // Read the data
532         get_resource(location, uncompressed_data);
533 }
534 
535 // Return the resource for the supplied location.
536 void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
537     // Retrieve the byte offset and size of the resource.
538     u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
539     u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
540     u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
541     // If the resource is compressed.
542     if (compressed_size != 0) {
543         u1* compressed_data;
544         // If not memory mapped read in bytes.
545         if (!memory_map_image) {
546             // Allocate buffer for compression.
547             compressed_data = new u1[(size_t)compressed_size];
548             assert(compressed_data != NULL && "allocation failed");
549             // Read bytes from offset beyond the image index.
550             bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
551             assert(is_read && "error reading from image or short read");
552         } else {
553             compressed_data = get_data_address() + offset;
554         }
555         // Get image string table.
556         const ImageStrings strings = get_strings();
557         // Decompress resource.
558         ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
559                         &strings, _endian);
560         // If not memory mapped then release temporary buffer.
561         if (!memory_map_image) {
562                 delete[] compressed_data;
563         }
564     } else {
565         // Read bytes from offset beyond the image index.
566         bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
567         assert(is_read && "error reading from image or short read");
568     }
569 }
570 
571 // Return the ImageModuleData for this image
572 ImageModuleData * ImageFileReader::get_image_module_data() {
573     return _module_data;
574 }