1 /*
2 * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * - Neither the name of Oracle nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #ifndef LIBJIMAGE_IMAGEFILE_HPP
33 #define LIBJIMAGE_IMAGEFILE_HPP
34
35 #include <assert.h>
36
37 #include "endian.hpp"
38 #include "inttypes.hpp"
39
40 // Image files are an alternate file format for storing classes and resources. The
41 // goal is to supply file access which is faster and smaller than the jar format.
42 // It should be noted that unlike jars, information stored in an image is in native
43 // endian format. This allows the image to be mapped into memory without endian
44 // translation. This also means that images are platform dependent.
45 //
// Image files are structured as three sections:
47 //
48 // +-----------+
49 // | Header |
50 // +-----------+
51 // | |
52 // | Index |
53 // | |
54 // +-----------+
55 // | |
56 // | |
57 // | Resources |
58 // | |
59 // | |
60 // +-----------+
61 //
62 // The header contains information related to identification and description of
63 // contents.
64 //
65 // +-------------------------+
66 // | Magic (0xCAFEDADA) |
67 // +------------+------------+
68 // | Major Vers | Minor Vers |
69 // +------------+------------+
70 // | Flags |
71 // +-------------------------+
72 // | Resource Count |
73 // +-------------------------+
74 // | Table Length |
75 // +-------------------------+
76 // | Attributes Size |
77 // +-------------------------+
78 // | Strings Size |
79 // +-------------------------+
80 //
81 // Magic - means of identifying validity of the file. This avoids requiring a
82 // special file extension.
83 // Major vers, minor vers - differences in version numbers indicate structural
84 // changes in the image.
85 // Flags - various image wide flags (future).
86 // Resource count - number of resources in the file.
87 // Table length - the length of lookup tables used in the index.
88 // Attributes size - number of bytes in the region used to store location attribute
89 // streams.
90 // Strings size - the size of the region used to store strings used by the
91 // index and meta data.
92 //
93 // The index contains information related to resource lookup. The algorithm
94 // used for lookup is "A Practical Minimal Perfect Hashing Method"
95 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
// in the form /<module>/<package>/<base>.<extension> return the resource location
// information:
98 //
99 // redirectIndex = hash(path, DEFAULT_SEED) % table_length;
100 // redirect = redirectTable[redirectIndex];
101 // if (redirect == 0) return not found;
102 // locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
103 // location = locationTable[locationIndex];
104 // if (!verify(location, path)) return not found;
105 // return location;
106 //
107 // Note: The hash function takes an initial seed value. A different seed value
108 // usually returns a different result for strings that would otherwise collide with
109 // other seeds. The verify function guarantees the found resource location is
110 // indeed the resource we are looking for.
111 //
// The following is the format of the index:
113 //
114 // +-------------------+
115 // | Redirect Table |
116 // +-------------------+
117 // | Attribute Offsets |
118 // +-------------------+
119 // | Attribute Data |
120 // +-------------------+
121 // | Strings |
122 // +-------------------+
123 //
124 // Redirect Table - Array of 32-bit signed values representing actions that
125 // should take place for hashed strings that map to that
126 // value. Negative values indicate no hash collision and can be
127 // quickly converted to indices into attribute offsets. Positive
128 // values represent a new seed for hashing an index into attribute
129 // offsets. Zero indicates not found.
130 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into
131 // attribute data. Attribute offsets can be iterated to do a
132 // full survey of resources in the image. Offset of zero
133 // indicates no attributes.
134 // Attribute Data - Bytes representing compact attribute data for locations. (See
135 // comments in ImageLocation.)
136 // Strings - Collection of zero terminated UTF-8 strings used by the index and
137 // image meta data. Each string is accessed by offset. Each string is
138 // unique. Offset zero is reserved for the empty string.
139 //
140 // Note that the memory mapped index assumes 32 bit alignment of each component
141 // in the index.
142 //
143 // Endianness of an image.
// An image booted by hotspot is always in native endian. However, it is possible
// to read (by the JDK) in alternate endian format. Primarily, this is during
// cross platform scenarios. E.g., where javac needs to read an embedded image
// to access classes for cross compilation.
148 //
149
150 class ImageFileReader; // forward declaration
151
152 // Manage image file string table.
153 class ImageStrings {
154 private:
155 u1* _data; // Data bytes for strings.
156 u4 _size; // Number of bytes in the string table.
157 public:
158 enum {
159 // Not found result from find routine.
160 NOT_FOUND = -1,
161 // Prime used to generate hash for Perfect Hashing.
162 HASH_MULTIPLIER = 0x01000193
163 };
164
165 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
166
167 // Return the UTF-8 string beginning at offset.
168 inline const char* get(u4 offset) const {
169 assert(offset < _size && "offset exceeds string table size");
170 return (const char*)(_data + offset);
171 }
172
173 // Compute the Perfect Hashing hash code for the supplied UTF-8 string.
174 inline static u4 hash_code(const char* string) {
175 return hash_code(string, HASH_MULTIPLIER);
176 }
177
178 // Compute the Perfect Hashing hash code for the supplied string, starting at seed.
179 static s4 hash_code(const char* string, s4 seed);
180
181 // Match up a string in a perfect hash table. Result still needs validation
182 // for precise match.
183 static s4 find(Endian* endian, const char* name, s4* redirect, u4 length);
184
185 // Test to see if UTF-8 string begins with the start UTF-8 string. If so,
186 // return non-NULL address of remaining portion of string. Otherwise, return
187 // NULL. Used to test sections of a path without copying from image string
188 // table.
189 static const char* starts_with(const char* string, const char* start);
190
191 // Test to see if UTF-8 string begins with start char. If so, return non-NULL
192 // address of remaining portion of string. Otherwise, return NULL. Used
193 // to test a character of a path without copying.
194 inline static const char* starts_with(const char* string, const char ch) {
195 return *string == ch ? string + 1 : NULL;
196 }
197 };
198
199 // Manage image file location attribute data. Within an image, a location's
200 // attributes are compressed into a stream of bytes. An attribute stream is
201 // composed of individual attribute sequences. Each attribute sequence begins with
202 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the
203 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the
204 // attribute value. Attribute values present as most significant byte first.
205 //
206 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x2A
207 // (kind = 5, length = 3), 0x03, 0x35, 0x62.
208 //
209 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header
210 // byte of zero.)
211 //
// ImageLocation inflates the stream into individual values stored in the long
// array _attributes. This allows an attribute value to be quickly accessed by
// direct indexing. Unspecified values default to zero.
215 //
216 // Notes:
217 // - Even though ATTRIBUTE_END (which might be encoded with a zero byte) is used to
218 // mark the end of the attribute stream, streams will contain zero byte values
219 // in the non-header portion of the attribute data. Thus, detecting a zero byte
220 // is not sufficient to detect the end of an attribute stream.
221 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
222 // storing the resources. Thus, in an image this represents the number of bytes
223 // after the index.
224 // - Currently, compressed resources are represented by having a non-zero
225 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the
226 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
227 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value
228 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and
229 // in memory. In the future, additional compression techniques will be used and
230 // represented differently.
231 // - Package strings include trailing slash and extensions include prefix period.
232 //
233 class ImageLocation {
234 public:
235 enum {
236 ATTRIBUTE_END, // End of attribute stream marker
237 ATTRIBUTE_MODULE, // String table offset of module name
238 ATTRIBUTE_PARENT, // String table offset of resource path parent
239 ATTRIBUTE_BASE, // String table offset of resource path base
240 ATTRIBUTE_EXTENSION, // String table offset of resource path extension
241 ATTRIBUTE_OFFSET, // Container byte offset of resource
242 ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource
243 ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource
244 ATTRIBUTE_COUNT // Number of attribute kinds
245 };
246
247 private:
248 // Values of inflated attributes.
249 u8 _attributes[ATTRIBUTE_COUNT];
250
251 // Return the attribute value number of bytes.
252 inline static u1 attribute_length(u1 data) {
253 return (data & 0x7) + 1;
254 }
255
256 // Return the attribute kind.
257 inline static u1 attribute_kind(u1 data) {
258 u1 kind = data >> 3;
259 assert(kind < ATTRIBUTE_COUNT && "invalid attribute kind");
260 return kind;
261 }
262
263 // Return the attribute length.
264 inline static u8 attribute_value(u1* data, u1 n) {
265 assert(0 < n && n <= 8 && "invalid attribute value length");
266 u8 value = 0;
267 // Most significant bytes first.
268 for (u1 i = 0; i < n; i++) {
269 value <<= 8;
270 value |= data[i];
271 }
272 return value;
273 }
274
275 public:
276 ImageLocation() {
277 clear_data();
278 }
279
280 ImageLocation(u1* data) {
281 clear_data();
282 set_data(data);
283 }
284
285 // Inflates the attribute stream into individual values stored in the long
286 // array _attributes. This allows an attribute value to be quickly accessed by
287 // direct indexing. Unspecified values default to zero.
288 void set_data(u1* data);
289
290 // Zero all attribute values.
291 void clear_data();
292
293 // Retrieve an attribute value from the inflated array.
294 inline u8 get_attribute(u1 kind) const {
295 assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT && "invalid attribute kind");
296 return _attributes[kind];
297 }
298
299 // Retrieve an attribute string value from the inflated array.
300 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {
301 return strings.get((u4)get_attribute(kind));
302 }
303 };
304
305 // Image file header, starting at offset 0.
306 class ImageHeader {
307 private:
308 u4 _magic; // Image file marker
309 u4 _version; // Image file major version number
310 u4 _flags; // Image file flags
311 u4 _resource_count; // Number of resources in file
312 u4 _table_length; // Number of slots in index tables
313 u4 _locations_size; // Number of bytes in attribute table
314 u4 _strings_size; // Number of bytes in string table
315
316 public:
317 u4 magic() const { return _magic; }
318 u4 magic(Endian* endian) const { return endian->get(_magic); }
319 void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); }
320
321 u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; }
322 u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; }
323 void set_version(Endian* endian, u4 major_version, u4 minor_version) {
324 return endian->set(_version, major_version << 16 | minor_version);
325 }
326
327 u4 flags(Endian* endian) const { return endian->get(_flags); }
328 void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); }
329
330 u4 resource_count(Endian* endian) const { return endian->get(_resource_count); }
331 void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); }
332
333 u4 table_length(Endian* endian) const { return endian->get(_table_length); }
334 void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); }
335
336 u4 locations_size(Endian* endian) const { return endian->get(_locations_size); }
337 void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); }
338
339 u4 strings_size(Endian* endian) const { return endian->get(_strings_size); }
340 void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); }
341 };
342
343 // Max path length limit independent of platform. Windows max path is 1024,
344 // other platforms use 4096. The JCK fails several tests when 1024 is used.
345 #define IMAGE_MAX_PATH 4096
346
347 class ImageFileReader;
348
349 // Manage a table of open image files. This table allows multiple access points
350 // to share an open image.
351 class ImageFileReaderTable {
352 private:
353 const static u4 _growth = 8; // Growth rate of the table
354 u4 _count; // Number of entries in the table
355 u4 _max; // Maximum number of entries allocated
356 ImageFileReader** _table; // Growable array of entries
357
358 public:
359 ImageFileReaderTable();
360 // ~ImageFileReaderTable()
361 // Bug 8166727
362 //
363 // WARNING: Should never close jimage files.
364 // Threads may still be running during shutdown.
365 //
366
367 // Return the number of entries.
368 inline u4 count() { return _count; }
369
370 // Return the ith entry from the table.
371 inline ImageFileReader* get(u4 i) { return _table[i]; }
372
373 // Add a new image entry to the table.
374 void add(ImageFileReader* image);
375
376 // Remove an image entry from the table.
377 void remove(ImageFileReader* image);
378
379 // Determine if image entry is in table.
380 bool contains(ImageFileReader* image);
381 };
382
383 // Manage the image file.
384 // ImageFileReader manages the content of an image file.
// Initially, the header of the image file is read for validation. If valid,
// values in the header are used to calculate the size of the image index. The
387 // index is then memory mapped to allow load on demand and sharing. The
388 // -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.)
389 // An image can be used by Hotspot and multiple reference points in the JDK, thus
390 // it is desirable to share a reader. To accommodate sharing, a share table is
// defined (see ImageFileReaderTable in imageFile.cpp). To track the number of
392 // uses, ImageFileReader keeps a use count (_use). Use is incremented when
393 // 'opened' by reference point and decremented when 'closed'. Use of zero
394 // leads the ImageFileReader to be actually closed and discarded.
395 class ImageFileReader {
396 friend class ImageFileReaderTable;
397 private:
398 // Manage a number of image files such that an image can be shared across
399 // multiple uses (ex. loader.)
400 static ImageFileReaderTable _reader_table;
401
402 // true if image should be fully memory mapped.
403 static bool memory_map_image;
404
405 char* _name; // Name of image
406 s4 _use; // Use count
407 int _fd; // File descriptor
408 Endian* _endian; // Endian handler
409 u8 _file_size; // File size in bytes
410 ImageHeader _header; // Image header
411 size_t _index_size; // Total size of index
412 u1* _index_data; // Raw index data
413 s4* _redirect_table; // Perfect hash redirect table
414 u4* _offsets_table; // Location offset table
415 u1* _location_bytes; // Location attributes
416 u1* _string_bytes; // String table
417
418 ImageFileReader(const char* name, bool big_endian);
419 ~ImageFileReader();
420
421 // Compute number of bytes in image file index.
422 inline size_t index_size() {
423 return sizeof(ImageHeader) +
424 table_length() * sizeof(u4) * 2 + locations_size() + strings_size();
425 }
426
427 public:
428 enum {
429 // Image file marker.
430 IMAGE_MAGIC = 0xCAFEDADA,
431 // Endian inverted Image file marker.
432 IMAGE_MAGIC_INVERT = 0xDADAFECA,
433 // Image file major version number.
434 MAJOR_VERSION = 1,
435 // Image file minor version number.
436 MINOR_VERSION = 0
437 };
438
439 // Locate an image if file already open.
440 static ImageFileReader* find_image(const char* name);
441
442 // Open an image file, reuse structure if file already open.
443 static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian());
444
445 // Close an image file if the file is not in use elsewhere.
446 static void close(ImageFileReader *reader);
447
448 // Return an id for the specified ImageFileReader.
449 static u8 reader_to_ID(ImageFileReader *reader);
450
451 // Validate the image id.
452 static bool id_check(u8 id);
453
454 // Return an id for the specified ImageFileReader.
455 static ImageFileReader* id_to_reader(u8 id);
456
457 // Open image file for read access.
458 bool open();
459
460 // Close image file.
461 void close();
462
463 // Read directly from the file.
464 bool read_at(u1* data, u8 size, u8 offset) const;
465
466 inline Endian* endian() const { return _endian; }
467
468 // Retrieve name of image file.
469 inline const char* name() const {
470 return _name;
471 }
472
473 // Retrieve size of image file.
474 inline u8 file_size() const {
475 return _file_size;
476 }
477
478 // Retrieve the size of the mapped image.
479 inline u8 map_size() const {
480 return (u8)(memory_map_image ? _file_size : _index_size);
481 }
482
483 // Return first address of index data.
484 inline u1* get_index_address() const {
485 return _index_data;
486 }
487
488 // Return first address of resource data.
489 inline u1* get_data_address() const {
490 return _index_data + _index_size;
491 }
492
493 // Get the size of the index data.
494 size_t get_index_size() const {
495 return _index_size;
496 }
497
498 inline u4 table_length() const {
499 return _header.table_length(_endian);
500 }
501
502 inline u4 locations_size() const {
503 return _header.locations_size(_endian);
504 }
505
506 inline u4 strings_size()const {
507 return _header.strings_size(_endian);
508 }
509
510 inline u4* offsets_table() const {
511 return _offsets_table;
512 }
513
514 // Increment use count.
515 inline void inc_use() {
516 _use++;
517 }
518
519 // Decrement use count.
520 inline bool dec_use() {
521 return --_use == 0;
522 }
523
524 // Return a string table accessor.
525 inline const ImageStrings get_strings() const {
526 return ImageStrings(_string_bytes, _header.strings_size(_endian));
527 }
528
529 // Return location attribute stream at offset.
530 inline u1* get_location_offset_data(u4 offset) const {
531 assert((u4)offset < _header.locations_size(_endian) &&
532 "offset exceeds location attributes size");
533 return offset != 0 ? _location_bytes + offset : NULL;
534 }
535
536 // Return location attribute stream for location i.
537 inline u1* get_location_data(u4 index) const {
538 return get_location_offset_data(get_location_offset(index));
539 }
540
541 // Return the location offset for index.
542 inline u4 get_location_offset(u4 index) const {
543 assert((u4)index < _header.table_length(_endian) &&
544 "index exceeds location count");
545 return _endian->get(_offsets_table[index]);
546 }
547
548 // Find the location attributes associated with the path. Returns true if
549 // the location is found, false otherwise.
550 bool find_location(const char* path, ImageLocation& location) const;
551
552 // Find the location index and size associated with the path.
553 // Returns the location index and size if the location is found,
554 // ImageFileReader::NOT_FOUND otherwise.
555 u4 find_location_index(const char* path, u8 *size) const;
556
557 // Verify that a found location matches the supplied path.
558 bool verify_location(ImageLocation& location, const char* path) const;
559
560 // Return the resource for the supplied location index.
561 void get_resource(u4 index, u1* uncompressed_data) const;
562
563 // Return the resource for the supplied path.
564 void get_resource(ImageLocation& location, u1* uncompressed_data) const;
565 };
566 #endif // LIBJIMAGE_IMAGEFILE_HPP