1 /* 2 * Copyright (c) 2015, 2025, Oracle and/or its affiliates. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * - Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * - Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * - Neither the name of Oracle nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 20 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #ifndef LIBJIMAGE_IMAGEFILE_HPP 33 #define LIBJIMAGE_IMAGEFILE_HPP 34 35 #include <assert.h> 36 37 #include "endian.hpp" 38 #include "inttypes.hpp" 39 40 // Image files are an alternate file format for storing classes and resources. The 41 // goal is to supply file access which is faster and smaller than the jar format. 42 // It should be noted that unlike jars, information stored in an image is in native 43 // endian format. This allows the image to be mapped into memory without endian 44 // translation. This also means that images are platform dependent. 45 // 46 // Image files are structured as three sections; 47 // 48 // +-----------+ 49 // | Header | 50 // +-----------+ 51 // | | 52 // | Index | 53 // | | 54 // +-----------+ 55 // | | 56 // | | 57 // | Resources | 58 // | | 59 // | | 60 // +-----------+ 61 // 62 // The header contains information related to identification and description of 63 // contents. 64 // 65 // +-------------------------+ 66 // | Magic (0xCAFEDADA) | 67 // +------------+------------+ 68 // | Major Vers | Minor Vers | 69 // +------------+------------+ 70 // | Flags | 71 // +-------------------------+ 72 // | Resource Count | 73 // +-------------------------+ 74 // | Table Length | 75 // +-------------------------+ 76 // | Attributes Size | 77 // +-------------------------+ 78 // | Strings Size | 79 // +-------------------------+ 80 // 81 // Magic - means of identifying validity of the file. This avoids requiring a 82 // special file extension. 83 // Major vers, minor vers - differences in version numbers indicate structural 84 // changes in the image. 85 // Flags - various image wide flags (future). 86 // Resource count - number of resources in the file. 87 // Table length - the length of lookup tables used in the index. 88 // Attributes size - number of bytes in the region used to store location attribute 89 // streams. 90 // Strings size - the size of the region used to store strings used by the 91 // index and meta data. 92 // 93 // The index contains information related to resource lookup. The algorithm 94 // used for lookup is "A Practical Minimal Perfect Hashing Method" 95 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string 96 // in the form /<module>/<package>/<base>.<extension> return the resource location 97 // information; 98 // 99 // redirectIndex = hash(path, DEFAULT_SEED) % table_length; 100 // redirect = redirectTable[redirectIndex]; 101 // if (redirect == 0) return not found; 102 // locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length; 103 // location = locationTable[locationIndex]; 104 // if (!verify(location, path)) return not found; 105 // return location; 106 // 107 // Note: The hash function takes an initial seed value. A different seed value 108 // usually returns a different result for strings that would otherwise collide with 109 // other seeds. The verify function guarantees the found resource location is 110 // indeed the resource we are looking for. 111 // 112 // The following is the format of the index; 113 // 114 // +-------------------+ 115 // | Redirect Table | 116 // +-------------------+ 117 // | Attribute Offsets | 118 // +-------------------+ 119 // | Attribute Data | 120 // +-------------------+ 121 // | Strings | 122 // +-------------------+ 123 // 124 // Redirect Table - Array of 32-bit signed values representing actions that 125 // should take place for hashed strings that map to that 126 // value. Negative values indicate no hash collision and can be 127 // quickly converted to indices into attribute offsets. Positive 128 // values represent a new seed for hashing an index into attribute 129 // offsets. Zero indicates not found. 130 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into 131 // attribute data. Attribute offsets can be iterated to do a 132 // full survey of resources in the image. Offset of zero 133 // indicates no attributes. 134 // Attribute Data - Bytes representing compact attribute data for locations. (See 135 // comments in ImageLocation.) 136 // Strings - Collection of zero terminated UTF-8 strings used by the index and 137 // image meta data. Each string is accessed by offset. Each string is 138 // unique. Offset zero is reserved for the empty string. 139 // 140 // Note that the memory mapped index assumes 32 bit alignment of each component 141 // in the index. 142 // 143 // Endianness of an image. 144 // An image booted by hotspot is always in native endian. However, it is possible 145 // to read (by the JDK) in alternate endian format. Primarily, this is during 146 // cross platform scenarios. Ex, where javac needs to read an embedded image 147 // to access classes for crossing compilation. 148 // 149 150 class ImageFileReader; // forward declaration 151 152 // Manage image file string table. 153 class ImageStrings { 154 private: 155 u1* _data; // Data bytes for strings. 156 u4 _size; // Number of bytes in the string table. 157 public: 158 enum { 159 // Not found result from find routine. 160 NOT_FOUND = -1, 161 // Prime used to generate hash for Perfect Hashing. 162 HASH_MULTIPLIER = 0x01000193 163 }; 164 165 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {} 166 167 // Return the UTF-8 string beginning at offset. 168 inline const char* get(u4 offset) const { 169 assert(offset < _size && "offset exceeds string table size"); 170 return (const char*)(_data + offset); 171 } 172 173 // Compute the Perfect Hashing hash code for the supplied UTF-8 string. 174 inline static u4 hash_code(const char* string) { 175 return hash_code(string, HASH_MULTIPLIER); 176 } 177 178 // Compute the Perfect Hashing hash code for the supplied string, starting at seed. 179 static s4 hash_code(const char* string, s4 seed); 180 181 // Match up a string in a perfect hash table. Result still needs validation 182 // for precise match. 183 static s4 find(Endian* endian, const char* name, s4* redirect, u4 length); 184 185 // Test to see if UTF-8 string begins with the start UTF-8 string. If so, 186 // return non-NULL address of remaining portion of string. Otherwise, return 187 // NULL. Used to test sections of a path without copying from image string 188 // table. 189 static const char* starts_with(const char* string, const char* start); 190 191 // Test to see if UTF-8 string begins with start char. If so, return non-NULL 192 // address of remaining portion of string. Otherwise, return NULL. Used 193 // to test a character of a path without copying. 194 inline static const char* starts_with(const char* string, const char ch) { 195 return *string == ch ? string + 1 : NULL; 196 } 197 }; 198 199 // Manage image file location attribute data. Within an image, a location's 200 // attributes are compressed into a stream of bytes. An attribute stream is 201 // composed of individual attribute sequences. Each attribute sequence begins with 202 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the 203 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the 204 // attribute value. Attribute values present as most significant byte first. 205 // 206 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x2A 207 // (kind = 5, length = 3), 0x03, 0x35, 0x62. 208 // 209 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header 210 // byte of zero.) 211 // 212 // ImageLocation inflates the stream into individual values stored in the long 213 // array _attributes. This allows an attribute value can be quickly accessed by 214 // direct indexing. Unspecified values default to zero. 215 // 216 // Notes: 217 // - Even though ATTRIBUTE_END (which might be encoded with a zero byte) is used to 218 // mark the end of the attribute stream, streams will contain zero byte values 219 // in the non-header portion of the attribute data. Thus, detecting a zero byte 220 // is not sufficient to detect the end of an attribute stream. 221 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region 222 // storing the resources. Thus, in an image this represents the number of bytes 223 // after the index. 224 // - Currently, compressed resources are represented by having a non-zero 225 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the 226 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the 227 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value 228 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and 229 // in memory. In the future, additional compression techniques will be used and 230 // represented differently. 231 // - Package strings include trailing slash and extensions include prefix period. 232 // 233 class ImageLocation { 234 public: 235 enum { 236 ATTRIBUTE_END, // End of attribute stream marker 237 ATTRIBUTE_MODULE, // String table offset of module name 238 ATTRIBUTE_PARENT, // String table offset of resource path parent 239 ATTRIBUTE_BASE, // String table offset of resource path base 240 ATTRIBUTE_EXTENSION, // String table offset of resource path extension 241 ATTRIBUTE_OFFSET, // Container byte offset of resource 242 ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource 243 ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource 244 ATTRIBUTE_COUNT // Number of attribute kinds 245 }; 246 247 private: 248 // Values of inflated attributes. 249 u8 _attributes[ATTRIBUTE_COUNT]; 250 251 // Return the attribute value number of bytes. 252 inline static u1 attribute_length(u1 data) { 253 return (data & 0x7) + 1; 254 } 255 256 // Return the attribute kind. 257 inline static u1 attribute_kind(u1 data) { 258 u1 kind = data >> 3; 259 assert(kind < ATTRIBUTE_COUNT && "invalid attribute kind"); 260 return kind; 261 } 262 263 // Return the attribute length. 264 inline static u8 attribute_value(u1* data, u1 n) { 265 assert(0 < n && n <= 8 && "invalid attribute value length"); 266 u8 value = 0; 267 // Most significant bytes first. 268 for (u1 i = 0; i < n; i++) { 269 value <<= 8; 270 value |= data[i]; 271 } 272 return value; 273 } 274 275 public: 276 ImageLocation() { 277 clear_data(); 278 } 279 280 ImageLocation(u1* data) { 281 clear_data(); 282 set_data(data); 283 } 284 285 // Inflates the attribute stream into individual values stored in the long 286 // array _attributes. This allows an attribute value to be quickly accessed by 287 // direct indexing. Unspecified values default to zero. 288 void set_data(u1* data); 289 290 // Zero all attribute values. 291 void clear_data(); 292 293 // Retrieve an attribute value from the inflated array. 294 inline u8 get_attribute(u1 kind) const { 295 assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT && "invalid attribute kind"); 296 return _attributes[kind]; 297 } 298 299 // Retrieve an attribute string value from the inflated array. 300 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const { 301 return strings.get((u4)get_attribute(kind)); 302 } 303 }; 304 305 // Image file header, starting at offset 0. 306 class ImageHeader { 307 private: 308 u4 _magic; // Image file marker 309 u4 _version; // Image file major version number 310 u4 _flags; // Image file flags 311 u4 _resource_count; // Number of resources in file 312 u4 _table_length; // Number of slots in index tables 313 u4 _locations_size; // Number of bytes in attribute table 314 u4 _strings_size; // Number of bytes in string table 315 316 public: 317 u4 magic() const { return _magic; } 318 u4 magic(Endian* endian) const { return endian->get(_magic); } 319 void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); } 320 321 u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; } 322 u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; } 323 void set_version(Endian* endian, u4 major_version, u4 minor_version) { 324 return endian->set(_version, major_version << 16 | minor_version); 325 } 326 327 u4 flags(Endian* endian) const { return endian->get(_flags); } 328 void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); } 329 330 u4 resource_count(Endian* endian) const { return endian->get(_resource_count); } 331 void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); } 332 333 u4 table_length(Endian* endian) const { return endian->get(_table_length); } 334 void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); } 335 336 u4 locations_size(Endian* endian) const { return endian->get(_locations_size); } 337 void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); } 338 339 u4 strings_size(Endian* endian) const { return endian->get(_strings_size); } 340 void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); } 341 }; 342 343 // Max path length limit independent of platform. Windows max path is 1024, 344 // other platforms use 4096. The JCK fails several tests when 1024 is used. 345 #define IMAGE_MAX_PATH 4096 346 347 class ImageFileReader; 348 349 // Manage a table of open image files. This table allows multiple access points 350 // to share an open image. 351 class ImageFileReaderTable { 352 private: 353 const static u4 _growth = 8; // Growth rate of the table 354 u4 _count; // Number of entries in the table 355 u4 _max; // Maximum number of entries allocated 356 ImageFileReader** _table; // Growable array of entries 357 358 public: 359 ImageFileReaderTable(); 360 // ~ImageFileReaderTable() 361 // Bug 8166727 362 // 363 // WARNING: Should never close jimage files. 364 // Threads may still be running during shutdown. 365 // 366 367 // Return the number of entries. 368 inline u4 count() { return _count; } 369 370 // Return the ith entry from the table. 371 inline ImageFileReader* get(u4 i) { return _table[i]; } 372 373 // Add a new image entry to the table. 374 void add(ImageFileReader* image); 375 376 // Remove an image entry from the table. 377 void remove(ImageFileReader* image); 378 379 // Determine if image entry is in table. 380 bool contains(ImageFileReader* image); 381 }; 382 383 // Manage the image file. 384 // ImageFileReader manages the content of an image file. 385 // Initially, the header of the image file is read for validation. If valid, 386 // values in the header are used calculate the size of the image index. The 387 // index is then memory mapped to allow load on demand and sharing. The 388 // -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.) 389 // An image can be used by Hotspot and multiple reference points in the JDK, thus 390 // it is desirable to share a reader. To accommodate sharing, a share table is 391 // defined (see ImageFileReaderTable in imageFile.cpp) To track the number of 392 // uses, ImageFileReader keeps a use count (_use). Use is incremented when 393 // 'opened' by reference point and decremented when 'closed'. Use of zero 394 // leads the ImageFileReader to be actually closed and discarded. 395 class ImageFileReader { 396 friend class ImageFileReaderTable; 397 private: 398 // Manage a number of image files such that an image can be shared across 399 // multiple uses (ex. loader.) 400 static ImageFileReaderTable _reader_table; 401 402 // true if image should be fully memory mapped. 403 static bool memory_map_image; 404 405 char* _name; // Name of image 406 s4 _use; // Use count 407 int _fd; // File descriptor 408 Endian* _endian; // Endian handler 409 u8 _file_size; // File size in bytes 410 ImageHeader _header; // Image header 411 size_t _index_size; // Total size of index 412 u1* _index_data; // Raw index data 413 s4* _redirect_table; // Perfect hash redirect table 414 u4* _offsets_table; // Location offset table 415 u1* _location_bytes; // Location attributes 416 u1* _string_bytes; // String table 417 418 ImageFileReader(const char* name, bool big_endian); 419 ~ImageFileReader(); 420 421 // Compute number of bytes in image file index. 422 inline size_t index_size() { 423 return sizeof(ImageHeader) + 424 table_length() * sizeof(u4) * 2 + locations_size() + strings_size(); 425 } 426 427 public: 428 enum { 429 // Image file marker. 430 IMAGE_MAGIC = 0xCAFEDADA, 431 // Endian inverted Image file marker. 432 IMAGE_MAGIC_INVERT = 0xDADAFECA, 433 // Image file major version number. 434 MAJOR_VERSION = 1, 435 // Image file minor version number. 436 MINOR_VERSION = 0 437 }; 438 439 // Locate an image if file already open. 440 static ImageFileReader* find_image(const char* name); 441 442 // Open an image file, reuse structure if file already open. 443 static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian()); 444 445 // Close an image file if the file is not in use elsewhere. 446 static void close(ImageFileReader *reader); 447 448 // Return an id for the specified ImageFileReader. 449 static u8 reader_to_ID(ImageFileReader *reader); 450 451 // Validate the image id. 452 static bool id_check(u8 id); 453 454 // Return an id for the specified ImageFileReader. 455 static ImageFileReader* id_to_reader(u8 id); 456 457 // Open image file for read access. 458 bool open(); 459 460 // Close image file. 461 void close(); 462 463 // Read directly from the file. 464 bool read_at(u1* data, u8 size, u8 offset) const; 465 466 inline Endian* endian() const { return _endian; } 467 468 // Retrieve name of image file. 469 inline const char* name() const { 470 return _name; 471 } 472 473 // Retrieve size of image file. 474 inline u8 file_size() const { 475 return _file_size; 476 } 477 478 // Retrieve the size of the mapped image. 479 inline u8 map_size() const { 480 return (u8)(memory_map_image ? _file_size : _index_size); 481 } 482 483 // Return first address of index data. 484 inline u1* get_index_address() const { 485 return _index_data; 486 } 487 488 // Return first address of resource data. 489 inline u1* get_data_address() const { 490 return _index_data + _index_size; 491 } 492 493 // Get the size of the index data. 494 size_t get_index_size() const { 495 return _index_size; 496 } 497 498 inline u4 table_length() const { 499 return _header.table_length(_endian); 500 } 501 502 inline u4 locations_size() const { 503 return _header.locations_size(_endian); 504 } 505 506 inline u4 strings_size()const { 507 return _header.strings_size(_endian); 508 } 509 510 inline u4* offsets_table() const { 511 return _offsets_table; 512 } 513 514 // Increment use count. 515 inline void inc_use() { 516 _use++; 517 } 518 519 // Decrement use count. 520 inline bool dec_use() { 521 return --_use == 0; 522 } 523 524 // Return a string table accessor. 525 inline const ImageStrings get_strings() const { 526 return ImageStrings(_string_bytes, _header.strings_size(_endian)); 527 } 528 529 // Return location attribute stream at offset. 530 inline u1* get_location_offset_data(u4 offset) const { 531 assert((u4)offset < _header.locations_size(_endian) && 532 "offset exceeds location attributes size"); 533 return offset != 0 ? _location_bytes + offset : NULL; 534 } 535 536 // Return location attribute stream for location i. 537 inline u1* get_location_data(u4 index) const { 538 return get_location_offset_data(get_location_offset(index)); 539 } 540 541 // Return the location offset for index. 542 inline u4 get_location_offset(u4 index) const { 543 assert((u4)index < _header.table_length(_endian) && 544 "index exceeds location count"); 545 return _endian->get(_offsets_table[index]); 546 } 547 548 // Find the location attributes associated with the path. Returns true if 549 // the location is found, false otherwise. 550 bool find_location(const char* path, ImageLocation& location) const; 551 552 // Find the location index and size associated with the path. 553 // Returns the location index and size if the location is found, 554 // ImageFileReader::NOT_FOUND otherwise. 555 u4 find_location_index(const char* path, u8 *size) const; 556 557 // Verify that a found location matches the supplied path. 558 bool verify_location(ImageLocation& location, const char* path) const; 559 560 // Return the resource for the supplied location index. 561 void get_resource(u4 index, u1* uncompressed_data) const; 562 563 // Return the resource for the supplied path. 564 void get_resource(ImageLocation& location, u1* uncompressed_data) const; 565 }; 566 #endif // LIBJIMAGE_IMAGEFILE_HPP