1 /*
  2  * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * The Universal Permissive License (UPL), Version 1.0
  6  *
  7  * Subject to the condition set forth below, permission is hereby granted to
  8  * any person obtaining a copy of this software, associated documentation
  9  * and/or data (collectively the "Software"), free of charge and under any
 10  * and all copyright rights in the Software, and any and all patent rights
 11  * owned or freely licensable by each licensor hereunder covering either (i)
 12  * the unmodified Software as contributed to or provided by such licensor,
 13  * or (ii) the Larger Works (as defined below), to deal in both
 14  *
 15  * (a) the Software, and
 16  *
 17  * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file
 18  * if one is included with the Software (each a "Larger Work" to which the
 19  * Software is contributed by such licensors),
 20  *
 21  * without restriction, including without limitation the rights to copy,
 22  * create derivative works of, display, perform, and distribute the Software
 23  * and make, use, sell, offer for sale, import, export, have made, and have
 24  * sold the Software and the Larger Work(s), and to sublicense the foregoing
 25  * rights on either these or other terms.
 26  *
 27  * This license is subject to the following condition:
 28  *
 29  * The above copyright notice and either this complete permission notice or
 30  * at a minimum a reference to the UPL must be included in all copies or
 31  * substantial portions of the Software.
 32  *
 33  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 34  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 35  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 36  * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 37  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 38  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 39  * USE OR OTHER DEALINGS IN THE SOFTWARE.
 40  *
 41  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 42  * or visit www.oracle.com if you need additional information or have any
 43  * questions.
 44  *
 45  */
 46 
 47 /* hsdis.c -- dump a range of addresses as native instructions
 48    This implements the plugin protocol required by the
 49    HotSpot PrintAssembly option.
 50 */
 51 
 52 #include <config.h> /* required by bfd.h */
 53 #include <errno.h>
 54 #include <inttypes.h>
 55 #include <string.h>
 56 #include <libiberty.h>
 57 #include <bfd.h>
 58 #include <bfdver.h>
 59 #include <dis-asm.h>
 60 #include "hsdis.h"
 61 
 62 #ifndef bool
 63 #define bool int
 64 #define true 1
 65 #define false 0
 66 #endif /*bool*/
 67 
 68 /* short names for stuff in hsdis.h */
 69 typedef decode_instructions_event_callback_ftype  event_callback_t;
 70 typedef decode_instructions_printf_callback_ftype printf_callback_t;
 71 
 72 /* disassemble_info.application_data object */
 73 struct hsdis_app_data {
 74   /* virtual address of data */
 75   uintptr_t start_va, end_va;
 76   /* the instructions to be decoded */
 77   unsigned char* buffer;
 78   uintptr_t length;
 79   event_callback_t  event_callback;  void* event_stream;
 80   printf_callback_t printf_callback; void* printf_stream;
 81   bool losing;
 82   bool do_newline;
 83 
 84   /* the architecture being disassembled */
 85   const char* arch_name;
 86   const bfd_arch_info_type* arch_info;
 87 
 88   /* the disassembler we are going to use: */
 89   disassembler_ftype      dfn;
 90   struct disassemble_info dinfo; /* the actual struct! */
 91 
 92   char mach_option[64];
 93   char insn_options[256];
 94 };
 95 
 96 static void* decode(struct hsdis_app_data* app_data, const char* options);
 97 
 98 #define DECL_APP_DATA(dinfo) \
 99   struct hsdis_app_data* app_data = (struct hsdis_app_data*) (dinfo)->application_data
100 
101 #define DECL_EVENT_CALLBACK(app_data) \
102   event_callback_t  event_callback = (app_data)->event_callback; \
103   void*             event_stream   = (app_data)->event_stream
104 
105 #define DECL_PRINTF_CALLBACK(app_data) \
106   printf_callback_t  printf_callback = (app_data)->printf_callback; \
107   void*              printf_stream   = (app_data)->printf_stream
108 
109 
110 static void print_help(struct hsdis_app_data* app_data,
111                        const char* msg, const char* arg);
112 static void setup_app_data(struct hsdis_app_data* app_data,
113                            const char* options);
114 static const char* format_insn_close(const char* close,
115                                      disassemble_info* dinfo,
116                                      char* buf, size_t bufsize);
117 
118 void*
119 #ifdef DLL_ENTRY
120   DLL_ENTRY
121 #endif
122 decode_instructions_virtual(uintptr_t start_va, uintptr_t end_va,
123                             unsigned char* buffer, uintptr_t length,
124                             event_callback_t  event_callback_arg,  void* event_stream_arg,
125                             printf_callback_t printf_callback_arg, void* printf_stream_arg,
126                             const char* options, int newline) {
127   struct hsdis_app_data app_data;
128   memset(&app_data, 0, sizeof(app_data));
129   app_data.start_va    = start_va;
130   app_data.end_va      = end_va;
131   app_data.buffer = buffer;
132   app_data.length = length;
133   app_data.event_callback  = event_callback_arg;
134   app_data.event_stream    = event_stream_arg;
135   app_data.printf_callback = printf_callback_arg;
136   app_data.printf_stream   = printf_stream_arg;
137   app_data.do_newline = newline == 0 ? false : true;
138 
139   return decode(&app_data, options);
140 }
141 
142 /* This is the compatability interface for older version of hotspot */
143 void*
144 #ifdef DLL_ENTRY
145   DLL_ENTRY
146 #endif
147 decode_instructions(void* start_pv, void* end_pv,
148                     event_callback_t  event_callback_arg,  void* event_stream_arg,
149                     printf_callback_t printf_callback_arg, void* printf_stream_arg,
150                     const char* options) {
151   return decode_instructions_virtual((uintptr_t)start_pv,
152                                      (uintptr_t)end_pv,
153                                      (unsigned char*)start_pv,
154                                      (uintptr_t)end_pv - (uintptr_t)start_pv,
155                                      event_callback_arg,
156                                      event_stream_arg,
157                                      printf_callback_arg,
158                                      printf_stream_arg,
159                                      options, false);
160 }
161 
162 static void* decode(struct hsdis_app_data* app_data, const char* options) {
163   setup_app_data(app_data, options);
164   char buf[128];
165 
166   {
167     /* now reload everything from app_data: */
168     DECL_EVENT_CALLBACK(app_data);
169     DECL_PRINTF_CALLBACK(app_data);
170     uintptr_t start = app_data->start_va;
171     uintptr_t end   = app_data->end_va;
172     uintptr_t p     = start;
173 
174     (*event_callback)(event_stream, "insns", (void*)start);
175 
176     (*event_callback)(event_stream, "mach name='%s'",
177                       (void*) app_data->arch_info->printable_name);
178     if (app_data->dinfo.bytes_per_line != 0) {
179       (*event_callback)(event_stream, "format bytes-per-line='%p'/",
180                         (void*)(intptr_t) app_data->dinfo.bytes_per_line);
181     }
182 
183     while (p < end && !app_data->losing) {
184       (*event_callback)(event_stream, "insn", (void*) p);
185 
186       /* reset certain state, so we can read it with confidence */
187       app_data->dinfo.insn_info_valid    = 0;
188       app_data->dinfo.branch_delay_insns = 0;
189       app_data->dinfo.data_size          = 0;
190       app_data->dinfo.insn_type          = 0;
191 
192       int size = (*app_data->dfn)((bfd_vma) p, &app_data->dinfo);
193 
194       if (size > 0)  p += size;
195       else           app_data->losing = true;
196 
197       if (!app_data->losing) {
198         const char* insn_close = format_insn_close("/insn", &app_data->dinfo,
199                                                    buf, sizeof(buf));
200         (*event_callback)(event_stream, insn_close, (void*) p);
201 
202         if (app_data->do_newline) {
203           /* follow each complete insn by a nice newline */
204           (*printf_callback)(printf_stream, "\n");
205         }
206       }
207     }
208 
209     if (app_data->losing) (*event_callback)(event_stream, "/insns", (void*) p);
210     return (void*) p;
211   }
212 }
213 
214 /* take the address of the function, for luck, and also test the typedef: */
215 const decode_func_vtype decode_func_virtual_address = &decode_instructions_virtual;
216 const decode_func_stype decode_func_address = &decode_instructions;
217 
218 static const char* format_insn_close(const char* close,
219                                      disassemble_info* dinfo,
220                                      char* buf, size_t bufsize) {
221   if (!dinfo->insn_info_valid)
222     return close;
223   enum dis_insn_type itype = dinfo->insn_type;
224   int dsize = dinfo->data_size, delays = dinfo->branch_delay_insns;
225   if ((itype == dis_nonbranch && (dsize | delays) == 0)
226       || (strlen(close) + 3*20 > bufsize))
227     return close;
228 
229   const char* type = "unknown";
230   switch (itype) {
231   case dis_nonbranch:   type = NULL;         break;
232   case dis_branch:      type = "branch";     break;
233   case dis_condbranch:  type = "condbranch"; break;
234   case dis_jsr:         type = "jsr";        break;
235   case dis_condjsr:     type = "condjsr";    break;
236   case dis_dref:        type = "dref";       break;
237   case dis_dref2:       type = "dref2";      break;
238   case dis_noninsn:     type = "noninsn";    break;
239   }
240 
241   strcpy(buf, close);
242   char* p = buf;
243   if (type)    sprintf(p += strlen(p), " type='%s'", type);
244   if (dsize)   sprintf(p += strlen(p), " dsize='%d'", dsize);
245   if (delays)  sprintf(p += strlen(p), " delay='%d'", delays);
246   return buf;
247 }
248 
249 /* handler functions */
250 
251 static int
252 hsdis_read_memory_func(bfd_vma memaddr,
253                        bfd_byte* myaddr,
254                        unsigned int length,
255                        struct disassemble_info* dinfo) {
256   DECL_APP_DATA(dinfo);
257   /* convert the virtual address memaddr into an address within memory buffer */
258   uintptr_t offset = ((uintptr_t) memaddr) - app_data->start_va;
259   if (offset + length > app_data->length) {
260     /* read is out of bounds */
261     return EIO;
262   } else {
263     memcpy(myaddr, (bfd_byte*) (app_data->buffer + offset), length);
264     return 0;
265   }
266 }
267 
268 static void
269 hsdis_print_address_func(bfd_vma vma, struct disassemble_info* dinfo) {
270   /* the actual value to print: */
271   void* addr_value = (void*) (uintptr_t) vma;
272   DECL_APP_DATA(dinfo);
273   DECL_EVENT_CALLBACK(app_data);
274 
275   /* issue the event: */
276   void* result =
277     (*event_callback)(event_stream, "addr/", addr_value);
278   if (result == NULL) {
279     /* event declined */
280     generic_print_address(vma, dinfo);
281   }
282 }
283 
284 
285 /* configuration */
286 
287 static void set_optional_callbacks(struct hsdis_app_data* app_data);
288 static void parse_caller_options(struct hsdis_app_data* app_data,
289                                  const char* caller_options);
290 static const char* native_arch_name();
291 static enum bfd_endian native_endian();
292 static const bfd_arch_info_type* find_arch_info(const char* arch_nane);
293 static bfd* get_native_bfd(const bfd_arch_info_type* arch_info,
294                            /* to avoid malloc: */
295                            bfd* empty_bfd, bfd_target* empty_xvec);
296 static void init_disassemble_info_from_bfd(struct disassemble_info* dinfo,
297                                            void *stream,
298                                            fprintf_ftype fprintf_func,
299                                            bfd* bfd,
300                                            char* disassembler_options);
301 static void parse_fake_insn(disassembler_ftype dfn,
302                             struct disassemble_info* dinfo);
303 
304 static void setup_app_data(struct hsdis_app_data* app_data,
305                            const char* caller_options) {
306   /* Make reasonable defaults for null callbacks.
307      A non-null stream for a null callback is assumed to be a FILE* for output.
308      Events are rendered as XML.
309   */
310   set_optional_callbacks(app_data);
311 
312   /* Look into caller_options for anything interesting. */
313   if (caller_options != NULL)
314     parse_caller_options(app_data, caller_options);
315 
316   /* Discover which architecture we are going to disassemble. */
317   app_data->arch_name = &app_data->mach_option[0];
318   if (app_data->arch_name[0] == '\0')
319     app_data->arch_name = native_arch_name();
320   app_data->arch_info = find_arch_info(app_data->arch_name);
321 
322   /* Make a fake bfd to hold the arch. and byteorder info. */
323   struct {
324     bfd_target empty_xvec;
325     bfd        empty_bfd;
326   } buf;
327   bfd* native_bfd = get_native_bfd(app_data->arch_info,
328                                    /* to avoid malloc: */
329                                    &buf.empty_bfd, &buf.empty_xvec);
330   init_disassemble_info_from_bfd(&app_data->dinfo,
331                                  app_data->printf_stream,
332                                  app_data->printf_callback,
333                                  native_bfd,
334                                  /* On PowerPC we get warnings, if we pass empty options */
335                                  (caller_options == NULL) ? NULL : app_data->insn_options);
336 
337   /* Finish linking together the various callback blocks. */
338   app_data->dinfo.application_data = (void*) app_data;
339   app_data->dfn = disassembler(bfd_get_arch(native_bfd),
340                                bfd_big_endian(native_bfd),
341                                bfd_get_mach(native_bfd),
342                                native_bfd);
343   app_data->dinfo.print_address_func = hsdis_print_address_func;
344   app_data->dinfo.read_memory_func = hsdis_read_memory_func;
345 
346   if (app_data->dfn == NULL) {
347     const char* bad = app_data->arch_name;
348     static bool complained;
349     if (bad == &app_data->mach_option[0])
350       print_help(app_data, "bad mach=%s", bad);
351     else if (!complained)
352       print_help(app_data, "bad native mach=%s; please port hsdis to this platform", bad);
353     complained = true;
354     /* must bail out */
355     app_data->losing = true;
356     return;
357   }
358 
359   parse_fake_insn(app_data->dfn, &app_data->dinfo);
360 }
361 
362 
/* Event sink that discards every event and always returns NULL. */
static void* null_event_callback(void* ignore_stream, const char* ignore_event, void* arg) {
  (void) ignore_stream;
  (void) ignore_event;
  (void) arg;
  return NULL;
}
367 
/* Event sink that renders events as XML markup on `stream` (a FILE*).
   The event text is used as a printf format with `arg` as its one argument.
     "foo ..."   -> <dis:foo ...>
     "/foo"      -> </dis:foo>
     "/foo ..."  -> <dis:foo_done .../></dis:foo>
   Always returns NULL. */
static void* xml_event_callback(void* stream, const char* event, void* arg) {
#define NS_PFX "dis:"
  FILE* out = (FILE*) stream;

  if (event[0] == '/') {
    const char* tag   = event + 1;          /* skip slash */
    const char* attrs = strchr(tag, ' ');
    if (attrs != NULL) {
      /* split out the closing attributes as <dis:foo_done attr='val'/> */
      int tag_len = (attrs - tag);
      fprintf(out, "<"NS_PFX"%.*s_done", tag_len, tag);
      fprintf(out, attrs, arg);
      fprintf(out, "/></"NS_PFX"%.*s>", tag_len, tag);
    } else {
      /* no arguments; just issue the closing tag */
      fprintf(out, "</"NS_PFX"%s>", tag);
    }
    return NULL;
  }

  /* opening tag, with or without a formatted argument */
  fprintf(out, "<"NS_PFX);
  fprintf(out, event, arg);
  fprintf(out, ">");
  return NULL;
}
393 
394 static void set_optional_callbacks(struct hsdis_app_data* app_data) {
395   if (app_data->printf_callback == NULL) {
396     int (*fprintf_callback)(FILE*, const char*, ...) = &fprintf;
397     FILE* fprintf_stream = stdout;
398     app_data->printf_callback = (printf_callback_t) fprintf_callback;
399     if (app_data->printf_stream == NULL)
400       app_data->printf_stream   = (void*)           fprintf_stream;
401   }
402   if (app_data->event_callback == NULL) {
403     if (app_data->event_stream == NULL)
404       app_data->event_callback = &null_event_callback;
405     else
406       app_data->event_callback = &xml_event_callback;
407   }
408 
409 }
410 
411 static void parse_caller_options(struct hsdis_app_data* app_data, const char* caller_options) {
412   char* iop_base = app_data->insn_options;
413   char* iop_limit = iop_base + sizeof(app_data->insn_options) - 1;
414   char* iop = iop_base;
415   const char* p;
416   for (p = caller_options; p != NULL; ) {
417     const char* q = strchr(p, ',');
418     size_t plen = (q == NULL) ? strlen(p) : ((q++) - p);
419     if (plen == 4 && strncmp(p, "help", plen) == 0) {
420       print_help(app_data, NULL, NULL);
421     } else if (plen >= 5 && strncmp(p, "mach=", 5) == 0) {
422       char*  mach_option = app_data->mach_option;
423       size_t mach_size   = sizeof(app_data->mach_option);
424       mach_size -= 1;           /*leave room for the null*/
425       if (plen > mach_size)  plen = mach_size;
426       strncpy(mach_option, p, plen);
427       mach_option[plen] = '\0';
428     } else if (plen > 6 && strncmp(p, "hsdis-", 6) == 0) {
429       // do not pass these to the next level
430     } else {
431       /* just copy it; {i386,sparc}-dis.c might like to see it  */
432       if (iop > iop_base && iop < iop_limit)  (*iop++) = ',';
433       if (iop + plen > iop_limit)
434         plen = iop_limit - iop;
435       strncpy(iop, p, plen);
436       iop += plen;
437     }
438     p = q;
439   }
440   *iop = '\0';
441 }
442 
443 static void print_help(struct hsdis_app_data* app_data,
444                        const char* msg, const char* arg) {
445   DECL_PRINTF_CALLBACK(app_data);
446   if (msg != NULL) {
447     (*printf_callback)(printf_stream, "hsdis: ");
448     (*printf_callback)(printf_stream, msg, arg);
449     (*printf_callback)(printf_stream, "\n");
450   }
451   (*printf_callback)(printf_stream, "hsdis output options:\n");
452   if (printf_callback == (printf_callback_t) &fprintf)
453     disassembler_usage((FILE*) printf_stream);
454   else
455     disassembler_usage(stderr); /* better than nothing */
456   (*printf_callback)(printf_stream, "  mach=<arch>   select disassembly mode\n");
457 #if defined(LIBARCH_i386) || defined(LIBARCH_amd64)
458   (*printf_callback)(printf_stream, "  mach=i386     select 32-bit mode\n");
459   (*printf_callback)(printf_stream, "  mach=x86-64   select 64-bit mode\n");
460   (*printf_callback)(printf_stream, "  suffix        always print instruction suffix\n");
461 #endif
462   (*printf_callback)(printf_stream, "  help          print this message\n");
463 }
464 
465 
466 /* low-level bfd and arch stuff that binutils doesn't do for us */
467 
468 static const bfd_arch_info_type* find_arch_info(const char* arch_name) {
469   const bfd_arch_info_type* arch_info = bfd_scan_arch(arch_name);
470   if (arch_info == NULL) {
471     extern const bfd_arch_info_type bfd_default_arch_struct;
472     arch_info = &bfd_default_arch_struct;
473   }
474   return arch_info;
475 }
476 
/* Name of the architecture selected at build time through a LIBARCH_*
   macro.  If several LIBARCH_* macros are defined, the one tested first
   below takes precedence.  With none defined, the returned text is an
   error message rather than an architecture name. */
static const char* native_arch_name() {
#if defined(LIBARCH_riscv64)
  return "riscv:rv64";
#elif defined(LIBARCH_s390x)
  return "s390:64-bit";
#elif defined(LIBARCH_aarch64)
  return "aarch64";
#elif defined(LIBARCH_arm)
  return "arm";
#elif defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le)
  return "powerpc:common64";
#elif defined(LIBARCH_amd64)
  return "i386:x86-64";
#elif defined(LIBARCH_i386)
  return "i386";
#else
  return "architecture not set in Makefile!";
#endif
}
504 
505 static enum bfd_endian native_endian() {
506   int32_t endian_test = 'x';
507   if (*(const char*) &endian_test == 'x')
508     return BFD_ENDIAN_LITTLE;
509   else
510     return BFD_ENDIAN_BIG;
511 }
512 
513 static bfd* get_native_bfd(const bfd_arch_info_type* arch_info,
514                            bfd* empty_bfd, bfd_target* empty_xvec) {
515   memset(empty_bfd,  0, sizeof(*empty_bfd));
516   memset(empty_xvec, 0, sizeof(*empty_xvec));
517   empty_xvec->flavour = bfd_target_unknown_flavour;
518   empty_xvec->byteorder = native_endian();
519   empty_bfd->xvec = empty_xvec;
520   empty_bfd->arch_info = arch_info;
521   return empty_bfd;
522 }
523 
524 static int read_zero_data_only(bfd_vma ignore_p,
525                                bfd_byte* myaddr, unsigned int length,
526                                struct disassemble_info *ignore_info) {
527   memset(myaddr, 0, length);
528   return 0;
529 }
/* fprintf stand-in that prints nothing and returns 0. */
static int print_to_dev_null(void* ignore_stream, const char* ignore_format, ...) {
  (void) ignore_stream;
  (void) ignore_format;
  return 0;
}
533 
534 /* Prime the pump by running the selected disassembler on a null input.
535    This forces the machine-specific disassembler to divulge invariant
536    information like bytes_per_line.
537  */
538 static void parse_fake_insn(disassembler_ftype dfn,
539                             struct disassemble_info* dinfo) {
540   typedef int (*read_memory_ftype)
541     (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
542      struct disassemble_info *info);
543   read_memory_ftype read_memory_func = dinfo->read_memory_func;
544   fprintf_ftype     fprintf_func     = dinfo->fprintf_func;
545 
546   dinfo->read_memory_func = &read_zero_data_only;
547   dinfo->fprintf_func     = &print_to_dev_null;
548   (*dfn)(0, dinfo);
549 
550   /* put it back */
551   dinfo->read_memory_func = read_memory_func;
552   dinfo->fprintf_func     = fprintf_func;
553 }
554 
555 static void init_disassemble_info_from_bfd(struct disassemble_info* dinfo,
556                                            void *stream,
557                                            fprintf_ftype fprintf_func,
558                                            bfd* abfd,
559                                            char* disassembler_options) {
560   init_disassemble_info(dinfo, stream, fprintf_func);
561 
562   dinfo->flavour = bfd_get_flavour(abfd);
563   dinfo->arch = bfd_get_arch(abfd);
564   dinfo->mach = bfd_get_mach(abfd);
565   dinfo->disassembler_options = disassembler_options;
566 #if BFD_VERSION >= 234000000
567   /* bfd_octets_per_byte() has 2 args since binutils 2.34 */
568   dinfo->octets_per_byte = bfd_octets_per_byte (abfd, NULL);
569 #else
570   dinfo->octets_per_byte = bfd_octets_per_byte (abfd);
571 #endif
572   dinfo->skip_zeroes = sizeof(void*) * 2;
573   dinfo->skip_zeroes_at_end = sizeof(void*)-1;
574   dinfo->disassembler_needs_relocs = FALSE;
575 
576   if (bfd_big_endian(abfd))
577     dinfo->display_endian = dinfo->endian = BFD_ENDIAN_BIG;
578   else if (bfd_little_endian(abfd))
579     dinfo->display_endian = dinfo->endian = BFD_ENDIAN_LITTLE;
580   else
581     dinfo->endian = native_endian();
582 
583   disassemble_init_for_target(dinfo);
584 }