/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
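
// Example (an illustrative sketch, not code from this file; ext_addr is a
// placeholder for some VM-internal address): wrapping an external address as
//
//   AddressLiteral lit(ext_addr, relocInfo::external_word_type);
//
// records an external_word relocation spec, so the embedded address stays
// patchable if the enclosing code blob is relocated.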

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
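
// Example (a sketch): in raw ModRM/SIB encodings an index field of 4 (rsp)
// means "no index", so
//
//   Address::make_raw(rbp->encoding(), rsp->encoding(), 0, 8, relocInfo::none)
//
// produces the same operand as Address(rbp, 8): [rbp + 8] with no index
// register.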

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
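
// Example (a sketch): encode() keeps only the low three register bits that
// fit in a ModRM/SIB field; the fourth bit travels in a REX prefix emitted
// elsewhere. So encode(rax) == 0 and, on 64-bit, encode(r9) == 1 -- only
// the REX.B/R/X bit distinguishes r9 from rcx in the operand byte.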

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
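
// Example (a sketch of the two encodings): addl(rax, imm) goes through
// emit_arith(0x81, 0xC0, rax, imm) and emits
//
//   addl(rax, 8)      -> 83 C0 08           (sign-extended imm8 form)
//   addl(rax, 0x1234) -> 81 C0 34 12 00 00  (full imm32 form)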

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
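
// Note (a sketch of why the 1 and 4 above matter): the third argument to
// emit_operand() is the size of the immediate that trails the operand bytes.
// On 64-bit it feeds the RIP-relative correction, so a RIP-relative
// displacement is computed from the end of the whole instruction, immediate
// included.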

void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32-bit never uses this form; it handles everything via the
      // rip-rel/disp code above.
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
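
// Worked example (a sketch): addl(rax, Address(rbx, rcx, Address::times_4, 16))
// takes the [base + index*scale + imm8] path above and emits
//
//   03 44 8B 10
//
// opcode 0x03, ModRM 0x44 ([01 reg=000 100]: disp8 plus SIB), SIB 0x8B
// ([ss=10 index=001(rcx) base=011(rbx)]), then the disp8 0x10.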

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have the 0x0F prefix and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since
    // a ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To keep
    // those VEX bits set, the REX and vvvv bits are stored inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
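
// Example (a sketch): given inst pointing at 83 C0 08 (addl rax, 8),
// locate_operand walks the opcode map above (no prefixes; opcode 0x83 has
// a memory-capable operand plus a one-byte immediate), parses the ModRM
// byte 0xC0, and locate_next_instruction(inst) returns inst + 3.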


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
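
// Example (a sketch): emit_farith(0xD8, 0xC0, 2) emits D8 C2, the x87
// encoding of "fadd st(0), st(2)" -- b2 selects the operation family and
// i picks the stack register.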


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
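
// Note (a sketch of intended use): these multi-byte NOPs let code be padded
// to an alignment boundary with one instruction instead of a run of 0x90
// bytes; e.g. addr_nop_5() burns exactly five bytes as 0F 1F 44 00 00.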

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}
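
// Note (a summary sketch): for a label that is already bound the branch
// distance is known, so the five-byte E8 rel32 form is emitted at once
// (offs - long_size is the displacement from the end of the instruction);
// for an unbound label a zero displacement is emitted and patched when the
// label is later bound.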

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if the values are equal; otherwise, the value at
// adr is loaded into rax. The ZF is set if the compared values were equal,
// and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
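
// Example (a sketch of typical use, not code from this file; new_val and
// field_addr are placeholders): an atomic compare-and-swap pairs the lock
// prefix with cmpxchg, with the expected old value preloaded in rax:
//
//   __ lock();
//   __ cmpxchgl(new_val, field_addr); // if [field_addr] == rax: store new_val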

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}

void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}

void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF0 | encode));
}

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}

void Assembler::imull(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_operand(dst, src);
}


void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if the condition
    //       is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_int32(0);
  }
}
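
// Example (a sketch): with cc == equal (0x4) and a label bound 16 bytes
// back, maybe_short takes the two-byte form and emits 74 EE
// (0x70 | cc, then disp8 = offs - short_size = -18); a longer distance
// falls back to the six-byte 0F 84 rel32 form.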
1517 
1518 void Assembler::jccb(Condition cc, Label& L) {
1519   if (L.is_bound()) {
1520     const int short_size = 2;
1521     address entry = target(L);
1522 #ifdef ASSERT
1523     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1524     intptr_t delta = short_branch_delta();
1525     if (delta != 0) {
1526       dist += (dist < 0 ? (-delta) :delta);
1527     }
1528     assert(is8bit(dist), "Dispacement too large for a short jmp");
1529 #endif
1530     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1531     // 0111 tttn #8-bit disp
1532     emit_int8(0x70 | cc);
1533     emit_int8((offs - short_size) & 0xFF);
1534   } else {
1535     InstructionMark im(this);
1536     L.add_patch_at(code(), locator());
1537     emit_int8(0x70 | cc);
1538     emit_int8(0);
1539   }
1540 }
1541 
1542 void Assembler::jmp(Address adr) {
1543   InstructionMark im(this);
1544   prefix(adr);
1545   emit_int8((unsigned char)0xFF);
1546   emit_operand(rsp, adr);
1547 }
1548 
1549 void Assembler::jmp(Label& L, bool maybe_short) {
1550   if (L.is_bound()) {
1551     address entry = target(L);
1552     assert(entry != NULL, "jmp most probably wrong");
1553     InstructionMark im(this);
1554     const int short_size = 2;
1555     const int long_size = 5;
1556     intptr_t offs = entry - pc();
1557     if (maybe_short && is8bit(offs - short_size)) {
1558       emit_int8((unsigned char)0xEB);
1559       emit_int8((offs - short_size) & 0xFF);
1560     } else {
1561       emit_int8((unsigned char)0xE9);
1562       emit_int32(offs - long_size);
1563     }
1564   } else {
1565     // By default, forward jumps are always 32-bit displacements, since
1566     // we can't yet know where the label will be bound.  If you're sure that
1567     // the forward jump will not run beyond 256 bytes, use jmpb to
1568     // force an 8-bit displacement.
1569     InstructionMark im(this);
1570     L.add_patch_at(code(), locator());
1571     emit_int8((unsigned char)0xE9);
1572     emit_int32(0);
1573   }
1574 }
1575 
1576 void Assembler::jmp(Register entry) {
1577   int encode = prefix_and_encode(entry->encoding());
1578   emit_int8((unsigned char)0xFF);
1579   emit_int8((unsigned char)(0xE0 | encode));
1580 }
1581 
1582 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1583   InstructionMark im(this);
1584   emit_int8((unsigned char)0xE9);
1585   assert(dest != NULL, "must have a target");
1586   intptr_t disp = dest - (pc() + sizeof(int32_t));
1587   assert(is_simm32(disp), "must be 32bit offset (jmp)");
1588   emit_data(disp, rspec.reloc(), call32_operand);
1589 }
1590 
1591 void Assembler::jmpb(Label& L) {
1592   if (L.is_bound()) {
1593     const int short_size = 2;
1594     address entry = target(L);
1595     assert(entry != NULL, "jmp most probably wrong");
1596 #ifdef ASSERT
1597     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1598     intptr_t delta = short_branch_delta();
1599     if (delta != 0) {
      dist += (dist < 0 ? -delta : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
1603 #endif
1604     intptr_t offs = entry - pc();
1605     emit_int8((unsigned char)0xEB);
1606     emit_int8((offs - short_size) & 0xFF);
1607   } else {
1608     InstructionMark im(this);
1609     L.add_patch_at(code(), locator());
1610     emit_int8((unsigned char)0xEB);
1611     emit_int8(0);
1612   }
1613 }
1614 
void Assembler::ldmxcsr(Address src) {
1616   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1617   InstructionMark im(this);
1618   prefix(src);
1619   emit_int8(0x0F);
1620   emit_int8((unsigned char)0xAE);
1621   emit_operand(as_Register(2), src);
1622 }
1623 
1624 void Assembler::leal(Register dst, Address src) {
1625   InstructionMark im(this);
1626 #ifdef _LP64
1627   emit_int8(0x67); // addr32
1628   prefix(src, dst);
1629 #endif // LP64
1630   emit_int8((unsigned char)0x8D);
1631   emit_operand(dst, src);
1632 }
1633 
1634 void Assembler::lfence() {
1635   emit_int8(0x0F);
1636   emit_int8((unsigned char)0xAE);
1637   emit_int8((unsigned char)0xE8);
1638 }
1639 
1640 void Assembler::lock() {
1641   emit_int8((unsigned char)0xF0);
1642 }
1643 
1644 void Assembler::lzcntl(Register dst, Register src) {
1645   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1646   emit_int8((unsigned char)0xF3);
1647   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1648   emit_int8(0x0F);
1649   emit_int8((unsigned char)0xBD);
1650   emit_int8((unsigned char)(0xC0 | encode));
1651 }
1652 
1653 // Emit mfence instruction
1654 void Assembler::mfence() {
1655   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1656   emit_int8(0x0F);
1657   emit_int8((unsigned char)0xAE);
1658   emit_int8((unsigned char)0xF0);
1659 }
1660 
1661 void Assembler::mov(Register dst, Register src) {
1662   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1663 }
1664 
1665 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1667   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1668 }
1669 
1670 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1671   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1672   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1673 }
1674 
1675 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1676   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1677   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
1678   emit_int8(0x16);
1679   emit_int8((unsigned char)(0xC0 | encode));
1680 }
1681 
1682 void Assembler::movb(Register dst, Address src) {
1683   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1684   InstructionMark im(this);
1685   prefix(src, dst, true);
1686   emit_int8((unsigned char)0x8A);
1687   emit_operand(dst, src);
1688 }
1689 
1690 
1691 void Assembler::movb(Address dst, int imm8) {
1692   InstructionMark im(this);
  prefix(dst);
1694   emit_int8((unsigned char)0xC6);
1695   emit_operand(rax, dst, 1);
1696   emit_int8(imm8);
1697 }
1698 
1699 
1700 void Assembler::movb(Address dst, Register src) {
1701   assert(src->has_byte_register(), "must have byte register");
1702   InstructionMark im(this);
1703   prefix(dst, src, true);
1704   emit_int8((unsigned char)0x88);
1705   emit_operand(src, dst);
1706 }
1707 
1708 void Assembler::movdl(XMMRegister dst, Register src) {
1709   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1710   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1711   emit_int8(0x6E);
1712   emit_int8((unsigned char)(0xC0 | encode));
1713 }
1714 
1715 void Assembler::movdl(Register dst, XMMRegister src) {
1716   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1717   // swap src/dst to get correct prefix
1718   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1719   emit_int8(0x7E);
1720   emit_int8((unsigned char)(0xC0 | encode));
1721 }
1722 
1723 void Assembler::movdl(XMMRegister dst, Address src) {
1724   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1725   InstructionMark im(this);
1726   simd_prefix(dst, src, VEX_SIMD_66);
1727   emit_int8(0x6E);
1728   emit_operand(dst, src);
1729 }
1730 
1731 void Assembler::movdl(Address dst, XMMRegister src) {
1732   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1733   InstructionMark im(this);
1734   simd_prefix(dst, src, VEX_SIMD_66);
1735   emit_int8(0x7E);
1736   emit_operand(src, dst);
1737 }
1738 
1739 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1740   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1741   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1742 }
1743 
1744 void Assembler::movdqa(XMMRegister dst, Address src) {
1745   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1746   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1747 }
1748 
1749 void Assembler::movdqu(XMMRegister dst, Address src) {
1750   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1751   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1752 }
1753 
1754 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1756   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1757 }
1758 
1759 void Assembler::movdqu(Address dst, XMMRegister src) {
1760   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1761   InstructionMark im(this);
1762   simd_prefix(dst, src, VEX_SIMD_F3);
1763   emit_int8(0x7F);
1764   emit_operand(src, dst);
1765 }
1766 
// Move Unaligned 256-bit Vector
1768 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1769   assert(UseAVX > 0, "");
1770   bool vector256 = true;
1771   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1772   emit_int8(0x6F);
1773   emit_int8((unsigned char)(0xC0 | encode));
1774 }
1775 
1776 void Assembler::vmovdqu(XMMRegister dst, Address src) {
1777   assert(UseAVX > 0, "");
1778   InstructionMark im(this);
1779   bool vector256 = true;
1780   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1781   emit_int8(0x6F);
1782   emit_operand(dst, src);
1783 }
1784 
1785 void Assembler::vmovdqu(Address dst, XMMRegister src) {
1786   assert(UseAVX > 0, "");
1787   InstructionMark im(this);
1788   bool vector256 = true;
1789   // swap src<->dst for encoding
1790   assert(src != xnoreg, "sanity");
1791   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1792   emit_int8(0x7F);
1793   emit_operand(src, dst);
1794 }
1795 
// Uses zero extension on 64-bit
1797 
1798 void Assembler::movl(Register dst, int32_t imm32) {
1799   int encode = prefix_and_encode(dst->encoding());
1800   emit_int8((unsigned char)(0xB8 | encode));
1801   emit_int32(imm32);
1802 }
1803 
1804 void Assembler::movl(Register dst, Register src) {
1805   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1806   emit_int8((unsigned char)0x8B);
1807   emit_int8((unsigned char)(0xC0 | encode));
1808 }
1809 
1810 void Assembler::movl(Register dst, Address src) {
1811   InstructionMark im(this);
1812   prefix(src, dst);
1813   emit_int8((unsigned char)0x8B);
1814   emit_operand(dst, src);
1815 }
1816 
1817 void Assembler::movl(Address dst, int32_t imm32) {
1818   InstructionMark im(this);
1819   prefix(dst);
1820   emit_int8((unsigned char)0xC7);
1821   emit_operand(rax, dst, 4);
1822   emit_int32(imm32);
1823 }
1824 
1825 void Assembler::movl(Address dst, Register src) {
1826   InstructionMark im(this);
1827   prefix(dst, src);
1828   emit_int8((unsigned char)0x89);
1829   emit_operand(src, dst);
1830 }
1831 
// Newer CPUs require the use of movsd and movss to avoid a partial register
// stall when loading from memory. But for old Opterons use movlpd instead of
// movsd. The selection is done in MacroAssembler::movdbl() and movflt().
1835 void Assembler::movlpd(XMMRegister dst, Address src) {
1836   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1837   emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
1838 }
1839 
void Assembler::movq(MMXRegister dst, Address src) {
1841   assert( VM_Version::supports_mmx(), "" );
1842   emit_int8(0x0F);
1843   emit_int8(0x6F);
1844   emit_operand(dst, src);
1845 }
1846 
void Assembler::movq(Address dst, MMXRegister src) {
1848   assert( VM_Version::supports_mmx(), "" );
1849   emit_int8(0x0F);
1850   emit_int8(0x7F);
  // Workaround for gcc (3.2.1-7a) bug:
  // in that version of gcc, with only an emit_operand(MMX, Address) available,
  // gcc will tail-jump and try to reverse the parameters, completely
  // obliterating dst in the process. Having a version available that
  // doesn't need to swap the args at the tail jump avoids the bug.
1857   emit_operand(dst, src);
1858 }
1859 
1860 void Assembler::movq(XMMRegister dst, Address src) {
1861   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1862   InstructionMark im(this);
1863   simd_prefix(dst, src, VEX_SIMD_F3);
1864   emit_int8(0x7E);
1865   emit_operand(dst, src);
1866 }
1867 
1868 void Assembler::movq(Address dst, XMMRegister src) {
1869   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1870   InstructionMark im(this);
1871   simd_prefix(dst, src, VEX_SIMD_66);
1872   emit_int8((unsigned char)0xD6);
1873   emit_operand(src, dst);
1874 }
1875 
1876 void Assembler::movsbl(Register dst, Address src) { // movsxb
1877   InstructionMark im(this);
1878   prefix(src, dst);
1879   emit_int8(0x0F);
1880   emit_int8((unsigned char)0xBE);
1881   emit_operand(dst, src);
1882 }
1883 
1884 void Assembler::movsbl(Register dst, Register src) { // movsxb
1885   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1886   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1887   emit_int8(0x0F);
1888   emit_int8((unsigned char)0xBE);
1889   emit_int8((unsigned char)(0xC0 | encode));
1890 }
1891 
1892 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1893   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1894   emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
1895 }
1896 
1897 void Assembler::movsd(XMMRegister dst, Address src) {
1898   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1899   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
1900 }
1901 
1902 void Assembler::movsd(Address dst, XMMRegister src) {
1903   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1904   InstructionMark im(this);
1905   simd_prefix(dst, src, VEX_SIMD_F2);
1906   emit_int8(0x11);
1907   emit_operand(src, dst);
1908 }
1909 
1910 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1911   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1912   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1913 }
1914 
1915 void Assembler::movss(XMMRegister dst, Address src) {
1916   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1917   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
1918 }
1919 
1920 void Assembler::movss(Address dst, XMMRegister src) {
1921   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1922   InstructionMark im(this);
1923   simd_prefix(dst, src, VEX_SIMD_F3);
1924   emit_int8(0x11);
1925   emit_operand(src, dst);
1926 }
1927 
1928 void Assembler::movswl(Register dst, Address src) { // movsxw
1929   InstructionMark im(this);
1930   prefix(src, dst);
1931   emit_int8(0x0F);
1932   emit_int8((unsigned char)0xBF);
1933   emit_operand(dst, src);
1934 }
1935 
1936 void Assembler::movswl(Register dst, Register src) { // movsxw
1937   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1938   emit_int8(0x0F);
1939   emit_int8((unsigned char)0xBF);
1940   emit_int8((unsigned char)(0xC0 | encode));
1941 }
1942 
1943 void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66); // switch to 16-bit mode
1947   prefix(dst);
1948   emit_int8((unsigned char)0xC7);
1949   emit_operand(rax, dst, 2);
1950   emit_int16(imm16);
1951 }
1952 
1953 void Assembler::movw(Register dst, Address src) {
1954   InstructionMark im(this);
1955   emit_int8(0x66);
1956   prefix(src, dst);
1957   emit_int8((unsigned char)0x8B);
1958   emit_operand(dst, src);
1959 }
1960 
1961 void Assembler::movw(Address dst, Register src) {
1962   InstructionMark im(this);
1963   emit_int8(0x66);
1964   prefix(dst, src);
1965   emit_int8((unsigned char)0x89);
1966   emit_operand(src, dst);
1967 }
1968 
1969 void Assembler::movzbl(Register dst, Address src) { // movzxb
1970   InstructionMark im(this);
1971   prefix(src, dst);
1972   emit_int8(0x0F);
1973   emit_int8((unsigned char)0xB6);
1974   emit_operand(dst, src);
1975 }
1976 
1977 void Assembler::movzbl(Register dst, Register src) { // movzxb
1978   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1979   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1980   emit_int8(0x0F);
1981   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
1983 }
1984 
1985 void Assembler::movzwl(Register dst, Address src) { // movzxw
1986   InstructionMark im(this);
1987   prefix(src, dst);
1988   emit_int8(0x0F);
1989   emit_int8((unsigned char)0xB7);
1990   emit_operand(dst, src);
1991 }
1992 
1993 void Assembler::movzwl(Register dst, Register src) { // movzxw
1994   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1995   emit_int8(0x0F);
1996   emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode));
1998 }
1999 
2000 void Assembler::mull(Address src) {
2001   InstructionMark im(this);
2002   prefix(src);
2003   emit_int8((unsigned char)0xF7);
2004   emit_operand(rsp, src);
2005 }
2006 
2007 void Assembler::mull(Register src) {
2008   int encode = prefix_and_encode(src->encoding());
2009   emit_int8((unsigned char)0xF7);
2010   emit_int8((unsigned char)(0xE0 | encode));
2011 }
2012 
2013 void Assembler::mulsd(XMMRegister dst, Address src) {
2014   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2015   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2016 }
2017 
2018 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2019   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2020   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2021 }
2022 
2023 void Assembler::mulss(XMMRegister dst, Address src) {
2024   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2025   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2026 }
2027 
2028 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2029   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2030   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2031 }
2032 
2033 void Assembler::negl(Register dst) {
2034   int encode = prefix_and_encode(dst->encoding());
2035   emit_int8((unsigned char)0xF7);
2036   emit_int8((unsigned char)(0xD8 | encode));
2037 }
2038 
2039 void Assembler::nop(int i) {
2040 #ifdef ASSERT
2041   assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, clearly
  // speed is not an issue, so simply use the traditional single-byte nop
  // for alignment.
2046 
2047   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2048   return;
2049 
2050 #endif // ASSERT
2051 
2052   if (UseAddressNop && VM_Version::is_intel()) {
2053     //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2055     //  1: 0x90
2056     //  2: 0x66 0x90
2057     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2058     //  4: 0x0F 0x1F 0x40 0x00
2059     //  5: 0x0F 0x1F 0x44 0x00 0x00
2060     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2061     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2062     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2063     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2064     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2065     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2066 
    // The rest of the encoding is Intel specific - don't use consecutive address nops
2068 
2069     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2070     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2071     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2072     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2073 
    while (i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
2076       i -= 15;
2077       emit_int8(0x66);   // size prefix
2078       emit_int8(0x66);   // size prefix
2079       emit_int8(0x66);   // size prefix
2080       addr_nop_8();
2081       emit_int8(0x66);   // size prefix
2082       emit_int8(0x66);   // size prefix
2083       emit_int8(0x66);   // size prefix
2084       emit_int8((unsigned char)0x90);
2085                          // nop
2086     }
2087     switch (i) {
2088       case 14:
2089         emit_int8(0x66); // size prefix
2090       case 13:
2091         emit_int8(0x66); // size prefix
2092       case 12:
2093         addr_nop_8();
2094         emit_int8(0x66); // size prefix
2095         emit_int8(0x66); // size prefix
2096         emit_int8(0x66); // size prefix
2097         emit_int8((unsigned char)0x90);
2098                          // nop
2099         break;
2100       case 11:
2101         emit_int8(0x66); // size prefix
2102       case 10:
2103         emit_int8(0x66); // size prefix
2104       case 9:
2105         emit_int8(0x66); // size prefix
2106       case 8:
2107         addr_nop_8();
2108         break;
2109       case 7:
2110         addr_nop_7();
2111         break;
2112       case 6:
2113         emit_int8(0x66); // size prefix
2114       case 5:
2115         addr_nop_5();
2116         break;
2117       case 4:
2118         addr_nop_4();
2119         break;
2120       case 3:
2121         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2122         emit_int8(0x66); // size prefix
2123       case 2:
2124         emit_int8(0x66); // size prefix
2125       case 1:
2126         emit_int8((unsigned char)0x90);
2127                          // nop
2128         break;
2129       default:
2130         assert(i == 0, " ");
2131     }
2132     return;
2133   }
2134   if (UseAddressNop && VM_Version::is_amd()) {
2135     //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2137     //  1: 0x90
2138     //  2: 0x66 0x90
2139     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2140     //  4: 0x0F 0x1F 0x40 0x00
2141     //  5: 0x0F 0x1F 0x44 0x00 0x00
2142     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2143     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2144     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2145     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2146     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2147     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2148 
    // The rest of the encoding is AMD specific - use consecutive address nops
2150 
2151     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2152     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2153     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2154     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2155     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2156     //     Size prefixes (0x66) are added for larger sizes
2157 
    while (i >= 22) {
2159       i -= 11;
2160       emit_int8(0x66); // size prefix
2161       emit_int8(0x66); // size prefix
2162       emit_int8(0x66); // size prefix
2163       addr_nop_8();
2164     }
    // Generate the first nop for sizes between 21 and 12
2166     switch (i) {
2167       case 21:
2168         i -= 1;
2169         emit_int8(0x66); // size prefix
2170       case 20:
2171       case 19:
2172         i -= 1;
2173         emit_int8(0x66); // size prefix
2174       case 18:
2175       case 17:
2176         i -= 1;
2177         emit_int8(0x66); // size prefix
2178       case 16:
2179       case 15:
2180         i -= 8;
2181         addr_nop_8();
2182         break;
2183       case 14:
2184       case 13:
2185         i -= 7;
2186         addr_nop_7();
2187         break;
2188       case 12:
2189         i -= 6;
2190         emit_int8(0x66); // size prefix
2191         addr_nop_5();
2192         break;
2193       default:
2194         assert(i < 12, " ");
2195     }
2196 
    // Generate the second nop for sizes between 11 and 1
2198     switch (i) {
2199       case 11:
2200         emit_int8(0x66); // size prefix
2201       case 10:
2202         emit_int8(0x66); // size prefix
2203       case 9:
2204         emit_int8(0x66); // size prefix
2205       case 8:
2206         addr_nop_8();
2207         break;
2208       case 7:
2209         addr_nop_7();
2210         break;
2211       case 6:
2212         emit_int8(0x66); // size prefix
2213       case 5:
2214         addr_nop_5();
2215         break;
2216       case 4:
2217         addr_nop_4();
2218         break;
2219       case 3:
2220         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2221         emit_int8(0x66); // size prefix
2222       case 2:
2223         emit_int8(0x66); // size prefix
2224       case 1:
2225         emit_int8((unsigned char)0x90);
2226                          // nop
2227         break;
2228       default:
2229         assert(i == 0, " ");
2230     }
2231     return;
2232   }
2233 
2234   // Using nops with size prefixes "0x66 0x90".
2235   // From AMD Optimization Guide:
2236   //  1: 0x90
2237   //  2: 0x66 0x90
2238   //  3: 0x66 0x66 0x90
2239   //  4: 0x66 0x66 0x66 0x90
2240   //  5: 0x66 0x66 0x90 0x66 0x90
2241   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2242   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2243   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2244   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2245   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2246   //
  while (i > 12) {
2248     i -= 4;
2249     emit_int8(0x66); // size prefix
2250     emit_int8(0x66);
2251     emit_int8(0x66);
2252     emit_int8((unsigned char)0x90);
2253                      // nop
2254   }
2255   // 1 - 12 nops
  if (i > 8) {
    if (i > 9) {
2258       i -= 1;
2259       emit_int8(0x66);
2260     }
2261     i -= 3;
2262     emit_int8(0x66);
2263     emit_int8(0x66);
2264     emit_int8((unsigned char)0x90);
2265   }
2266   // 1 - 8 nops
  if (i > 4) {
    if (i > 6) {
2269       i -= 1;
2270       emit_int8(0x66);
2271     }
2272     i -= 3;
2273     emit_int8(0x66);
2274     emit_int8(0x66);
2275     emit_int8((unsigned char)0x90);
2276   }
2277   switch (i) {
2278     case 4:
2279       emit_int8(0x66);
2280     case 3:
2281       emit_int8(0x66);
2282     case 2:
2283       emit_int8(0x66);
2284     case 1:
2285       emit_int8((unsigned char)0x90);
2286       break;
2287     default:
2288       assert(i == 0, " ");
2289   }
2290 }
2291 
2292 void Assembler::notl(Register dst) {
2293   int encode = prefix_and_encode(dst->encoding());
2294   emit_int8((unsigned char)0xF7);
2295   emit_int8((unsigned char)(0xD0 | encode));
2296 }
2297 
2298 void Assembler::orl(Address dst, int32_t imm32) {
2299   InstructionMark im(this);
2300   prefix(dst);
2301   emit_arith_operand(0x81, rcx, dst, imm32);
2302 }
2303 
2304 void Assembler::orl(Register dst, int32_t imm32) {
2305   prefix(dst);
2306   emit_arith(0x81, 0xC8, dst, imm32);
2307 }
2308 
2309 void Assembler::orl(Register dst, Address src) {
2310   InstructionMark im(this);
2311   prefix(src, dst);
2312   emit_int8(0x0B);
2313   emit_operand(dst, src);
2314 }
2315 
2316 void Assembler::orl(Register dst, Register src) {
2317   (void) prefix_and_encode(dst->encoding(), src->encoding());
2318   emit_arith(0x0B, 0xC0, dst, src);
2319 }
2320 
2321 void Assembler::orl(Address dst, Register src) {
2322   InstructionMark im(this);
2323   prefix(dst, src);
2324   emit_int8(0x09);
2325   emit_operand(src, dst);
2326 }
2327 
2328 void Assembler::packuswb(XMMRegister dst, Address src) {
2329   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2330   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2331   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2332 }
2333 
2334 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2335   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2336   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2337 }
2338 
2339 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
2341   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
2342 }
2343 
2344 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
2345   assert(VM_Version::supports_avx2(), "");
2346   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
2347   emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
2349   emit_int8(imm8);
2350 }
2351 
2352 void Assembler::pause() {
2353   emit_int8((unsigned char)0xF3);
2354   emit_int8((unsigned char)0x90);
2355 }
2356 
2357 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2358   assert(VM_Version::supports_sse4_2(), "");
2359   InstructionMark im(this);
2360   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2361   emit_int8(0x61);
2362   emit_operand(dst, src);
2363   emit_int8(imm8);
2364 }
2365 
2366 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2367   assert(VM_Version::supports_sse4_2(), "");
2368   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2369   emit_int8(0x61);
2370   emit_int8((unsigned char)(0xC0 | encode));
2371   emit_int8(imm8);
2372 }
2373 
2374 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2375   assert(VM_Version::supports_sse4_1(), "");
2376   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2377   emit_int8(0x16);
2378   emit_int8((unsigned char)(0xC0 | encode));
2379   emit_int8(imm8);
2380 }
2381 
2382 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2383   assert(VM_Version::supports_sse4_1(), "");
2384   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2385   emit_int8(0x16);
2386   emit_int8((unsigned char)(0xC0 | encode));
2387   emit_int8(imm8);
2388 }
2389 
2390 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2391   assert(VM_Version::supports_sse4_1(), "");
2392   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
2393   emit_int8(0x22);
2394   emit_int8((unsigned char)(0xC0 | encode));
2395   emit_int8(imm8);
2396 }
2397 
2398 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2399   assert(VM_Version::supports_sse4_1(), "");
2400   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
2401   emit_int8(0x22);
2402   emit_int8((unsigned char)(0xC0 | encode));
2403   emit_int8(imm8);
2404 }
2405 
2406 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2407   assert(VM_Version::supports_sse4_1(), "");
2408   InstructionMark im(this);
2409   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2410   emit_int8(0x30);
2411   emit_operand(dst, src);
2412 }
2413 
2414 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2415   assert(VM_Version::supports_sse4_1(), "");
2416   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2417   emit_int8(0x30);
2418   emit_int8((unsigned char)(0xC0 | encode));
2419 }
2420 
2421 // generic
2422 void Assembler::pop(Register dst) {
2423   int encode = prefix_and_encode(dst->encoding());
2424   emit_int8(0x58 | encode);
2425 }
2426 
2427 void Assembler::popcntl(Register dst, Address src) {
2428   assert(VM_Version::supports_popcnt(), "must support");
2429   InstructionMark im(this);
2430   emit_int8((unsigned char)0xF3);
2431   prefix(src, dst);
2432   emit_int8(0x0F);
2433   emit_int8((unsigned char)0xB8);
2434   emit_operand(dst, src);
2435 }
2436 
2437 void Assembler::popcntl(Register dst, Register src) {
2438   assert(VM_Version::supports_popcnt(), "must support");
2439   emit_int8((unsigned char)0xF3);
2440   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2441   emit_int8(0x0F);
2442   emit_int8((unsigned char)0xB8);
2443   emit_int8((unsigned char)(0xC0 | encode));
2444 }
2445 
2446 void Assembler::popf() {
2447   emit_int8((unsigned char)0x9D);
2448 }
2449 
2450 #ifndef _LP64 // no 32bit push/pop on amd64
2451 void Assembler::popl(Address dst) {
  // NOTE: this will adjust the stack by 8 bytes on 64-bit
2453   InstructionMark im(this);
2454   prefix(dst);
2455   emit_int8((unsigned char)0x8F);
2456   emit_operand(rax, dst);
2457 }
2458 #endif
2459 
2460 void Assembler::prefetch_prefix(Address src) {
2461   prefix(src);
2462   emit_int8(0x0F);
2463 }
2464 
2465 void Assembler::prefetchnta(Address src) {
2466   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2467   InstructionMark im(this);
2468   prefetch_prefix(src);
2469   emit_int8(0x18);
2470   emit_operand(rax, src); // 0, src
2471 }
2472 
2473 void Assembler::prefetchr(Address src) {
2474   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2475   InstructionMark im(this);
2476   prefetch_prefix(src);
2477   emit_int8(0x0D);
2478   emit_operand(rax, src); // 0, src
2479 }
2480 
2481 void Assembler::prefetcht0(Address src) {
2482   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2483   InstructionMark im(this);
2484   prefetch_prefix(src);
2485   emit_int8(0x18);
2486   emit_operand(rcx, src); // 1, src
2487 }
2488 
2489 void Assembler::prefetcht1(Address src) {
2490   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2491   InstructionMark im(this);
2492   prefetch_prefix(src);
2493   emit_int8(0x18);
2494   emit_operand(rdx, src); // 2, src
2495 }
2496 
2497 void Assembler::prefetcht2(Address src) {
2498   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2499   InstructionMark im(this);
2500   prefetch_prefix(src);
2501   emit_int8(0x18);
2502   emit_operand(rbx, src); // 3, src
2503 }
2504 
2505 void Assembler::prefetchw(Address src) {
2506   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2507   InstructionMark im(this);
2508   prefetch_prefix(src);
2509   emit_int8(0x0D);
2510   emit_operand(rcx, src); // 1, src
2511 }
2512 
2513 void Assembler::prefix(Prefix p) {
2514   emit_int8(p);
2515 }
2516 
2517 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2518   assert(VM_Version::supports_ssse3(), "");
2519   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2520   emit_int8(0x00);
2521   emit_int8((unsigned char)(0xC0 | encode));
2522 }
2523 
2524 void Assembler::pshufb(XMMRegister dst, Address src) {
2525   assert(VM_Version::supports_ssse3(), "");
2526   InstructionMark im(this);
2527   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2528   emit_int8(0x00);
2529   emit_operand(dst, src);
2530 }
2531 
2532 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2533   assert(isByte(mode), "invalid value");
2534   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2535   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
  emit_int8(mode & 0xFF);
}
2539 
2540 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2541   assert(isByte(mode), "invalid value");
2542   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2543   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2544   InstructionMark im(this);
2545   simd_prefix(dst, src, VEX_SIMD_66);
2546   emit_int8(0x70);
2547   emit_operand(dst, src);
2548   emit_int8(mode & 0xFF);
2549 }
2550 
2551 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2552   assert(isByte(mode), "invalid value");
2553   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2554   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2555   emit_int8(mode & 0xFF);
2556 }
2557 
2558 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2559   assert(isByte(mode), "invalid value");
2560   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2561   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2562   InstructionMark im(this);
2563   simd_prefix(dst, src, VEX_SIMD_F2);
2564   emit_int8(0x70);
2565   emit_operand(dst, src);
2566   emit_int8(mode & 0xFF);
2567 }
2568 
2569 void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift right 128 bit value in xmm register by number of bytes.
2571   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2572   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2573   emit_int8(0x73);
2574   emit_int8((unsigned char)(0xC0 | encode));
2575   emit_int8(shift);
2576 }
2577 
2578 void Assembler::pslldq(XMMRegister dst, int shift) {
2579   // Shift left 128 bit value in xmm register by number of bytes.
2580   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2581   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66);
2582   emit_int8(0x73);
2583   emit_int8((unsigned char)(0xC0 | encode));
2584   emit_int8(shift);
2585 }
2586 
2587 void Assembler::ptest(XMMRegister dst, Address src) {
2588   assert(VM_Version::supports_sse4_1(), "");
2589   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2590   InstructionMark im(this);
2591   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2592   emit_int8(0x17);
2593   emit_operand(dst, src);
2594 }
2595 
2596 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2597   assert(VM_Version::supports_sse4_1(), "");
2598   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2599   emit_int8(0x17);
2600   emit_int8((unsigned char)(0xC0 | encode));
2601 }
2602 
2603 void Assembler::vptest(XMMRegister dst, Address src) {
2604   assert(VM_Version::supports_avx(), "");
2605   InstructionMark im(this);
2606   bool vector256 = true;
2607   assert(dst != xnoreg, "sanity");
2608   int dst_enc = dst->encoding();
2609   // swap src<->dst for encoding
2610   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
2611   emit_int8(0x17);
2612   emit_operand(dst, src);
2613 }
2614 
2615 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
2616   assert(VM_Version::supports_avx(), "");
2617   bool vector256 = true;
2618   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
2619   emit_int8(0x17);
2620   emit_int8((unsigned char)(0xC0 | encode));
2621 }
2622 
2623 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2624   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2625   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2626   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2627 }
2628 
2629 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2630   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2631   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2632 }
2633 
2634 void Assembler::punpckldq(XMMRegister dst, Address src) {
2635   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2636   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2637   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2638 }
2639 
2640 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2641   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2642   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2643 }
2644 
2645 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2646   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2647   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2648 }
2649 
2650 void Assembler::push(int32_t imm32) {
2651   // in 64bits we push 64bits onto the stack but only
2652   // take a 32bit immediate
2653   emit_int8(0x68);
2654   emit_int32(imm32);
2655 }
2656 
2657 void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8(0x50 | encode);
2661 }
2662 
2663 void Assembler::pushf() {
2664   emit_int8((unsigned char)0x9C);
2665 }
2666 
2667 #ifndef _LP64 // no 32bit push/pop on amd64
2668 void Assembler::pushl(Address src) {
  // Note: this will push 64 bits on 64-bit
2670   InstructionMark im(this);
2671   prefix(src);
2672   emit_int8((unsigned char)0xFF);
2673   emit_operand(rsi, src);
2674 }
2675 #endif
2676 
2677 void Assembler::rcll(Register dst, int imm8) {
2678   assert(isShiftCount(imm8), "illegal shift count");
2679   int encode = prefix_and_encode(dst->encoding());
2680   if (imm8 == 1) {
2681     emit_int8((unsigned char)0xD1);
2682     emit_int8((unsigned char)(0xD0 | encode));
2683   } else {
2684     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
2686     emit_int8(imm8);
2687   }
2688 }
2689 
2690 void Assembler::rdtsc() {
2691   emit_int8((unsigned char)0x0F);
2692   emit_int8((unsigned char)0x31);
2693 }
2694 
// copies data from [esi] to [edi] using rcx pointer-sized words
2696 // generic
2697 void Assembler::rep_mov() {
2698   emit_int8((unsigned char)0xF3);
2699   // MOVSQ
2700   LP64_ONLY(prefix(REX_W));
2701   emit_int8((unsigned char)0xA5);
2702 }
2703 
// sets rcx bytes at [edi] to the value in rax
2705 void Assembler::rep_stosb() {
2706   emit_int8((unsigned char)0xF3); // REP
2707   LP64_ONLY(prefix(REX_W));
2708   emit_int8((unsigned char)0xAA); // STOSB
2709 }
2710 
// sets rcx pointer-sized words at [edi] to the value in rax
2712 // generic
2713 void Assembler::rep_stos() {
2714   emit_int8((unsigned char)0xF3); // REP
2715   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
2716   emit_int8((unsigned char)0xAB);
2717 }
2718 
// scans rcx pointer-sized words at [edi] for an occurrence of rax
2720 // generic
2721 void Assembler::repne_scan() { // repne_scan
2722   emit_int8((unsigned char)0xF2);
2723   // SCASQ
2724   LP64_ONLY(prefix(REX_W));
2725   emit_int8((unsigned char)0xAF);
2726 }
2727 
2728 #ifdef _LP64
// scans rcx 4-byte words at [edi] for an occurrence of rax
2730 // generic
2731 void Assembler::repne_scanl() { // repne_scan
2732   emit_int8((unsigned char)0xF2);
2733   // SCASL
2734   emit_int8((unsigned char)0xAF);
2735 }
2736 #endif
2737 
2738 void Assembler::ret(int imm16) {
2739   if (imm16 == 0) {
2740     emit_int8((unsigned char)0xC3);
2741   } else {
2742     emit_int8((unsigned char)0xC2);
2743     emit_int16(imm16);
2744   }
2745 }
2746 
2747 void Assembler::sahf() {
2748 #ifdef _LP64
2749   // Not supported in 64bit mode
2750   ShouldNotReachHere();
2751 #endif
2752   emit_int8((unsigned char)0x9E);
2753 }
2754 
2755 void Assembler::sarl(Register dst, int imm8) {
2756   int encode = prefix_and_encode(dst->encoding());
2757   assert(isShiftCount(imm8), "illegal shift count");
2758   if (imm8 == 1) {
2759     emit_int8((unsigned char)0xD1);
2760     emit_int8((unsigned char)(0xF8 | encode));
2761   } else {
2762     emit_int8((unsigned char)0xC1);
2763     emit_int8((unsigned char)(0xF8 | encode));
2764     emit_int8(imm8);
2765   }
2766 }
2767 
2768 void Assembler::sarl(Register dst) {
2769   int encode = prefix_and_encode(dst->encoding());
2770   emit_int8((unsigned char)0xD3);
2771   emit_int8((unsigned char)(0xF8 | encode));
2772 }
2773 
2774 void Assembler::sbbl(Address dst, int32_t imm32) {
2775   InstructionMark im(this);
2776   prefix(dst);
2777   emit_arith_operand(0x81, rbx, dst, imm32);
2778 }
2779 
2780 void Assembler::sbbl(Register dst, int32_t imm32) {
2781   prefix(dst);
2782   emit_arith(0x81, 0xD8, dst, imm32);
2783 }
2784 
2785 
2786 void Assembler::sbbl(Register dst, Address src) {
2787   InstructionMark im(this);
2788   prefix(src, dst);
2789   emit_int8(0x1B);
2790   emit_operand(dst, src);
2791 }
2792 
2793 void Assembler::sbbl(Register dst, Register src) {
2794   (void) prefix_and_encode(dst->encoding(), src->encoding());
2795   emit_arith(0x1B, 0xC0, dst, src);
2796 }
2797 
2798 void Assembler::setb(Condition cc, Register dst) {
2799   assert(0 <= cc && cc < 16, "illegal cc");
2800   int encode = prefix_and_encode(dst->encoding(), true);
2801   emit_int8(0x0F);
  emit_int8((unsigned char)(0x90 | cc));
2803   emit_int8((unsigned char)(0xC0 | encode));
2804 }
2805 
2806 void Assembler::shll(Register dst, int imm8) {
2807   assert(isShiftCount(imm8), "illegal shift count");
2808   int encode = prefix_and_encode(dst->encoding());
2809   if (imm8 == 1 ) {
2810     emit_int8((unsigned char)0xD1);
2811     emit_int8((unsigned char)(0xE0 | encode));
2812   } else {
2813     emit_int8((unsigned char)0xC1);
2814     emit_int8((unsigned char)(0xE0 | encode));
2815     emit_int8(imm8);
2816   }
2817 }
2818 
2819 void Assembler::shll(Register dst) {
2820   int encode = prefix_and_encode(dst->encoding());
2821   emit_int8((unsigned char)0xD3);
2822   emit_int8((unsigned char)(0xE0 | encode));
2823 }
2824 
2825 void Assembler::shrl(Register dst, int imm8) {
2826   assert(isShiftCount(imm8), "illegal shift count");
2827   int encode = prefix_and_encode(dst->encoding());
2828   emit_int8((unsigned char)0xC1);
2829   emit_int8((unsigned char)(0xE8 | encode));
2830   emit_int8(imm8);
2831 }
2832 
2833 void Assembler::shrl(Register dst) {
2834   int encode = prefix_and_encode(dst->encoding());
2835   emit_int8((unsigned char)0xD3);
2836   emit_int8((unsigned char)(0xE8 | encode));
2837 }
2838 
2839 // copies a single word from [esi] to [edi]
2840 void Assembler::smovl() {
2841   emit_int8((unsigned char)0xA5);
2842 }
2843 
2844 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2845   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2846   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2847 }
2848 
2849 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2850   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2851   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2852 }
2853 
2854 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2855   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2856   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2857 }
2858 
2859 void Assembler::std() {
2860   emit_int8((unsigned char)0xFD);
2861 }
2862 
2863 void Assembler::sqrtss(XMMRegister dst, Address src) {
2864   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2865   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2866 }
2867 
void Assembler::stmxcsr(Address dst) {
2869   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2870   InstructionMark im(this);
2871   prefix(dst);
2872   emit_int8(0x0F);
2873   emit_int8((unsigned char)0xAE);
2874   emit_operand(as_Register(3), dst);
2875 }
2876 
2877 void Assembler::subl(Address dst, int32_t imm32) {
2878   InstructionMark im(this);
2879   prefix(dst);
2880   emit_arith_operand(0x81, rbp, dst, imm32);
2881 }
2882 
2883 void Assembler::subl(Address dst, Register src) {
2884   InstructionMark im(this);
2885   prefix(dst, src);
2886   emit_int8(0x29);
2887   emit_operand(src, dst);
2888 }
2889 
2890 void Assembler::subl(Register dst, int32_t imm32) {
2891   prefix(dst);
2892   emit_arith(0x81, 0xE8, dst, imm32);
2893 }
2894 
2895 // Force generation of a 4 byte immediate value even if it fits into 8bit
2896 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2897   prefix(dst);
2898   emit_arith_imm32(0x81, 0xE8, dst, imm32);
2899 }
2900 
2901 void Assembler::subl(Register dst, Address src) {
2902   InstructionMark im(this);
2903   prefix(src, dst);
2904   emit_int8(0x2B);
2905   emit_operand(dst, src);
2906 }
2907 
2908 void Assembler::subl(Register dst, Register src) {
2909   (void) prefix_and_encode(dst->encoding(), src->encoding());
2910   emit_arith(0x2B, 0xC0, dst, src);
2911 }
2912 
2913 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2914   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2915   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2916 }
2917 
2918 void Assembler::subsd(XMMRegister dst, Address src) {
2919   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2920   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2921 }
2922 
2923 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2924   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2925   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2926 }
2927 
2928 void Assembler::subss(XMMRegister dst, Address src) {
2929   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2930   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2931 }
2932 
2933 void Assembler::testb(Register dst, int imm8) {
2934   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2935   (void) prefix_and_encode(dst->encoding(), true);
2936   emit_arith_b(0xF6, 0xC0, dst, imm8);
2937 }
2938 
2939 void Assembler::testb(Address dst, int imm8) {
2940   InstructionMark im(this);
2941   prefix(dst);
2942   emit_int8((unsigned char)0xF6);
2943   emit_operand(rax, dst, 1);
2944   emit_int8(imm8);
2945 }
2946 
2947 void Assembler::testl(Register dst, int32_t imm32) {
2948   // not using emit_arith because test
2949   // doesn't support sign-extension of
2950   // 8bit operands
2951   int encode = dst->encoding();
2952   if (encode == 0) {
2953     emit_int8((unsigned char)0xA9);
2954   } else {
2955     encode = prefix_and_encode(encode);
2956     emit_int8((unsigned char)0xF7);
2957     emit_int8((unsigned char)(0xC0 | encode));
2958   }
2959   emit_int32(imm32);
2960 }
2961 
2962 void Assembler::testl(Register dst, Register src) {
2963   (void) prefix_and_encode(dst->encoding(), src->encoding());
2964   emit_arith(0x85, 0xC0, dst, src);
2965 }
2966 
2967 void Assembler::testl(Register dst, Address  src) {
2968   InstructionMark im(this);
2969   prefix(src, dst);
2970   emit_int8((unsigned char)0x85);
2971   emit_operand(dst, src);
2972 }
2973 
2974 void Assembler::tzcntl(Register dst, Register src) {
2975   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2976   emit_int8((unsigned char)0xF3);
2977   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2978   emit_int8(0x0F);
2979   emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
2981 }
2982 
2983 void Assembler::tzcntq(Register dst, Register src) {
2984   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
2985   emit_int8((unsigned char)0xF3);
2986   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
2987   emit_int8(0x0F);
2988   emit_int8((unsigned char)0xBC);
2989   emit_int8((unsigned char)(0xC0 | encode));
2990 }
2991 
2992 void Assembler::ucomisd(XMMRegister dst, Address src) {
2993   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2994   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2995 }
2996 
2997 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2998   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2999   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3000 }
3001 
3002 void Assembler::ucomiss(XMMRegister dst, Address src) {
3003   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3004   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
3005 }
3006 
3007 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3008   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3009   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
3010 }
3011 
3012 void Assembler::xabort(int8_t imm8) {
3013   emit_int8((unsigned char)0xC6);
3014   emit_int8((unsigned char)0xF8);
3015   emit_int8((unsigned char)(imm8 & 0xFF));
3016 }
3017 
3018 void Assembler::xaddl(Address dst, Register src) {
3019   InstructionMark im(this);
3020   prefix(dst, src);
3021   emit_int8(0x0F);
3022   emit_int8((unsigned char)0xC1);
3023   emit_operand(src, dst);
3024 }
3025 
3026 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3027   InstructionMark im(this);
3028   relocate(rtype);
3029   if (abort.is_bound()) {
3030     address entry = target(abort);
3031     assert(entry != NULL, "abort entry NULL");
3032     intptr_t offset = entry - pc();
3033     emit_int8((unsigned char)0xC7);
3034     emit_int8((unsigned char)0xF8);
3035     emit_int32(offset - 6); // 2 opcode + 4 address
3036   } else {
3037     abort.add_patch_at(code(), locator());
3038     emit_int8((unsigned char)0xC7);
3039     emit_int8((unsigned char)0xF8);
3040     emit_int32(0);
3041   }
3042 }
3043 
3044 void Assembler::xchgl(Register dst, Address src) { // xchg
3045   InstructionMark im(this);
3046   prefix(src, dst);
3047   emit_int8((unsigned char)0x87);
3048   emit_operand(dst, src);
3049 }
3050 
3051 void Assembler::xchgl(Register dst, Register src) {
3052   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3053   emit_int8((unsigned char)0x87);
3054   emit_int8((unsigned char)(0xC0 | encode));
3055 }
3056 
3057 void Assembler::xend() {
3058   emit_int8((unsigned char)0x0F);
3059   emit_int8((unsigned char)0x01);
3060   emit_int8((unsigned char)0xD5);
3061 }
3062 
3063 void Assembler::xgetbv() {
3064   emit_int8(0x0F);
3065   emit_int8(0x01);
3066   emit_int8((unsigned char)0xD0);
3067 }
3068 
3069 void Assembler::xorl(Register dst, int32_t imm32) {
3070   prefix(dst);
3071   emit_arith(0x81, 0xF0, dst, imm32);
3072 }
3073 
3074 void Assembler::xorl(Register dst, Address src) {
3075   InstructionMark im(this);
3076   prefix(src, dst);
3077   emit_int8(0x33);
3078   emit_operand(dst, src);
3079 }
3080 
3081 void Assembler::xorl(Register dst, Register src) {
3082   (void) prefix_and_encode(dst->encoding(), src->encoding());
3083   emit_arith(0x33, 0xC0, dst, src);
3084 }
3085 
3086 
// AVX 3-operand scalar floating-point arithmetic instructions
3088 
3089 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3090   assert(VM_Version::supports_avx(), "");
3091   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3092 }
3093 
3094 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3095   assert(VM_Version::supports_avx(), "");
3096   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3097 }
3098 
3099 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3100   assert(VM_Version::supports_avx(), "");
3101   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3102 }
3103 
3104 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3105   assert(VM_Version::supports_avx(), "");
3106   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3107 }
3108 
3109 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3110   assert(VM_Version::supports_avx(), "");
3111   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3112 }
3113 
3114 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3115   assert(VM_Version::supports_avx(), "");
3116   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3117 }
3118 
3119 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3120   assert(VM_Version::supports_avx(), "");
3121   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3122 }
3123 
3124 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3125   assert(VM_Version::supports_avx(), "");
3126   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3127 }
3128 
3129 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3130   assert(VM_Version::supports_avx(), "");
3131   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3132 }
3133 
3134 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3135   assert(VM_Version::supports_avx(), "");
3136   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3137 }
3138 
3139 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3140   assert(VM_Version::supports_avx(), "");
3141   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3142 }
3143 
3144 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3145   assert(VM_Version::supports_avx(), "");
3146   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3147 }
3148 
3149 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3150   assert(VM_Version::supports_avx(), "");
3151   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3152 }
3153 
3154 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3155   assert(VM_Version::supports_avx(), "");
3156   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
3157 }
3158 
3159 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3160   assert(VM_Version::supports_avx(), "");
3161   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3162 }
3163 
3164 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3165   assert(VM_Version::supports_avx(), "");
3166   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
3167 }
3168 
3169 //====================VECTOR ARITHMETIC=====================================
3170 
// Floating-point vector arithmetic
3172 
3173 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3174   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3175   emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3176 }
3177 
3178 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3179   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3180   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3181 }
3182 
3183 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3184   assert(VM_Version::supports_avx(), "");
3185   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
3186 }
3187 
3188 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3189   assert(VM_Version::supports_avx(), "");
3190   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3191 }
3192 
3193 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3194   assert(VM_Version::supports_avx(), "");
3195   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
3196 }
3197 
3198 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3199   assert(VM_Version::supports_avx(), "");
3200   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
3201 }
3202 
3203 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3204   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3205   emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
3206 }
3207 
3208 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3209   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3210   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3211 }
3212 
3213 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3214   assert(VM_Version::supports_avx(), "");
3215   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3216 }
3217 
3218 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3219   assert(VM_Version::supports_avx(), "");
3220   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3221 }
3222 
3223 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3224   assert(VM_Version::supports_avx(), "");
3225   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3226 }
3227 
3228 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3229   assert(VM_Version::supports_avx(), "");
3230   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3231 }
3232 
3233 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3234   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3235   emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3236 }
3237 
3238 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3239   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3240   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3241 }
3242 
3243 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3244   assert(VM_Version::supports_avx(), "");
3245   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3246 }
3247 
3248 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3249   assert(VM_Version::supports_avx(), "");
3250   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3251 }
3252 
3253 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3254   assert(VM_Version::supports_avx(), "");
3255   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3256 }
3257 
3258 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3259   assert(VM_Version::supports_avx(), "");
3260   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3261 }
3262 
3263 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3264   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3265   emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3266 }
3267 
3268 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3269   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3270   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3271 }
3272 
3273 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3274   assert(VM_Version::supports_avx(), "");
3275   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3276 }
3277 
3278 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3279   assert(VM_Version::supports_avx(), "");
3280   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3281 }
3282 
3283 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3284   assert(VM_Version::supports_avx(), "");
3285   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3286 }
3287 
3288 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3289   assert(VM_Version::supports_avx(), "");
3290   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3291 }
3292 
3293 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3294   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3295   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3296 }
3297 
3298 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3299   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3300   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3301 }
3302 
3303 void Assembler::andps(XMMRegister dst, Address src) {
3304   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3305   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3306 }
3307 
3308 void Assembler::andpd(XMMRegister dst, Address src) {
3309   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3310   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3311 }
3312 
3313 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3314   assert(VM_Version::supports_avx(), "");
3315   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3316 }
3317 
3318 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3319   assert(VM_Version::supports_avx(), "");
3320   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3321 }
3322 
3323 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3324   assert(VM_Version::supports_avx(), "");
3325   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3326 }
3327 
3328 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3329   assert(VM_Version::supports_avx(), "");
3330   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3331 }
3332 
3333 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3334   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3335   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3336 }
3337 
3338 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3339   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3340   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3341 }
3342 
3343 void Assembler::xorpd(XMMRegister dst, Address src) {
3344   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3345   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3346 }
3347 
3348 void Assembler::xorps(XMMRegister dst, Address src) {
3349   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3350   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3351 }
3352 
3353 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3354   assert(VM_Version::supports_avx(), "");
3355   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3356 }
3357 
3358 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3359   assert(VM_Version::supports_avx(), "");
3360   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3361 }
3362 
3363 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3364   assert(VM_Version::supports_avx(), "");
3365   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3366 }
3367 
3368 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3369   assert(VM_Version::supports_avx(), "");
3370   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3371 }
3372 
3373 
3374 // Integer vector arithmetic
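// Note: unlike the 256-bit floating-point operations above, which plain AVX
// provides, 256-bit integer operations were not introduced until AVX2, hence
// the stronger assert in the v-prefixed forms below.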
3375 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3376   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3377   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3378 }
3379 
3380 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3381   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3382   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3383 }
3384 
3385 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3386   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3387   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3388 }
3389 
3390 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3391   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3392   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
3393 }
3394 
3395 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3397   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3398 }
3399 
3400 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3402   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3403 }
3404 
3405 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3407   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3408 }
3409 
3410 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3412   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3413 }
3414 
3415 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3417   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3418 }
3419 
3420 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3422   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3423 }
3424 
3425 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3427   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3428 }
3429 
3430 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3432   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3433 }
3434 
3435 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3437   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3438 }
3439 
3440 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3441   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3442   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3443 }
3444 
3445 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3446   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3447   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3448 }
3449 
3450 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3451   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3452   emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
3453 }
3454 
3455 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3457   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3458 }
3459 
3460 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3462   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3463 }
3464 
3465 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3467   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3468 }
3469 
3470 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3472   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3473 }
3474 
3475 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3477   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3478 }
3479 
3480 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3482   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3483 }
3484 
3485 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3487   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3488 }
3489 
3490 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3492   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3493 }
3494 
3495 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3496   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3497   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
3498 }
3499 
3500 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3501   assert(VM_Version::supports_sse4_1(), "");
3502   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3503   emit_int8(0x40);
3504   emit_int8((unsigned char)(0xC0 | encode));
3505 }
3506 
3507 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3509   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3510 }
3511 
3512 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3514   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3515   emit_int8(0x40);
3516   emit_int8((unsigned char)(0xC0 | encode));
3517 }
3518 
3519 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3521   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3522 }
3523 
3524 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3526   InstructionMark im(this);
3527   int dst_enc = dst->encoding();
3528   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3529   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3530   emit_int8(0x40);
3531   emit_operand(dst, src);
3532 }
3533 
3534 // Shift packed integers left by specified number of bits.
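// In the immediate forms below, the ModRM reg field carries an opcode
// extension rather than a register, so a dummy XMM register is passed to
// select the /digit: passing xmm6 as the "dst" yields the /6 extension.
// For example, psllw(xmm1, 5) emits 66 0F 71 F1 05.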
3535 void Assembler::psllw(XMMRegister dst, int shift) {
3536   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3537   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3538   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3539   emit_int8(0x71);
3540   emit_int8((unsigned char)(0xC0 | encode));
3541   emit_int8(shift & 0xFF);
3542 }
3543 
3544 void Assembler::pslld(XMMRegister dst, int shift) {
3545   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3546   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3547   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3548   emit_int8(0x72);
3549   emit_int8((unsigned char)(0xC0 | encode));
3550   emit_int8(shift & 0xFF);
3551 }
3552 
3553 void Assembler::psllq(XMMRegister dst, int shift) {
3554   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3555   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3556   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3557   emit_int8(0x73);
3558   emit_int8((unsigned char)(0xC0 | encode));
3559   emit_int8(shift & 0xFF);
3560 }
3561 
3562 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3563   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3564   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
3565 }
3566 
3567 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3568   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3569   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3570 }
3571 
3572 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3573   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3574   emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
3575 }
3576 
3577 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3579   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3580   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
3581   emit_int8(shift & 0xFF);
3582 }
3583 
3584 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3586   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3587   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3588   emit_int8(shift & 0xFF);
3589 }
3590 
3591 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3593   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3594   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
3595   emit_int8(shift & 0xFF);
3596 }
3597 
3598 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3600   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
3601 }
3602 
3603 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3605   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3606 }
3607 
3608 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3610   emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
3611 }
3612 
3613 // Shift packed integers logically right by specified number of bits.
3614 void Assembler::psrlw(XMMRegister dst, int shift) {
3615   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3616   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3617   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3618   emit_int8(0x71);
3619   emit_int8((unsigned char)(0xC0 | encode));
3620   emit_int8(shift & 0xFF);
3621 }
3622 
3623 void Assembler::psrld(XMMRegister dst, int shift) {
3624   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3625   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3626   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3627   emit_int8(0x72);
3628   emit_int8((unsigned char)(0xC0 | encode));
3629   emit_int8(shift & 0xFF);
3630 }
3631 
3632 void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse this with the psrldq SSE2 instruction, which
  // shifts the 128-bit value in an xmm register by a number of bytes.
3635   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3636   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3637   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3638   emit_int8(0x73);
3639   emit_int8((unsigned char)(0xC0 | encode));
3640   emit_int8(shift & 0xFF);
3641 }
3642 
3643 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3644   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3645   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
3646 }
3647 
3648 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3649   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3650   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3651 }
3652 
3653 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3654   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3655   emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
3656 }
3657 
3658 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3661   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
3662   emit_int8(shift & 0xFF);
3663 }
3664 
3665 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3668   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3669   emit_int8(shift & 0xFF);
3670 }
3671 
3672 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3674   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3675   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
3676   emit_int8(shift & 0xFF);
3677 }
3678 
3679 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3681   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
3682 }
3683 
3684 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3686   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3687 }
3688 
3689 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3691   emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
3692 }
3693 
3694 // Shift packed integers arithmetically right by specified number of bits.
3695 void Assembler::psraw(XMMRegister dst, int shift) {
3696   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3697   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3698   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3699   emit_int8(0x71);
3700   emit_int8((unsigned char)(0xC0 | encode));
3701   emit_int8(shift & 0xFF);
3702 }
3703 
3704 void Assembler::psrad(XMMRegister dst, int shift) {
3705   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3706   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3707   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3708   emit_int8(0x72);
3709   emit_int8((unsigned char)(0xC0 | encode));
3710   emit_int8(shift & 0xFF);
3711 }
3712 
3713 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3714   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3715   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
3716 }
3717 
3718 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3719   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3720   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3721 }
3722 
3723 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3725   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3726   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
3727   emit_int8(shift & 0xFF);
3728 }
3729 
3730 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3733   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3734   emit_int8(shift & 0xFF);
3735 }
3736 
3737 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3739   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
3740 }
3741 
3742 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3744   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3745 }
3746 
3747 
3748 // AND packed integers
3749 void Assembler::pand(XMMRegister dst, XMMRegister src) {
3750   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3751   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3752 }
3753 
3754 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3756   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3757 }
3758 
3759 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3761   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3762 }
3763 
3764 void Assembler::por(XMMRegister dst, XMMRegister src) {
3765   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3766   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3767 }
3768 
3769 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3771   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3772 }
3773 
3774 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3776   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3777 }
3778 
3779 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3781   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3782 }
3783 
3784 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3786   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3787 }
3788 
3789 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3791   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3792 }
3793 
3794 
3795 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3796   assert(VM_Version::supports_avx(), "");
3797   bool vector256 = true;
3798   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3799   emit_int8(0x18);
3800   emit_int8((unsigned char)(0xC0 | encode));
3801   // 0x00 - insert into lower 128 bits
3802   // 0x01 - insert into upper 128 bits
3803   emit_int8(0x01);
3804 }
3805 
3806 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3807   assert(VM_Version::supports_avx(), "");
3808   InstructionMark im(this);
3809   bool vector256 = true;
3810   assert(dst != xnoreg, "sanity");
3811   int dst_enc = dst->encoding();
3812   // swap src<->dst for encoding
3813   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3814   emit_int8(0x18);
3815   emit_operand(dst, src);
3816   // 0x01 - insert into upper 128 bits
3817   emit_int8(0x01);
3818 }
3819 
3820 void Assembler::vextractf128h(Address dst, XMMRegister src) {
3821   assert(VM_Version::supports_avx(), "");
3822   InstructionMark im(this);
3823   bool vector256 = true;
3824   assert(src != xnoreg, "sanity");
3825   int src_enc = src->encoding();
3826   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3827   emit_int8(0x19);
3828   emit_operand(src, dst);
3829   // 0x01 - extract from upper 128 bits
3830   emit_int8(0x01);
3831 }
3832 
3833 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3834   assert(VM_Version::supports_avx2(), "");
3835   bool vector256 = true;
3836   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3837   emit_int8(0x38);
3838   emit_int8((unsigned char)(0xC0 | encode));
3839   // 0x00 - insert into lower 128 bits
3840   // 0x01 - insert into upper 128 bits
3841   emit_int8(0x01);
3842 }
3843 
3844 void Assembler::vinserti128h(XMMRegister dst, Address src) {
3845   assert(VM_Version::supports_avx2(), "");
3846   InstructionMark im(this);
3847   bool vector256 = true;
3848   assert(dst != xnoreg, "sanity");
3849   int dst_enc = dst->encoding();
3850   // swap src<->dst for encoding
3851   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3852   emit_int8(0x38);
3853   emit_operand(dst, src);
3854   // 0x01 - insert into upper 128 bits
3855   emit_int8(0x01);
3856 }
3857 
3858 void Assembler::vextracti128h(Address dst, XMMRegister src) {
3859   assert(VM_Version::supports_avx2(), "");
3860   InstructionMark im(this);
3861   bool vector256 = true;
3862   assert(src != xnoreg, "sanity");
3863   int src_enc = src->encoding();
3864   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3865   emit_int8(0x39);
3866   emit_operand(src, dst);
3867   // 0x01 - extract from upper 128 bits
3868   emit_int8(0x01);
3869 }
3870 
// Duplicate 4-byte integer data from src into 8 locations in dest
3872 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
3873   assert(VM_Version::supports_avx2(), "");
3874   bool vector256 = true;
3875   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3876   emit_int8(0x58);
3877   emit_int8((unsigned char)(0xC0 | encode));
3878 }
3879 
3880 // Carry-Less Multiplication Quadword
3881 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
3882   assert(VM_Version::supports_clmul(), "");
3883   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
3884   emit_int8(0x44);
3885   emit_int8((unsigned char)(0xC0 | encode));
3886   emit_int8((unsigned char)mask);
3887 }
3888 
3889 // Carry-Less Multiplication Quadword
3890 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
3891   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
3892   bool vector256 = false;
3893   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3894   emit_int8(0x44);
3895   emit_int8((unsigned char)(0xC0 | encode));
3896   emit_int8((unsigned char)mask);
3897 }
3898 
3899 void Assembler::vzeroupper() {
3900   assert(VM_Version::supports_avx(), "");
3901   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3902   emit_int8(0x77);
3903 }
3904 
3905 
3906 #ifndef _LP64
3907 // 32bit only pieces of the assembler
3908 
3909 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3910   // NO PREFIX AS NEVER 64BIT
3911   InstructionMark im(this);
3912   emit_int8((unsigned char)0x81);
3913   emit_int8((unsigned char)(0xF8 | src1->encoding()));
3914   emit_data(imm32, rspec, 0);
3915 }
3916 
3917 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
3919   InstructionMark im(this);
3920   emit_int8((unsigned char)0x81);
3921   emit_operand(rdi, src1);
3922   emit_data(imm32, rspec, 0);
3923 }
3924 
// The 64-bit cmpxchg (on a 32-bit platform) compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if they are equal; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
3928 void Assembler::cmpxchg8(Address adr) {
3929   InstructionMark im(this);
3930   emit_int8(0x0F);
3931   emit_int8((unsigned char)0xC7);
3932   emit_operand(rcx, adr);
3933 }
3934 
3935 void Assembler::decl(Register dst) {
3936   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
3938 }
3939 
3940 #endif // _LP64
3941 
// 64bit code typically doesn't use the x87, but needs it for the trig funcs
3943 
3944 void Assembler::fabs() {
3945   emit_int8((unsigned char)0xD9);
3946   emit_int8((unsigned char)0xE1);
3947 }
3948 
3949 void Assembler::fadd(int i) {
3950   emit_farith(0xD8, 0xC0, i);
3951 }
3952 
3953 void Assembler::fadd_d(Address src) {
3954   InstructionMark im(this);
3955   emit_int8((unsigned char)0xDC);
3956   emit_operand32(rax, src);
3957 }
3958 
3959 void Assembler::fadd_s(Address src) {
3960   InstructionMark im(this);
3961   emit_int8((unsigned char)0xD8);
3962   emit_operand32(rax, src);
3963 }
3964 
3965 void Assembler::fadda(int i) {
3966   emit_farith(0xDC, 0xC0, i);
3967 }
3968 
3969 void Assembler::faddp(int i) {
3970   emit_farith(0xDE, 0xC0, i);
3971 }
3972 
3973 void Assembler::fchs() {
3974   emit_int8((unsigned char)0xD9);
3975   emit_int8((unsigned char)0xE0);
3976 }
3977 
3978 void Assembler::fcom(int i) {
3979   emit_farith(0xD8, 0xD0, i);
3980 }
3981 
3982 void Assembler::fcomp(int i) {
3983   emit_farith(0xD8, 0xD8, i);
3984 }
3985 
3986 void Assembler::fcomp_d(Address src) {
3987   InstructionMark im(this);
3988   emit_int8((unsigned char)0xDC);
3989   emit_operand32(rbx, src);
3990 }
3991 
3992 void Assembler::fcomp_s(Address src) {
3993   InstructionMark im(this);
3994   emit_int8((unsigned char)0xD8);
3995   emit_operand32(rbx, src);
3996 }
3997 
3998 void Assembler::fcompp() {
3999   emit_int8((unsigned char)0xDE);
4000   emit_int8((unsigned char)0xD9);
4001 }
4002 
4003 void Assembler::fcos() {
4004   emit_int8((unsigned char)0xD9);
4005   emit_int8((unsigned char)0xFF);
4006 }
4007 
4008 void Assembler::fdecstp() {
4009   emit_int8((unsigned char)0xD9);
4010   emit_int8((unsigned char)0xF6);
4011 }
4012 
4013 void Assembler::fdiv(int i) {
4014   emit_farith(0xD8, 0xF0, i);
4015 }
4016 
4017 void Assembler::fdiv_d(Address src) {
4018   InstructionMark im(this);
4019   emit_int8((unsigned char)0xDC);
4020   emit_operand32(rsi, src);
4021 }
4022 
4023 void Assembler::fdiv_s(Address src) {
4024   InstructionMark im(this);
4025   emit_int8((unsigned char)0xD8);
4026   emit_operand32(rsi, src);
4027 }
4028 
4029 void Assembler::fdiva(int i) {
4030   emit_farith(0xDC, 0xF8, i);
4031 }
4032 
4033 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
4034 //       is erroneous for some of the floating-point instructions below.
4035 
4036 void Assembler::fdivp(int i) {
4037   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
4038 }
4039 
4040 void Assembler::fdivr(int i) {
4041   emit_farith(0xD8, 0xF8, i);
4042 }
4043 
4044 void Assembler::fdivr_d(Address src) {
4045   InstructionMark im(this);
4046   emit_int8((unsigned char)0xDC);
4047   emit_operand32(rdi, src);
4048 }
4049 
4050 void Assembler::fdivr_s(Address src) {
4051   InstructionMark im(this);
4052   emit_int8((unsigned char)0xD8);
4053   emit_operand32(rdi, src);
4054 }
4055 
4056 void Assembler::fdivra(int i) {
4057   emit_farith(0xDC, 0xF0, i);
4058 }
4059 
4060 void Assembler::fdivrp(int i) {
4061   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
4062 }
4063 
4064 void Assembler::ffree(int i) {
4065   emit_farith(0xDD, 0xC0, i);
4066 }
4067 
4068 void Assembler::fild_d(Address adr) {
4069   InstructionMark im(this);
4070   emit_int8((unsigned char)0xDF);
4071   emit_operand32(rbp, adr);
4072 }
4073 
4074 void Assembler::fild_s(Address adr) {
4075   InstructionMark im(this);
4076   emit_int8((unsigned char)0xDB);
4077   emit_operand32(rax, adr);
4078 }
4079 
4080 void Assembler::fincstp() {
4081   emit_int8((unsigned char)0xD9);
4082   emit_int8((unsigned char)0xF7);
4083 }
4084 
4085 void Assembler::finit() {
4086   emit_int8((unsigned char)0x9B);
4087   emit_int8((unsigned char)0xDB);
4088   emit_int8((unsigned char)0xE3);
4089 }
4090 
4091 void Assembler::fist_s(Address adr) {
4092   InstructionMark im(this);
4093   emit_int8((unsigned char)0xDB);
4094   emit_operand32(rdx, adr);
4095 }
4096 
4097 void Assembler::fistp_d(Address adr) {
4098   InstructionMark im(this);
4099   emit_int8((unsigned char)0xDF);
4100   emit_operand32(rdi, adr);
4101 }
4102 
4103 void Assembler::fistp_s(Address adr) {
4104   InstructionMark im(this);
4105   emit_int8((unsigned char)0xDB);
4106   emit_operand32(rbx, adr);
4107 }
4108 
4109 void Assembler::fld1() {
4110   emit_int8((unsigned char)0xD9);
4111   emit_int8((unsigned char)0xE8);
4112 }
4113 
4114 void Assembler::fld_d(Address adr) {
4115   InstructionMark im(this);
4116   emit_int8((unsigned char)0xDD);
4117   emit_operand32(rax, adr);
4118 }
4119 
4120 void Assembler::fld_s(Address adr) {
4121   InstructionMark im(this);
4122   emit_int8((unsigned char)0xD9);
4123   emit_operand32(rax, adr);
4124 }
4125 
4126 
4127 void Assembler::fld_s(int index) {
4128   emit_farith(0xD9, 0xC0, index);
4129 }
4130 
4131 void Assembler::fld_x(Address adr) {
4132   InstructionMark im(this);
4133   emit_int8((unsigned char)0xDB);
4134   emit_operand32(rbp, adr);
4135 }
4136 
4137 void Assembler::fldcw(Address src) {
4138   InstructionMark im(this);
4139   emit_int8((unsigned char)0xD9);
4140   emit_operand32(rbp, src);
4141 }
4142 
4143 void Assembler::fldenv(Address src) {
4144   InstructionMark im(this);
4145   emit_int8((unsigned char)0xD9);
4146   emit_operand32(rsp, src);
4147 }
4148 
4149 void Assembler::fldlg2() {
4150   emit_int8((unsigned char)0xD9);
4151   emit_int8((unsigned char)0xEC);
4152 }
4153 
4154 void Assembler::fldln2() {
4155   emit_int8((unsigned char)0xD9);
4156   emit_int8((unsigned char)0xED);
4157 }
4158 
4159 void Assembler::fldz() {
4160   emit_int8((unsigned char)0xD9);
4161   emit_int8((unsigned char)0xEE);
4162 }
4163 
4164 void Assembler::flog() {
4165   fldln2();
4166   fxch();
4167   fyl2x();
4168 }
4169 
4170 void Assembler::flog10() {
4171   fldlg2();
4172   fxch();
4173   fyl2x();
4174 }
4175 
4176 void Assembler::fmul(int i) {
4177   emit_farith(0xD8, 0xC8, i);
4178 }
4179 
4180 void Assembler::fmul_d(Address src) {
4181   InstructionMark im(this);
4182   emit_int8((unsigned char)0xDC);
4183   emit_operand32(rcx, src);
4184 }
4185 
4186 void Assembler::fmul_s(Address src) {
4187   InstructionMark im(this);
4188   emit_int8((unsigned char)0xD8);
4189   emit_operand32(rcx, src);
4190 }
4191 
4192 void Assembler::fmula(int i) {
4193   emit_farith(0xDC, 0xC8, i);
4194 }
4195 
4196 void Assembler::fmulp(int i) {
4197   emit_farith(0xDE, 0xC8, i);
4198 }
4199 
4200 void Assembler::fnsave(Address dst) {
4201   InstructionMark im(this);
4202   emit_int8((unsigned char)0xDD);
4203   emit_operand32(rsi, dst);
4204 }
4205 
4206 void Assembler::fnstcw(Address src) {
4207   InstructionMark im(this);
4208   emit_int8((unsigned char)0x9B);
4209   emit_int8((unsigned char)0xD9);
4210   emit_operand32(rdi, src);
4211 }
4212 
4213 void Assembler::fnstsw_ax() {
4214   emit_int8((unsigned char)0xDF);
4215   emit_int8((unsigned char)0xE0);
4216 }
4217 
4218 void Assembler::fprem() {
4219   emit_int8((unsigned char)0xD9);
4220   emit_int8((unsigned char)0xF8);
4221 }
4222 
4223 void Assembler::fprem1() {
4224   emit_int8((unsigned char)0xD9);
4225   emit_int8((unsigned char)0xF5);
4226 }
4227 
4228 void Assembler::frstor(Address src) {
4229   InstructionMark im(this);
4230   emit_int8((unsigned char)0xDD);
4231   emit_operand32(rsp, src);
4232 }
4233 
4234 void Assembler::fsin() {
4235   emit_int8((unsigned char)0xD9);
4236   emit_int8((unsigned char)0xFE);
4237 }
4238 
4239 void Assembler::fsqrt() {
4240   emit_int8((unsigned char)0xD9);
4241   emit_int8((unsigned char)0xFA);
4242 }
4243 
4244 void Assembler::fst_d(Address adr) {
4245   InstructionMark im(this);
4246   emit_int8((unsigned char)0xDD);
4247   emit_operand32(rdx, adr);
4248 }
4249 
4250 void Assembler::fst_s(Address adr) {
4251   InstructionMark im(this);
4252   emit_int8((unsigned char)0xD9);
4253   emit_operand32(rdx, adr);
4254 }
4255 
4256 void Assembler::fstp_d(Address adr) {
4257   InstructionMark im(this);
4258   emit_int8((unsigned char)0xDD);
4259   emit_operand32(rbx, adr);
4260 }
4261 
4262 void Assembler::fstp_d(int index) {
4263   emit_farith(0xDD, 0xD8, index);
4264 }
4265 
4266 void Assembler::fstp_s(Address adr) {
4267   InstructionMark im(this);
4268   emit_int8((unsigned char)0xD9);
4269   emit_operand32(rbx, adr);
4270 }
4271 
4272 void Assembler::fstp_x(Address adr) {
4273   InstructionMark im(this);
4274   emit_int8((unsigned char)0xDB);
4275   emit_operand32(rdi, adr);
4276 }
4277 
4278 void Assembler::fsub(int i) {
4279   emit_farith(0xD8, 0xE0, i);
4280 }
4281 
4282 void Assembler::fsub_d(Address src) {
4283   InstructionMark im(this);
4284   emit_int8((unsigned char)0xDC);
4285   emit_operand32(rsp, src);
4286 }
4287 
4288 void Assembler::fsub_s(Address src) {
4289   InstructionMark im(this);
4290   emit_int8((unsigned char)0xD8);
4291   emit_operand32(rsp, src);
4292 }
4293 
4294 void Assembler::fsuba(int i) {
4295   emit_farith(0xDC, 0xE8, i);
4296 }
4297 
4298 void Assembler::fsubp(int i) {
4299   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
4300 }
4301 
4302 void Assembler::fsubr(int i) {
4303   emit_farith(0xD8, 0xE8, i);
4304 }
4305 
4306 void Assembler::fsubr_d(Address src) {
4307   InstructionMark im(this);
4308   emit_int8((unsigned char)0xDC);
4309   emit_operand32(rbp, src);
4310 }
4311 
4312 void Assembler::fsubr_s(Address src) {
4313   InstructionMark im(this);
4314   emit_int8((unsigned char)0xD8);
4315   emit_operand32(rbp, src);
4316 }
4317 
4318 void Assembler::fsubra(int i) {
4319   emit_farith(0xDC, 0xE0, i);
4320 }
4321 
4322 void Assembler::fsubrp(int i) {
4323   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
4324 }
4325 
4326 void Assembler::ftan() {
4327   emit_int8((unsigned char)0xD9);
4328   emit_int8((unsigned char)0xF2);
4329   emit_int8((unsigned char)0xDD);
4330   emit_int8((unsigned char)0xD8);
4331 }
4332 
4333 void Assembler::ftst() {
4334   emit_int8((unsigned char)0xD9);
4335   emit_int8((unsigned char)0xE4);
4336 }
4337 
4338 void Assembler::fucomi(int i) {
4339   // make sure the instruction is supported (introduced for P6, together with cmov)
4340   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4341   emit_farith(0xDB, 0xE8, i);
4342 }
4343 
4344 void Assembler::fucomip(int i) {
4345   // make sure the instruction is supported (introduced for P6, together with cmov)
4346   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4347   emit_farith(0xDF, 0xE8, i);
4348 }
4349 
4350 void Assembler::fwait() {
4351   emit_int8((unsigned char)0x9B);
4352 }
4353 
4354 void Assembler::fxch(int i) {
4355   emit_farith(0xD9, 0xC8, i);
4356 }
4357 
4358 void Assembler::fyl2x() {
4359   emit_int8((unsigned char)0xD9);
4360   emit_int8((unsigned char)0xF1);
4361 }
4362 
4363 void Assembler::frndint() {
4364   emit_int8((unsigned char)0xD9);
4365   emit_int8((unsigned char)0xFC);
4366 }
4367 
4368 void Assembler::f2xm1() {
4369   emit_int8((unsigned char)0xD9);
4370   emit_int8((unsigned char)0xF0);
4371 }
4372 
4373 void Assembler::fldl2e() {
4374   emit_int8((unsigned char)0xD9);
4375   emit_int8((unsigned char)0xEA);
4376 }
4377 
4378 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
4379 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
4380 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
4381 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
4382 
4383 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
4384 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4385   if (pre > 0) {
4386     emit_int8(simd_pre[pre]);
4387   }
4388   if (rex_w) {
4389     prefixq(adr, xreg);
4390   } else {
4391     prefix(adr, xreg);
4392   }
4393   if (opc > 0) {
4394     emit_int8(0x0F);
4395     int opc2 = simd_opc[opc];
4396     if (opc2 > 0) {
4397       emit_int8(opc2);
4398     }
4399   }
4400 }
4401 
4402 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4403   if (pre > 0) {
4404     emit_int8(simd_pre[pre]);
4405   }
4406   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4407                           prefix_and_encode(dst_enc, src_enc);
4408   if (opc > 0) {
4409     emit_int8(0x0F);
4410     int opc2 = simd_opc[opc];
4411     if (opc2 > 0) {
4412       emit_int8(opc2);
4413     }
4414   }
4415   return encode;
4416 }
4417 
4418 
4419 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4420   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4421     prefix(VEX_3bytes);
4422 
4423     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4424     byte1 = (~byte1) & 0xE0;
4425     byte1 |= opc;
4426     emit_int8(byte1);
4427 
4428     int byte2 = ((~nds_enc) & 0xf) << 3;
4429     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4430     emit_int8(byte2);
4431   } else {
4432     prefix(VEX_2bytes);
4433 
4434     int byte1 = vex_r ? VEX_R : 0;
4435     byte1 = (~byte1) & 0x80;
4436     byte1 |= ((~nds_enc) & 0xf) << 3;
4437     byte1 |= (vector256 ? 4 : 0) | pre;
4438     emit_int8(byte1);
4439   }
4440 }
4441 
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4443   bool vex_r = (xreg_enc >= 8);
4444   bool vex_b = adr.base_needs_rex();
4445   bool vex_x = adr.index_needs_rex();
4446   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4447 }
4448 
4449 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4450   bool vex_r = (dst_enc >= 8);
4451   bool vex_b = (src_enc >= 8);
4452   bool vex_x = false;
4453   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4454   return (((dst_enc & 7) << 3) | (src_enc & 7));
4455 }
4456 
4457 
4458 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4459   if (UseAVX > 0) {
4460     int xreg_enc = xreg->encoding();
4461     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4462     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4463   } else {
4464     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4465     rex_prefix(adr, xreg, pre, opc, rex_w);
4466   }
4467 }
4468 
4469 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4470   int dst_enc = dst->encoding();
4471   int src_enc = src->encoding();
4472   if (UseAVX > 0) {
4473     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4474     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4475   } else {
4476     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4477     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4478   }
4479 }
4480 
4481 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4482   InstructionMark im(this);
4483   simd_prefix(dst, dst, src, pre);
4484   emit_int8(opcode);
4485   emit_operand(dst, src);
4486 }
4487 
4488 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4489   int encode = simd_prefix_and_encode(dst, dst, src, pre);
4490   emit_int8(opcode);
4491   emit_int8((unsigned char)(0xC0 | encode));
4492 }
4493 
4494 // Versions with no second source register (non-destructive source).
4495 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4496   InstructionMark im(this);
4497   simd_prefix(dst, xnoreg, src, pre);
4498   emit_int8(opcode);
4499   emit_operand(dst, src);
4500 }
4501 
4502 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4503   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
4504   emit_int8(opcode);
4505   emit_int8((unsigned char)(0xC0 | encode));
4506 }
4507 
// 3-operand AVX instructions
4509 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4510                                Address src, VexSimdPrefix pre, bool vector256) {
4511   InstructionMark im(this);
4512   vex_prefix(dst, nds, src, pre, vector256);
4513   emit_int8(opcode);
4514   emit_operand(dst, src);
4515 }
4516 
4517 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4518                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
4519   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
4520   emit_int8(opcode);
4521   emit_int8((unsigned char)(0xC0 | encode));
4522 }
4523 
4524 #ifndef _LP64
4525 
4526 void Assembler::incl(Register dst) {
4527   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4528   emit_int8(0x40 | dst->encoding());
4529 }
4530 
4531 void Assembler::lea(Register dst, Address src) {
4532   leal(dst, src);
4533 }
4534 
4535 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4536   InstructionMark im(this);
4537   emit_int8((unsigned char)0xC7);
4538   emit_operand(rax, dst);
4539   emit_data((int)imm32, rspec, 0);
4540 }
4541 
4542 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4543   InstructionMark im(this);
4544   int encode = prefix_and_encode(dst->encoding());
4545   emit_int8((unsigned char)(0xB8 | encode));
4546   emit_data((int)imm32, rspec, 0);
4547 }
4548 
4549 void Assembler::popa() { // 32bit
4550   emit_int8(0x61);
4551 }
4552 
4553 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
4554   InstructionMark im(this);
4555   emit_int8(0x68);
4556   emit_data(imm32, rspec, 0);
4557 }
4558 
4559 void Assembler::pusha() { // 32bit
4560   emit_int8(0x60);
4561 }
4562 
4563 void Assembler::set_byte_if_not_zero(Register dst) {
4564   emit_int8(0x0F);
4565   emit_int8((unsigned char)0x95);
4566   emit_int8((unsigned char)(0xE0 | dst->encoding()));
4567 }
4568 
4569 void Assembler::shldl(Register dst, Register src) {
4570   emit_int8(0x0F);
4571   emit_int8((unsigned char)0xA5);
4572   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4573 }
4574 
4575 void Assembler::shrdl(Register dst, Register src) {
4576   emit_int8(0x0F);
4577   emit_int8((unsigned char)0xAD);
4578   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4579 }
4580 
4581 #else // LP64
4582 
4583 void Assembler::set_byte_if_not_zero(Register dst) {
4584   int enc = prefix_and_encode(dst->encoding(), true);
4585   emit_int8(0x0F);
4586   emit_int8((unsigned char)0x95);
4587   emit_int8((unsigned char)(0xE0 | enc));
4588 }
4589 
4590 // 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
4593 
4594 bool Assembler::reachable(AddressLiteral adr) {
4595   int64_t disp;
  // A relocation of type none will force a 64bit literal to the code stream.
  // Likely a placeholder for something that will be patched later and we need
  // to be certain it will always be reachable.
4599   if (adr.reloc() == relocInfo::none) {
4600     return false;
4601   }
4602   if (adr.reloc() == relocInfo::internal_word_type) {
4603     // This should be rip relative and easily reachable.
4604     return true;
4605   }
4606   if (adr.reloc() == relocInfo::virtual_call_type ||
4607       adr.reloc() == relocInfo::opt_virtual_call_type ||
4608       adr.reloc() == relocInfo::static_call_type ||
4609       adr.reloc() == relocInfo::static_stub_type ) {
4610     // This should be rip relative within the code cache and easily
4611     // reachable until we get huge code caches. (At which point
4612     // ic code is going to have issues).
4613     return true;
4614   }
4615   if (adr.reloc() != relocInfo::external_word_type &&
4616       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
4617       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
4618       adr.reloc() != relocInfo::runtime_call_type ) {
4619     return false;
4620   }
4621 
4622   // Stress the correction code
4623   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the codecache
4625     // Flipping stuff in the codecache to be unreachable causes issues
4626     // with things like inline caches where the additional instructions
4627     // are not handled.
4628     if (CodeCache::find_blob(adr._target) == NULL) {
4629       return false;
4630     }
4631   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable.
  // This would have to change, and become more pessimistic, if we ever
  // save/restore shared code.
4637   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
4638   if (!is_simm32(disp)) return false;
4639   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
4640   if (!is_simm32(disp)) return false;
4641 
4642   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
4643 
  // Because rip-relative addressing is disp + address_of_next_instruction, and we
  // don't know the value of address_of_next_instruction, we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.
4648 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal
4650   // + 4 because better safe than sorry.
4651   const int fudge = 12 + 4;
4652   if (disp < 0) {
4653     disp -= fudge;
4654   } else {
4655     disp += fudge;
4656   }
4657   return is_simm32(disp);
4658 }
4659 
// Returns true if the polling page is not reachable from the code cache using
// rip-relative addressing.
4662 bool Assembler::is_polling_page_far() {
4663   intptr_t addr = (intptr_t)os::get_polling_page();
4664   return ForceUnreachable ||
4665          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
4666          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
4667 }
4668 
4669 void Assembler::emit_data64(jlong data,
4670                             relocInfo::relocType rtype,
4671                             int format) {
4672   if (rtype == relocInfo::none) {
4673     emit_int64(data);
4674   } else {
4675     emit_data64(data, Relocation::spec_simple(rtype), format);
4676   }
4677 }
4678 
4679 void Assembler::emit_data64(jlong data,
4680                             RelocationHolder const& rspec,
4681                             int format) {
4682   assert(imm_operand == 0, "default format must be immediate in this file");
4683   assert(imm_operand == format, "must be immediate");
4684   assert(inst_mark() != NULL, "must be inside InstructionMark");
4685   // Do not use AbstractAssembler::relocate, which is not intended for
4686   // embedded words.  Instead, relocate to the enclosing instruction.
4687   code_section()->relocate(inst_mark(), rspec, format);
4688 #ifdef ASSERT
4689   check_relocation(rspec, format);
4690 #endif
4691   emit_int64(data);
4692 }
4693 
4694 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
4695   if (reg_enc >= 8) {
4696     prefix(REX_B);
4697     reg_enc -= 8;
4698   } else if (byteinst && reg_enc >= 4) {
4699     prefix(REX);
4700   }
4701   return reg_enc;
4702 }
4703 
4704 int Assembler::prefixq_and_encode(int reg_enc) {
4705   if (reg_enc < 8) {
4706     prefix(REX_W);
4707   } else {
4708     prefix(REX_WB);
4709     reg_enc -= 8;
4710   }
4711   return reg_enc;
4712 }
4713 
4714 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
4715   if (dst_enc < 8) {
4716     if (src_enc >= 8) {
4717       prefix(REX_B);
4718       src_enc -= 8;
4719     } else if (byteinst && src_enc >= 4) {
4720       prefix(REX);
4721     }
4722   } else {
4723     if (src_enc < 8) {
4724       prefix(REX_R);
4725     } else {
4726       prefix(REX_RB);
4727       src_enc -= 8;
4728     }
4729     dst_enc -= 8;
4730   }
4731   return dst_enc << 3 | src_enc;
4732 }
4733 
4734 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
4735   if (dst_enc < 8) {
4736     if (src_enc < 8) {
4737       prefix(REX_W);
4738     } else {
4739       prefix(REX_WB);
4740       src_enc -= 8;
4741     }
4742   } else {
4743     if (src_enc < 8) {
4744       prefix(REX_WR);
4745     } else {
4746       prefix(REX_WRB);
4747       src_enc -= 8;
4748     }
4749     dst_enc -= 8;
4750   }
4751   return dst_enc << 3 | src_enc;
4752 }
4753 
4754 void Assembler::prefix(Register reg) {
4755   if (reg->encoding() >= 8) {
4756     prefix(REX_B);
4757   }
4758 }
4759 
4760 void Assembler::prefix(Address adr) {
4761   if (adr.base_needs_rex()) {
4762     if (adr.index_needs_rex()) {
4763       prefix(REX_XB);
4764     } else {
4765       prefix(REX_B);
4766     }
4767   } else {
4768     if (adr.index_needs_rex()) {
4769       prefix(REX_X);
4770     }
4771   }
4772 }
4773 
4774 void Assembler::prefixq(Address adr) {
4775   if (adr.base_needs_rex()) {
4776     if (adr.index_needs_rex()) {
4777       prefix(REX_WXB);
4778     } else {
4779       prefix(REX_WB);
4780     }
4781   } else {
4782     if (adr.index_needs_rex()) {
4783       prefix(REX_WX);
4784     } else {
4785       prefix(REX_W);
4786     }
4787   }
4788 }
4789 
4790 
4791 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
4792   if (reg->encoding() < 8) {
4793     if (adr.base_needs_rex()) {
4794       if (adr.index_needs_rex()) {
4795         prefix(REX_XB);
4796       } else {
4797         prefix(REX_B);
4798       }
4799     } else {
4800       if (adr.index_needs_rex()) {
4801         prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4) {
4803         prefix(REX);
4804       }
4805     }
4806   } else {
4807     if (adr.base_needs_rex()) {
4808       if (adr.index_needs_rex()) {
4809         prefix(REX_RXB);
4810       } else {
4811         prefix(REX_RB);
4812       }
4813     } else {
4814       if (adr.index_needs_rex()) {
4815         prefix(REX_RX);
4816       } else {
4817         prefix(REX_R);
4818       }
4819     }
4820   }
4821 }
4822 
4823 void Assembler::prefixq(Address adr, Register src) {
4824   if (src->encoding() < 8) {
4825     if (adr.base_needs_rex()) {
4826       if (adr.index_needs_rex()) {
4827         prefix(REX_WXB);
4828       } else {
4829         prefix(REX_WB);
4830       }
4831     } else {
4832       if (adr.index_needs_rex()) {
4833         prefix(REX_WX);
4834       } else {
4835         prefix(REX_W);
4836       }
4837     }
4838   } else {
4839     if (adr.base_needs_rex()) {
4840       if (adr.index_needs_rex()) {
4841         prefix(REX_WRXB);
4842       } else {
4843         prefix(REX_WRB);
4844       }
4845     } else {
4846       if (adr.index_needs_rex()) {
4847         prefix(REX_WRX);
4848       } else {
4849         prefix(REX_WR);
4850       }
4851     }
4852   }
4853 }
4854 
4855 void Assembler::prefix(Address adr, XMMRegister reg) {
4856   if (reg->encoding() < 8) {
4857     if (adr.base_needs_rex()) {
4858       if (adr.index_needs_rex()) {
4859         prefix(REX_XB);
4860       } else {
4861         prefix(REX_B);
4862       }
4863     } else {
4864       if (adr.index_needs_rex()) {
4865         prefix(REX_X);
4866       }
4867     }
4868   } else {
4869     if (adr.base_needs_rex()) {
4870       if (adr.index_needs_rex()) {
4871         prefix(REX_RXB);
4872       } else {
4873         prefix(REX_RB);
4874       }
4875     } else {
4876       if (adr.index_needs_rex()) {
4877         prefix(REX_RX);
4878       } else {
4879         prefix(REX_R);
4880       }
4881     }
4882   }
4883 }
4884 
4885 void Assembler::prefixq(Address adr, XMMRegister src) {
4886   if (src->encoding() < 8) {
4887     if (adr.base_needs_rex()) {
4888       if (adr.index_needs_rex()) {
4889         prefix(REX_WXB);
4890       } else {
4891         prefix(REX_WB);
4892       }
4893     } else {
4894       if (adr.index_needs_rex()) {
4895         prefix(REX_WX);
4896       } else {
4897         prefix(REX_W);
4898       }
4899     }
4900   } else {
4901     if (adr.base_needs_rex()) {
4902       if (adr.index_needs_rex()) {
4903         prefix(REX_WRXB);
4904       } else {
4905         prefix(REX_WRB);
4906       }
4907     } else {
4908       if (adr.index_needs_rex()) {
4909         prefix(REX_WRX);
4910       } else {
4911         prefix(REX_WR);
4912       }
4913     }
4914   }
4915 }
4916 
4917 void Assembler::adcq(Register dst, int32_t imm32) {
4918   (void) prefixq_and_encode(dst->encoding());
4919   emit_arith(0x81, 0xD0, dst, imm32);
4920 }
4921 
4922 void Assembler::adcq(Register dst, Address src) {
4923   InstructionMark im(this);
4924   prefixq(src, dst);
4925   emit_int8(0x13);
4926   emit_operand(dst, src);
4927 }
4928 
4929 void Assembler::adcq(Register dst, Register src) {
4930   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4931   emit_arith(0x13, 0xC0, dst, src);
4932 }
4933 
4934 void Assembler::addq(Address dst, int32_t imm32) {
4935   InstructionMark im(this);
4936   prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
4938 }
4939 
4940 void Assembler::addq(Address dst, Register src) {
4941   InstructionMark im(this);
4942   prefixq(dst, src);
4943   emit_int8(0x01);
4944   emit_operand(src, dst);
4945 }
4946 
4947 void Assembler::addq(Register dst, int32_t imm32) {
4948   (void) prefixq_and_encode(dst->encoding());
4949   emit_arith(0x81, 0xC0, dst, imm32);
4950 }
4951 
4952 void Assembler::addq(Register dst, Address src) {
4953   InstructionMark im(this);
4954   prefixq(src, dst);
4955   emit_int8(0x03);
4956   emit_operand(dst, src);
4957 }
4958 
4959 void Assembler::addq(Register dst, Register src) {
4960   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4961   emit_arith(0x03, 0xC0, dst, src);
4962 }
4963 
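// ADCX (66-prefixed) reads and writes only CF while ADOX (F3-prefixed) reads
// and writes only OF, so the two forms below can drive independent carry
// chains in parallel (e.g. in multi-precision multiplication); both share
// the 0F 38 F6 opcode.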
4964 void Assembler::adcxq(Register dst, Register src) {
4965   //assert(VM_Version::supports_adx(), "adx instructions not supported");
4966   emit_int8((unsigned char)0x66);
4967   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4968   emit_int8(0x0F);
4969   emit_int8(0x38);
4970   emit_int8((unsigned char)0xF6);
4971   emit_int8((unsigned char)(0xC0 | encode));
4972 }
4973 
4974 void Assembler::adoxq(Register dst, Register src) {
4975   //assert(VM_Version::supports_adx(), "adx instructions not supported");
4976   emit_int8((unsigned char)0xF3);
4977   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4978   emit_int8(0x0F);
4979   emit_int8(0x38);
4980   emit_int8((unsigned char)0xF6);
4981   emit_int8((unsigned char)(0xC0 | encode));
4982 }
4983 
4984 void Assembler::andq(Address dst, int32_t imm32) {
4985   InstructionMark im(this);
4986   prefixq(dst);
4987   emit_int8((unsigned char)0x81);
4988   emit_operand(rsp, dst, 4);
4989   emit_int32(imm32);
4990 }
4991 
4992 void Assembler::andq(Register dst, int32_t imm32) {
4993   (void) prefixq_and_encode(dst->encoding());
4994   emit_arith(0x81, 0xE0, dst, imm32);
4995 }
4996 
4997 void Assembler::andq(Register dst, Address src) {
4998   InstructionMark im(this);
4999   prefixq(src, dst);
5000   emit_int8(0x23);
5001   emit_operand(dst, src);
5002 }
5003 
5004 void Assembler::andq(Register dst, Register src) {
5005   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5006   emit_arith(0x23, 0xC0, dst, src);
5007 }
5008 
5009 void Assembler::andnq(Register dst, Register src1, Register src2) {
5010   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5011   int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
5012   emit_int8((unsigned char)0xF2);
5013   emit_int8((unsigned char)(0xC0 | encode));
5014 }
5015 
5016 void Assembler::andnq(Register dst, Register src1, Address src2) {
5017   InstructionMark im(this);
5018   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5019   vex_prefix_0F38_q(dst, src1, src2);
5020   emit_int8((unsigned char)0xF2);
5021   emit_operand(dst, src2);
5022 }
5023 
5024 void Assembler::bsfq(Register dst, Register src) {
5025   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5026   emit_int8(0x0F);
5027   emit_int8((unsigned char)0xBC);
5028   emit_int8((unsigned char)(0xC0 | encode));
5029 }
5030 
5031 void Assembler::bsrq(Register dst, Register src) {
5032   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5033   emit_int8(0x0F);
5034   emit_int8((unsigned char)0xBD);
5035   emit_int8((unsigned char)(0xC0 | encode));
5036 }
5037 
5038 void Assembler::bswapq(Register reg) {
5039   int encode = prefixq_and_encode(reg->encoding());
5040   emit_int8(0x0F);
5041   emit_int8((unsigned char)(0xC8 | encode));
5042 }
5043 
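// BMI1 notes for the forms below: blsi computes x & -x (isolate lowest set
// bit), blsmsk computes x ^ (x - 1) and blsr computes x & (x - 1); the
// rbx/rdx/rcx first arguments supply the /3, /2 and /1 opcode-extension
// fields of the shared 0xF3 opcode byte.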
5044 void Assembler::blsiq(Register dst, Register src) {
5045   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5046   int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
5047   emit_int8((unsigned char)0xF3);
5048   emit_int8((unsigned char)(0xC0 | encode));
5049 }
5050 
5051 void Assembler::blsiq(Register dst, Address src) {
5052   InstructionMark im(this);
5053   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5054   vex_prefix_0F38_q(rbx, dst, src);
5055   emit_int8((unsigned char)0xF3);
5056   emit_operand(rbx, src);
5057 }
5058 
5059 void Assembler::blsmskq(Register dst, Register src) {
5060   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5061   int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
5062   emit_int8((unsigned char)0xF3);
5063   emit_int8((unsigned char)(0xC0 | encode));
5064 }
5065 
5066 void Assembler::blsmskq(Register dst, Address src) {
5067   InstructionMark im(this);
5068   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5069   vex_prefix_0F38_q(rdx, dst, src);
5070   emit_int8((unsigned char)0xF3);
5071   emit_operand(rdx, src);
5072 }
5073 
5074 void Assembler::blsrq(Register dst, Register src) {
5075   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5076   int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
5077   emit_int8((unsigned char)0xF3);
5078   emit_int8((unsigned char)(0xC0 | encode));
5079 }
5080 
5081 void Assembler::blsrq(Register dst, Address src) {
5082   InstructionMark im(this);
5083   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
5084   vex_prefix_0F38_q(rcx, dst, src);
5085   emit_int8((unsigned char)0xF3);
5086   emit_operand(rcx, src);
5087 }
5088 
5089 void Assembler::cdqq() {
5090   prefix(REX_W);
5091   emit_int8((unsigned char)0x99);
5092 }
5093 
5094 void Assembler::clflush(Address adr) {
5095   prefix(adr);
5096   emit_int8(0x0F);
5097   emit_int8((unsigned char)0xAE);
5098   emit_operand(rdi, adr);
5099 }
5100 
5101 void Assembler::cmovq(Condition cc, Register dst, Register src) {
5102   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5103   emit_int8(0x0F);
5104   emit_int8(0x40 | cc);
5105   emit_int8((unsigned char)(0xC0 | encode));
5106 }
5107 
5108 void Assembler::cmovq(Condition cc, Register dst, Address src) {
5109   InstructionMark im(this);
5110   prefixq(src, dst);
5111   emit_int8(0x0F);
5112   emit_int8(0x40 | cc);
5113   emit_operand(dst, src);
5114 }
5115 
5116 void Assembler::cmpq(Address dst, int32_t imm32) {
5117   InstructionMark im(this);
5118   prefixq(dst);
5119   emit_int8((unsigned char)0x81);
5120   emit_operand(rdi, dst, 4);
5121   emit_int32(imm32);
5122 }
5123 
5124 void Assembler::cmpq(Register dst, int32_t imm32) {
5125   (void) prefixq_and_encode(dst->encoding());
5126   emit_arith(0x81, 0xF8, dst, imm32);
5127 }
5128 
5129 void Assembler::cmpq(Address dst, Register src) {
5130   InstructionMark im(this);
5131   prefixq(dst, src);
  emit_int8(0x39); // CMP r/m64, r64, matching cmpl(Address, Register);
                   // 0x3B would swap the comparison operands
5133   emit_operand(src, dst);
5134 }
5135 
5136 void Assembler::cmpq(Register dst, Register src) {
5137   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5138   emit_arith(0x3B, 0xC0, dst, src);
5139 }
5140 
void Assembler::cmpq(Register dst, Address src) {
5142   InstructionMark im(this);
5143   prefixq(src, dst);
5144   emit_int8(0x3B);
5145   emit_operand(dst, src);
5146 }
5147 
5148 void Assembler::cmpxchgq(Register reg, Address adr) {
5149   InstructionMark im(this);
5150   prefixq(adr, reg);
5151   emit_int8(0x0F);
5152   emit_int8((unsigned char)0xB1);
5153   emit_operand(reg, adr);
5154 }
5155 
5156 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
5157   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5158   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
5159   emit_int8(0x2A);
5160   emit_int8((unsigned char)(0xC0 | encode));
5161 }
5162 
5163 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
5164   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5165   InstructionMark im(this);
5166   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
5167   emit_int8(0x2A);
5168   emit_operand(dst, src);
5169 }
5170 
5171 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
5172   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5173   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
5174   emit_int8(0x2A);
5175   emit_int8((unsigned char)(0xC0 | encode));
5176 }
5177 
5178 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
5179   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5180   InstructionMark im(this);
5181   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
5182   emit_int8(0x2A);
5183   emit_operand(dst, src);
5184 }
5185 
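// The truncating converts below (opcode 0x2C) always round toward zero,
// independent of the MXCSR rounding mode.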
5186 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
5187   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5188   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
5189   emit_int8(0x2C);
5190   emit_int8((unsigned char)(0xC0 | encode));
5191 }
5192 
5193 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
5194   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5195   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
5196   emit_int8(0x2C);
5197   emit_int8((unsigned char)(0xC0 | encode));
5198 }
5199 
5200 void Assembler::decl(Register dst) {
5201   // Don't use it directly. Use MacroAssembler::decrementl() instead.
5202   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5203   int encode = prefix_and_encode(dst->encoding());
5204   emit_int8((unsigned char)0xFF);
5205   emit_int8((unsigned char)(0xC8 | encode));
5206 }
5207 
5208 void Assembler::decq(Register dst) {
5209   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5211   int encode = prefixq_and_encode(dst->encoding());
5212   emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
5214 }
5215 
5216 void Assembler::decq(Address dst) {
5217   // Don't use it directly. Use MacroAssembler::decrementq() instead.
5218   InstructionMark im(this);
5219   prefixq(dst);
5220   emit_int8((unsigned char)0xFF);
5221   emit_operand(rcx, dst);
5222 }
5223 
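// fxrstor is 0F AE /1 and fxsave is 0F AE /0; the as_Register(1) and
// as_Register(0) arguments below supply those opcode-extension fields.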
5224 void Assembler::fxrstor(Address src) {
5225   prefixq(src);
5226   emit_int8(0x0F);
5227   emit_int8((unsigned char)0xAE);
5228   emit_operand(as_Register(1), src);
5229 }
5230 
5231 void Assembler::fxsave(Address dst) {
5232   prefixq(dst);
5233   emit_int8(0x0F);
5234   emit_int8((unsigned char)0xAE);
5235   emit_operand(as_Register(0), dst);
5236 }
5237 
5238 void Assembler::idivq(Register src) {
5239   int encode = prefixq_and_encode(src->encoding());
5240   emit_int8((unsigned char)0xF7);
5241   emit_int8((unsigned char)(0xF8 | encode));
5242 }
5243 
5244 void Assembler::imulq(Register dst, Register src) {
5245   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5246   emit_int8(0x0F);
5247   emit_int8((unsigned char)0xAF);
5248   emit_int8((unsigned char)(0xC0 | encode));
5249 }
5250 
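// The short 0x6B form below sign-extends its 8-bit immediate, so it is only
// used when is8bit(value) holds; larger values take the 0x69 imm32 form.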
5251 void Assembler::imulq(Register dst, Register src, int value) {
5252   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5253   if (is8bit(value)) {
5254     emit_int8(0x6B);
5255     emit_int8((unsigned char)(0xC0 | encode));
5256     emit_int8(value & 0xFF);
5257   } else {
5258     emit_int8(0x69);
5259     emit_int8((unsigned char)(0xC0 | encode));
5260     emit_int32(value);
5261   }
5262 }
5263 
5264 void Assembler::imulq(Register dst, Address src) {
5265   InstructionMark im(this);
5266   prefixq(src, dst);
5267   emit_int8(0x0F);
5268   emit_int8((unsigned char) 0xAF);
5269   emit_operand(dst, src);
5270 }
5271 
5272 void Assembler::incl(Register dst) {
5273   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5275   int encode = prefix_and_encode(dst->encoding());
5276   emit_int8((unsigned char)0xFF);
5277   emit_int8((unsigned char)(0xC0 | encode));
5278 }
5279 
5280 void Assembler::incq(Register dst) {
5281   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
5283   int encode = prefixq_and_encode(dst->encoding());
5284   emit_int8((unsigned char)0xFF);
5285   emit_int8((unsigned char)(0xC0 | encode));
5286 }
5287 
5288 void Assembler::incq(Address dst) {
5289   // Don't use it directly. Use MacroAssembler::incrementq() instead.
5290   InstructionMark im(this);
5291   prefixq(dst);
5292   emit_int8((unsigned char)0xFF);
5293   emit_operand(rax, dst);
5294 }
5295 
5296 void Assembler::lea(Register dst, Address src) {
5297   leaq(dst, src);
5298 }
5299 
5300 void Assembler::leaq(Register dst, Address src) {
5301   InstructionMark im(this);
5302   prefixq(src, dst);
5303   emit_int8((unsigned char)0x8D);
5304   emit_operand(dst, src);
5305 }
5306 
5307 void Assembler::mov64(Register dst, int64_t imm64) {
5308   InstructionMark im(this);
5309   int encode = prefixq_and_encode(dst->encoding());
5310   emit_int8((unsigned char)(0xB8 | encode));
5311   emit_int64(imm64);
5312 }
5313 
5314 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
5315   InstructionMark im(this);
5316   int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
5318   emit_data64(imm64, rspec);
5319 }
5320 
5321 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
5322   InstructionMark im(this);
5323   int encode = prefix_and_encode(dst->encoding());
5324   emit_int8((unsigned char)(0xB8 | encode));
5325   emit_data((int)imm32, rspec, narrow_oop_operand);
5326 }
5327 
5328 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
5329   InstructionMark im(this);
5330   prefix(dst);
5331   emit_int8((unsigned char)0xC7);
5332   emit_operand(rax, dst, 4);
5333   emit_data((int)imm32, rspec, narrow_oop_operand);
5334 }
5335 
5336 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5337   InstructionMark im(this);
5338   int encode = prefix_and_encode(src1->encoding());
5339   emit_int8((unsigned char)0x81);
5340   emit_int8((unsigned char)(0xF8 | encode));
5341   emit_data((int)imm32, rspec, narrow_oop_operand);
5342 }
5343 
5344 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5345   InstructionMark im(this);
5346   prefix(src1);
5347   emit_int8((unsigned char)0x81);
5348   emit_operand(rax, src1, 4);
5349   emit_data((int)imm32, rspec, narrow_oop_operand);
5350 }
5351 
5352 void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding would be treated as BSR without lzcnt support");
5354   emit_int8((unsigned char)0xF3);
5355   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5356   emit_int8(0x0F);
5357   emit_int8((unsigned char)0xBD);
5358   emit_int8((unsigned char)(0xC0 | encode));
5359 }
5360 
5361 void Assembler::movdq(XMMRegister dst, Register src) {
5362   // table D-1 says MMX/SSE2
5363   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5364   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
5365   emit_int8(0x6E);
5366   emit_int8((unsigned char)(0xC0 | encode));
5367 }
5368 
5369 void Assembler::movdq(Register dst, XMMRegister src) {
5370   // table D-1 says MMX/SSE2
5371   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5372   // swap src/dst to get correct prefix
5373   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
5374   emit_int8(0x7E);
5375   emit_int8((unsigned char)(0xC0 | encode));
5376 }
5377 
5378 void Assembler::movq(Register dst, Register src) {
5379   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5380   emit_int8((unsigned char)0x8B);
5381   emit_int8((unsigned char)(0xC0 | encode));
5382 }
5383 
5384 void Assembler::movq(Register dst, Address src) {
5385   InstructionMark im(this);
5386   prefixq(src, dst);
5387   emit_int8((unsigned char)0x8B);
5388   emit_operand(dst, src);
5389 }
5390 
5391 void Assembler::movq(Address dst, Register src) {
5392   InstructionMark im(this);
5393   prefixq(dst, src);
5394   emit_int8((unsigned char)0x89);
5395   emit_operand(src, dst);
5396 }
5397 
5398 void Assembler::movsbq(Register dst, Address src) {
5399   InstructionMark im(this);
5400   prefixq(src, dst);
5401   emit_int8(0x0F);
5402   emit_int8((unsigned char)0xBE);
5403   emit_operand(dst, src);
5404 }
5405 
5406 void Assembler::movsbq(Register dst, Register src) {
5407   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5408   emit_int8(0x0F);
5409   emit_int8((unsigned char)0xBE);
5410   emit_int8((unsigned char)(0xC0 | encode));
5411 }
5412 
5413 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx), i.e. the
  // encoding below is wrong; as a result we shouldn't use this until it has
  // been tested at runtime...
5417   ShouldNotReachHere();
5418   InstructionMark im(this);
5419   int encode = prefixq_and_encode(dst->encoding());
5420   emit_int8((unsigned char)(0xC7 | encode));
5421   emit_int32(imm32);
5422 }
5423 
5424 void Assembler::movslq(Address dst, int32_t imm32) {
5425   assert(is_simm32(imm32), "lost bits");
5426   InstructionMark im(this);
5427   prefixq(dst);
5428   emit_int8((unsigned char)0xC7);
5429   emit_operand(rax, dst, 4);
5430   emit_int32(imm32);
5431 }
5432 
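// The two movslq forms below use opcode 0x63 (MOVSXD), which sign-extends
// the 32-bit source into the 64-bit destination.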
5433 void Assembler::movslq(Register dst, Address src) {
5434   InstructionMark im(this);
5435   prefixq(src, dst);
5436   emit_int8(0x63);
5437   emit_operand(dst, src);
5438 }
5439 
5440 void Assembler::movslq(Register dst, Register src) {
5441   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5442   emit_int8(0x63);
5443   emit_int8((unsigned char)(0xC0 | encode));
5444 }
5445 
5446 void Assembler::movswq(Register dst, Address src) {
5447   InstructionMark im(this);
5448   prefixq(src, dst);
5449   emit_int8(0x0F);
5450   emit_int8((unsigned char)0xBF);
5451   emit_operand(dst, src);
5452 }
5453 
5454 void Assembler::movswq(Register dst, Register src) {
5455   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5456   emit_int8((unsigned char)0x0F);
5457   emit_int8((unsigned char)0xBF);
5458   emit_int8((unsigned char)(0xC0 | encode));
5459 }
5460 
5461 void Assembler::movzbq(Register dst, Address src) {
5462   InstructionMark im(this);
5463   prefixq(src, dst);
5464   emit_int8((unsigned char)0x0F);
5465   emit_int8((unsigned char)0xB6);
5466   emit_operand(dst, src);
5467 }
5468 
5469 void Assembler::movzbq(Register dst, Register src) {
5470   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5471   emit_int8(0x0F);
5472   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
5474 }
5475 
5476 void Assembler::movzwq(Register dst, Address src) {
5477   InstructionMark im(this);
5478   prefixq(src, dst);
5479   emit_int8((unsigned char)0x0F);
5480   emit_int8((unsigned char)0xB7);
5481   emit_operand(dst, src);
5482 }
5483 
5484 void Assembler::movzwq(Register dst, Register src) {
5485   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5486   emit_int8((unsigned char)0x0F);
5487   emit_int8((unsigned char)0xB7);
5488   emit_int8((unsigned char)(0xC0 | encode));
5489 }
5490 
5491 void Assembler::mulq(Address src) {
5492   InstructionMark im(this);
5493   prefixq(src);
5494   emit_int8((unsigned char)0xF7);
5495   emit_operand(rsp, src);
5496 }
5497 
5498 void Assembler::mulq(Register src) {
5499   int encode = prefixq_and_encode(src->encoding());
5500   emit_int8((unsigned char)0xF7);
5501   emit_int8((unsigned char)(0xE0 | encode));
5502 }
5503 
5504 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
5505   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5506   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
5507   emit_int8((unsigned char)0xF6);
5508   emit_int8((unsigned char)(0xC0 | encode));
5509 }
5510 
5511 void Assembler::negq(Register dst) {
5512   int encode = prefixq_and_encode(dst->encoding());
5513   emit_int8((unsigned char)0xF7);
5514   emit_int8((unsigned char)(0xD8 | encode));
5515 }
5516 
5517 void Assembler::notq(Register dst) {
5518   int encode = prefixq_and_encode(dst->encoding());
5519   emit_int8((unsigned char)0xF7);
5520   emit_int8((unsigned char)(0xD0 | encode));
5521 }
5522 
5523 void Assembler::orq(Address dst, int32_t imm32) {
5524   InstructionMark im(this);
5525   prefixq(dst);
5526   emit_int8((unsigned char)0x81);
5527   emit_operand(rcx, dst, 4);
5528   emit_int32(imm32);
5529 }
5530 
5531 void Assembler::orq(Register dst, int32_t imm32) {
5532   (void) prefixq_and_encode(dst->encoding());
5533   emit_arith(0x81, 0xC8, dst, imm32);
5534 }
5535 
5536 void Assembler::orq(Register dst, Address src) {
5537   InstructionMark im(this);
5538   prefixq(src, dst);
5539   emit_int8(0x0B);
5540   emit_operand(dst, src);
5541 }
5542 
5543 void Assembler::orq(Register dst, Register src) {
5544   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5545   emit_arith(0x0B, 0xC0, dst, src);
5546 }
5547 
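// popa()/pusha() emulate PUSHA/POPA, which do not exist in 64-bit mode,
// with a 16-slot save area: slot 15 (rax) down through slot 12 (rbx),
// slot 11 holding the original rsp (stored via the red zone, never
// reloaded), then slot 10 (rbp) down through slot 0 (r15).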
5548 void Assembler::popa() { // 64bit
5549   movq(r15, Address(rsp, 0));
5550   movq(r14, Address(rsp, wordSize));
5551   movq(r13, Address(rsp, 2 * wordSize));
5552   movq(r12, Address(rsp, 3 * wordSize));
5553   movq(r11, Address(rsp, 4 * wordSize));
5554   movq(r10, Address(rsp, 5 * wordSize));
5555   movq(r9,  Address(rsp, 6 * wordSize));
5556   movq(r8,  Address(rsp, 7 * wordSize));
5557   movq(rdi, Address(rsp, 8 * wordSize));
5558   movq(rsi, Address(rsp, 9 * wordSize));
5559   movq(rbp, Address(rsp, 10 * wordSize));
5560   // skip rsp
5561   movq(rbx, Address(rsp, 12 * wordSize));
5562   movq(rdx, Address(rsp, 13 * wordSize));
5563   movq(rcx, Address(rsp, 14 * wordSize));
5564   movq(rax, Address(rsp, 15 * wordSize));
5565 
5566   addq(rsp, 16 * wordSize);
5567 }
5568 
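// Note for both popcntq forms below: the mandatory 0xF3 prefix is emitted
// before the REX prefix because REX must immediately precede the opcode
// bytes.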
5569 void Assembler::popcntq(Register dst, Address src) {
5570   assert(VM_Version::supports_popcnt(), "must support");
5571   InstructionMark im(this);
5572   emit_int8((unsigned char)0xF3);
5573   prefixq(src, dst);
5574   emit_int8((unsigned char)0x0F);
5575   emit_int8((unsigned char)0xB8);
5576   emit_operand(dst, src);
5577 }
5578 
5579 void Assembler::popcntq(Register dst, Register src) {
5580   assert(VM_Version::supports_popcnt(), "must support");
5581   emit_int8((unsigned char)0xF3);
5582   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5583   emit_int8((unsigned char)0x0F);
5584   emit_int8((unsigned char)0xB8);
5585   emit_int8((unsigned char)(0xC0 | encode));
5586 }
5587 
5588 void Assembler::popq(Address dst) {
5589   InstructionMark im(this);
5590   prefixq(dst);
5591   emit_int8((unsigned char)0x8F);
5592   emit_operand(rax, dst);
5593 }
5594 
5595 void Assembler::pusha() { // 64bit
  // We have to store the original rsp; the ABI says the 128 bytes below rsp
  // (the red zone) are local scratch, so the store below rsp is safe.
5598   movq(Address(rsp, -5 * wordSize), rsp);
5599 
5600   subq(rsp, 16 * wordSize);
5601 
5602   movq(Address(rsp, 15 * wordSize), rax);
5603   movq(Address(rsp, 14 * wordSize), rcx);
5604   movq(Address(rsp, 13 * wordSize), rdx);
5605   movq(Address(rsp, 12 * wordSize), rbx);
5606   // skip rsp
5607   movq(Address(rsp, 10 * wordSize), rbp);
5608   movq(Address(rsp, 9 * wordSize), rsi);
5609   movq(Address(rsp, 8 * wordSize), rdi);
5610   movq(Address(rsp, 7 * wordSize), r8);
5611   movq(Address(rsp, 6 * wordSize), r9);
5612   movq(Address(rsp, 5 * wordSize), r10);
5613   movq(Address(rsp, 4 * wordSize), r11);
5614   movq(Address(rsp, 3 * wordSize), r12);
5615   movq(Address(rsp, 2 * wordSize), r13);
5616   movq(Address(rsp, wordSize), r14);
5617   movq(Address(rsp, 0), r15);
5618 }
5619 
5620 void Assembler::pushq(Address src) {
5621   InstructionMark im(this);
5622   prefixq(src);
5623   emit_int8((unsigned char)0xFF);
5624   emit_operand(rsi, src);
5625 }
5626 
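// The 64-bit shift/rotate forms below accept counts of 0..63, while
// isShiftCount() only accepts values below 32; hence the (imm8 >> 1) in
// their asserts.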
5627 void Assembler::rclq(Register dst, int imm8) {
5628   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5629   int encode = prefixq_and_encode(dst->encoding());
5630   if (imm8 == 1) {
5631     emit_int8((unsigned char)0xD1);
5632     emit_int8((unsigned char)(0xD0 | encode));
5633   } else {
5634     emit_int8((unsigned char)0xC1);
5635     emit_int8((unsigned char)(0xD0 | encode));
5636     emit_int8(imm8);
5637   }
5638 }
5639 
5640 void Assembler::rcrq(Register dst, int imm8) {
5641   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5642   int encode = prefixq_and_encode(dst->encoding());
5643   if (imm8 == 1) {
5644     emit_int8((unsigned char)0xD1);
5645     emit_int8((unsigned char)(0xD8 | encode));
5646   } else {
5647     emit_int8((unsigned char)0xC1);
5648     emit_int8((unsigned char)(0xD8 | encode));
5649     emit_int8(imm8);
5650   }
5651 }
5652 
5653 void Assembler::rorq(Register dst, int imm8) {
5654   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5655   int encode = prefixq_and_encode(dst->encoding());
5656   if (imm8 == 1) {
5657     emit_int8((unsigned char)0xD1);
5658     emit_int8((unsigned char)(0xC8 | encode));
5659   } else {
5660     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
5662     emit_int8(imm8);
5663   }
5664 }
5665 
5666 void Assembler::rorxq(Register dst, Register src, int imm8) {
5667   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
5668   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
5669   emit_int8((unsigned char)0xF0);
5670   emit_int8((unsigned char)(0xC0 | encode));
5671   emit_int8(imm8);
5672 }
5673 
5674 void Assembler::sarq(Register dst, int imm8) {
5675   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5676   int encode = prefixq_and_encode(dst->encoding());
5677   if (imm8 == 1) {
5678     emit_int8((unsigned char)0xD1);
5679     emit_int8((unsigned char)(0xF8 | encode));
5680   } else {
5681     emit_int8((unsigned char)0xC1);
5682     emit_int8((unsigned char)(0xF8 | encode));
5683     emit_int8(imm8);
5684   }
5685 }
5686 
5687 void Assembler::sarq(Register dst) {
5688   int encode = prefixq_and_encode(dst->encoding());
5689   emit_int8((unsigned char)0xD3);
5690   emit_int8((unsigned char)(0xF8 | encode));
5691 }
5692 
5693 void Assembler::sbbq(Address dst, int32_t imm32) {
5694   InstructionMark im(this);
5695   prefixq(dst);
5696   emit_arith_operand(0x81, rbx, dst, imm32);
5697 }
5698 
5699 void Assembler::sbbq(Register dst, int32_t imm32) {
5700   (void) prefixq_and_encode(dst->encoding());
5701   emit_arith(0x81, 0xD8, dst, imm32);
5702 }
5703 
5704 void Assembler::sbbq(Register dst, Address src) {
5705   InstructionMark im(this);
5706   prefixq(src, dst);
5707   emit_int8(0x1B);
5708   emit_operand(dst, src);
5709 }
5710 
5711 void Assembler::sbbq(Register dst, Register src) {
5712   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5713   emit_arith(0x1B, 0xC0, dst, src);
5714 }
5715 
5716 void Assembler::shlq(Register dst, int imm8) {
5717   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5718   int encode = prefixq_and_encode(dst->encoding());
5719   if (imm8 == 1) {
5720     emit_int8((unsigned char)0xD1);
5721     emit_int8((unsigned char)(0xE0 | encode));
5722   } else {
5723     emit_int8((unsigned char)0xC1);
5724     emit_int8((unsigned char)(0xE0 | encode));
5725     emit_int8(imm8);
5726   }
5727 }
5728 
5729 void Assembler::shlq(Register dst) {
5730   int encode = prefixq_and_encode(dst->encoding());
5731   emit_int8((unsigned char)0xD3);
5732   emit_int8((unsigned char)(0xE0 | encode));
5733 }
5734 
5735 void Assembler::shrq(Register dst, int imm8) {
5736   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5737   int encode = prefixq_and_encode(dst->encoding());
5738   emit_int8((unsigned char)0xC1);
5739   emit_int8((unsigned char)(0xE8 | encode));
5740   emit_int8(imm8);
5741 }
5742 
5743 void Assembler::shrq(Register dst) {
5744   int encode = prefixq_and_encode(dst->encoding());
5745   emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
5747 }
5748 
5749 void Assembler::subq(Address dst, int32_t imm32) {
5750   InstructionMark im(this);
5751   prefixq(dst);
5752   emit_arith_operand(0x81, rbp, dst, imm32);
5753 }
5754 
5755 void Assembler::subq(Address dst, Register src) {
5756   InstructionMark im(this);
5757   prefixq(dst, src);
5758   emit_int8(0x29);
5759   emit_operand(src, dst);
5760 }
5761 
5762 void Assembler::subq(Register dst, int32_t imm32) {
5763   (void) prefixq_and_encode(dst->encoding());
5764   emit_arith(0x81, 0xE8, dst, imm32);
5765 }
5766 
// Force generation of a 4-byte immediate value even if it fits into 8 bits
5768 void Assembler::subq_imm32(Register dst, int32_t imm32) {
5769   (void) prefixq_and_encode(dst->encoding());
5770   emit_arith_imm32(0x81, 0xE8, dst, imm32);
5771 }
5772 
5773 void Assembler::subq(Register dst, Address src) {
5774   InstructionMark im(this);
5775   prefixq(src, dst);
5776   emit_int8(0x2B);
5777   emit_operand(dst, src);
5778 }
5779 
5780 void Assembler::subq(Register dst, Register src) {
5781   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5782   emit_arith(0x2B, 0xC0, dst, src);
5783 }
5784 
5785 void Assembler::testq(Register dst, int32_t imm32) {
  // Not using emit_arith because test does not support sign-extension of
  // 8-bit operands. When dst is rax we use the shorter rax-only encoding
  // (REX.W A9 imm32).
5789   int encode = dst->encoding();
5790   if (encode == 0) {
5791     prefix(REX_W);
5792     emit_int8((unsigned char)0xA9);
5793   } else {
5794     encode = prefixq_and_encode(encode);
5795     emit_int8((unsigned char)0xF7);
5796     emit_int8((unsigned char)(0xC0 | encode));
5797   }
5798   emit_int32(imm32);
5799 }
5800 
5801 void Assembler::testq(Register dst, Register src) {
5802   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5803   emit_arith(0x85, 0xC0, dst, src);
5804 }
5805 
5806 void Assembler::xaddq(Address dst, Register src) {
5807   InstructionMark im(this);
5808   prefixq(dst, src);
5809   emit_int8(0x0F);
5810   emit_int8((unsigned char)0xC1);
5811   emit_operand(src, dst);
5812 }
5813 
5814 void Assembler::xchgq(Register dst, Address src) {
5815   InstructionMark im(this);
5816   prefixq(src, dst);
5817   emit_int8((unsigned char)0x87);
5818   emit_operand(dst, src);
5819 }
5820 
5821 void Assembler::xchgq(Register dst, Register src) {
5822   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5823   emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
5825 }
5826 
5827 void Assembler::xorq(Register dst, Register src) {
5828   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5829   emit_arith(0x33, 0xC0, dst, src);
5830 }
5831 
5832 void Assembler::xorq(Register dst, Address src) {
5833   InstructionMark im(this);
5834   prefixq(src, dst);
5835   emit_int8(0x33);
5836   emit_operand(dst, src);
5837 }
5838 
5839 #endif // !LP64