ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

dasm_x86.lua (71256B)


      1 ------------------------------------------------------------------------------
      2 -- DynASM x86/x64 module.
      3 --
      4 -- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
      5 -- See dynasm.lua for full copyright notice.
      6 ------------------------------------------------------------------------------
      7 
      8 local x64 = x64
      9 
     -- Descriptive metadata for this backend (reachable as _M._info).
     local _info = {
       arch = x64 and "x64" or "x86",
       description = "DynASM x86/x64 module",
       version = "1.4.0",
       vernum = 10400,
       release = "2015-10-18",
       author = "Mike Pall",
       license = "MIT",
     }

     -- Table of glue functions exported by this arch-specific module.
     local _M = {}
     _M._info = _info
     23 
     24 -- Cache library functions.
     25 local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
     26 local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
     27 local _s = string
     28 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
     29 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
     30 local concat, sort, remove = table.concat, table.sort, table.remove
     31 local bit = bit or require("bit")
     32 local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
     33 
     34 -- Inherited tables and callbacks.
     35 local g_opt, g_arch
     36 local wline, werror, wfatal, wwarn
     37 
     -- Ordered list of action names. The position of a name in this list
     -- determines its action number, so the order must not change.
     -- CHECK: Keep this in sync with the C code!
     local action_names = {
       -- int arg, 1 buffer pos:
       "DISP",
       "IMM_S", "IMM_B", "IMM_W", "IMM_D",
       "IMM_WB", "IMM_DB",
       -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
       "VREG",
       "SPACE",
       -- ptrdiff_t arg, 1 buffer pos (address): !x64
       "SETLABEL",
       "REL_A",
       -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
       "REL_LG",
       "REL_PC",
       -- action arg (1 byte) or int arg, 1 buffer pos (link):
       "IMM_LG",
       "IMM_PC",
       -- action arg (1 byte) or int arg, 1 buffer pos (offset):
       "LABEL_LG",
       "LABEL_PC",
       -- action arg (1 byte), 1 buffer pos (offset):
       "ALIGN",
       -- action args (2 bytes), no buffer pos.
       "EXTERN",
       -- action arg (1 byte), no buffer pos.
       "ESC",
       -- no action arg, no buffer pos.
       "MARK",
       -- action arg (1 byte), no buffer pos, terminal action:
       "SECTION",
       -- no args, no buffer pos, terminal action:
       "STOP",
     }
     66 
     -- Upper bound on section buffer positions used by one dasm_put().
     -- CHECK: Keep this in sync with the C code!
     local maxsecpos = 25 -- Kept low so the generated C lines stay short.

     -- Lookup table: action name -> action number (filled in below).
     local map_action = {}
     -- Number of the first action. Byte values below it need no escaping.
     local actfirst = 256-#action_names

     -- Action list buffer, plus its string form (only used to find dupes).
     local actlist = {}
     local actstr = ""

     -- Arguments for the next dasm_put(); slot 1 is the action list offset.
     local actargs = { 0 }

     -- Running count of section buffer positions for the next dasm_put().
     local secpos = 1

     -- Encodings of the VREG kinds, already shifted left by 5 bits.
     local map_vreg = {
       ["modrm.rm.m"] = 0x00,
       ["modrm.rm.r"] = 0x20,
       ["opcode"]     = 0x20,
       ["sib.base"]   = 0x20,
       ["sib.index"]  = 0x40,
       ["modrm.reg"]  = 0x80,
       ["vex.v"]      = 0xa0,
       ["imm.hi"]     = 0xc0,
     }

     -- Number of pending VREG actions contributing to REX/VEX shrinkage.
     local vreg_shrink_count = 0

     ------------------------------------------------------------------------------

     -- Number the actions consecutively, in list order, starting at actfirst.
     for idx = 1, #action_names do
       map_action[action_names[idx]] = actfirst + idx - 1
     end
    108 
    -- Print every action name with its number (hex and decimal) to `out`.
    local function dumpactions(out)
      out:write("DynASM encoding engine action codes:\n")
      for idx = 1, #action_names do
        local aname = action_names[idx]
        local code = map_action[aname]
        out:write(format("  %-10s %02X  %d\n", aname, code, code))
      end
      out:write("\n")
    end
    118 
    -- Emit the action list buffer as one static C array called `name`.
    -- The final byte is taken out of actlist first so that it can be written
    -- without a trailing comma (255 is written if the list is empty).
    -- Note that actlist itself stays one entry shorter afterwards.
    local function writeactions(out, name)
      local count = #actlist
      local final = actlist[count] or 255
      actlist[count] = nil
      if count == 0 then count = 1 end
      out:write("static const unsigned char ", name, "[", count, "] = {\n")
      local line = "  "
      for idx = 1, #actlist do
        line = line..actlist[idx]..","
        -- Start a new output line once the current one is long enough.
        if #line >= 75 then
          assert(out:write(line, "\n"))
          line = "  "
        end
      end
      out:write(line, final, "\n};\n\n")
    end
    136 
    137 ------------------------------------------------------------------------------
    138 
    -- Append one raw byte to the action list.
    -- Asserts that n is an integral value in the range 0..255.
    local function wputxb(n)
      local is_byte = n >= 0 and n <= 255 and n % 1 == 0
      assert(is_byte, "byte out of range")
      actlist[#actlist+1] = n
    end
    144 
    -- Append the byte for a named action to the action list.
    --   a   : optional argument, queued for the next dasm_put() call.
    --   num : optional number of buffer positions to advance (1 if omitted
    --         and an argument is given; no advance if both are absent).
    local function waction(action, a, num)
      local code = map_action[action]
      assert(code, "bad action name `"..action.."'")
      wputxb(code)
      if a then
        actargs[#actargs+1] = a
        secpos = secpos + (num or 1)
      elseif num then
        secpos = secpos + num
      end
    end
    151 
    -- Optionally add a VREG action.
    --   kind  : key into map_vreg selecting the encoding of the patched field.
    --   vreg  : argument expression for the variable register; when it is
    --           nil/false nothing is emitted at all.
    --   psz   : added to the emitted descriptor byte (defaults to 0).
    --   sk    : threshold (defaults to 0); a kind whose encoding is below it
    --           increments the pending shrink count.
    --   defer : when truthy, the pending shrink count is left for a later
    --           call instead of being folded into this descriptor byte.
    local function wvreg(kind, vreg, psz, sk, defer)
      if not vreg then return end
      waction("VREG", vreg)
      -- Name the offending kind in the message (it used to print vreg,
      -- which is the register expression and not the thing that is wrong).
      local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
      if b < (sk or 0) then
        vreg_shrink_count = vreg_shrink_count + 1
      end
      if not defer then
        -- Fold the accumulated shrink count into the byte and reset it.
        b = b + vreg_shrink_count * 8
        vreg_shrink_count = 0
      end
      wputxb(b + (psz or 0))
    end
    166 
    -- Write a line calling the embedded DynASM C function dasm_<func>,
    -- passing Dst followed by the comma-separated entries of `args`.
    local function wcall(func, args)
      local arglist = concat(args, ", ")
      local stmt = format("dasm_%s(Dst, %s);", func, arglist)
      wline(stmt, true)
    end
    171 
    -- Remove the action list chunk that starts after `offset` if an identical
    -- byte sequence was already recorded. A tad slow, but so what.
    local function dedupechunk(offset)
      local first = offset + 1
      local chunk = char(unpack(actlist, first, #actlist))
      local seen = find(actstr, chunk, 1, true) -- Plain (non-pattern) search.
      if not seen then
        -- New chunk: remember it for later comparisons.
        actstr = actstr..chunk
        return
      end
      actargs[1] = seen - 1 -- Point at the earlier occurrence instead.
      for idx = first, #actlist do actlist[idx] = nil end -- Drop the dupe.
    end
    184 
    -- Flush the pending actions into a dasm_put() call (needed before
    -- intervening C code or on buffer position overflow).
    -- A STOP action is appended first unless `term` is truthy.
    local function wflush(term)
      local offset = actargs[1]
      if #actlist ~= offset then -- Otherwise there is nothing to flush.
        if not term then waction("STOP") end
        dedupechunk(offset)
        wcall("put", actargs)
        -- The next dasm_put() starts at the current end of the action list.
        actargs = { #actlist }
        -- The action list offset occupies a buffer position, too.
        secpos = 1
      end
    end
    195 
    -- Append a data byte, preceded by an ESC action if its value falls into
    -- the range used by action numbers.
    local function wputb(n)
      local collides = n >= actfirst
      if collides then waction("ESC") end
      wputxb(n)
    end
    201 
    202 ------------------------------------------------------------------------------
    203 
    -- Global label name -> global label number. Unknown names get the next
    -- free number (starting at 10) the first time they are looked up.
    local next_global = 10
    local map_global = setmetatable({}, {
      __index = function(tab, label)
        if not match(label, "^[%a_][%w_@]*$") then
          werror("bad global label")
        end
        local num = next_global
        if num > 246 then werror("too many global labels") end
        next_global = num + 1
        tab[label] = num
        return num
      end,
    })
    214 
    -- Print the names of all global labels, ordered by label number.
    local function dumpglobals(out, lvl)
      local by_num = {}
      for label, num in pairs(map_global) do by_num[num] = label end
      out:write("Global labels:\n")
      for num = 10, next_global-1 do
        out:write(format("  %s\n", by_num[num]))
      end
      out:write("\n")
    end
    225 
    -- Write a C enum with one `prefix`ed entry per global label, in label
    -- number order, terminated by <prefix>_MAX. Anything from the first "@"
    -- of a label name onwards is dropped from the enum identifier.
    local function writeglobals(out, prefix)
      local by_num = {}
      for label, num in pairs(map_global) do by_num[num] = label end
      out:write("enum {\n")
      for num = 10, next_global-1 do
        local ident = gsub(by_num[num], "@.*", "") -- Keep 1st result only.
        out:write("  ", prefix, ident, ",\n")
      end
      out:write("  ", prefix, "_MAX\n};\n")
    end
    236 
    -- Write a NULL-terminated C array `name` holding the global label names
    -- as string literals, in label number order.
    local function writeglobalnames(out, name)
      local by_num = {}
      for label, num in pairs(map_global) do by_num[num] = label end
      out:write("static const char *const ", name, "[] = {\n")
      for num = 10, next_global-1 do
        out:write("  \"", by_num[num], "\",\n")
      end
      out:write("  (const char *)0\n};\n")
    end
    247 
    248 ------------------------------------------------------------------------------
    249 
    -- Extern label name -> extern label number. Numbers are negative and are
    -- handed out downwards from -1 the first time a name is looked up.
    local next_extern = -1
    local map_extern = setmetatable({}, {
      __index = function(tab, label)
        -- No restrictions on the name for now.
        local num = next_extern
        if num < -256 then werror("too many extern labels") end
        next_extern = num - 1
        tab[label] = num
        return num
      end,
    })
    260 
    -- Print the names of all extern labels in order of first use.
    local function dumpexterns(out, lvl)
      local by_idx = {}
      for label, num in pairs(map_extern) do by_idx[-num] = label end
      out:write("Extern labels:\n")
      for idx = 1, -next_extern-1 do
        out:write(format("  %s\n", by_idx[idx]))
      end
      out:write("\n")
    end
    271 
    -- Write a NULL-terminated C array `name` holding the extern label names
    -- as string literals, in order of first use.
    local function writeexternnames(out, name)
      local by_idx = {}
      for label, num in pairs(map_extern) do by_idx[-num] = label end
      out:write("static const char *const ", name, "[] = {\n")
      for idx = 1, -next_extern-1 do
        out:write("  \"", by_idx[idx], "\",\n")
      end
      out:write("  (const char *)0\n};\n")
    end
    282 
    283 ------------------------------------------------------------------------------
    284 
    -- Arch-specific register tables ("int." = internal, "ext." = external).
    local map_archdef = {}         -- Ext. register name -> int. name.
    local map_reg_rev = {}         -- Int. register name -> ext. name.
    local map_reg_num = {}         -- Int. register name -> register number.
    local map_reg_opsize = {}      -- Int. register name -> operand size.
    local map_reg_valid_base = {}  -- Int. register name -> valid base register?
    local map_reg_valid_index = {} -- Int. register name -> valid index register?
    local map_reg_needrex = {}     -- Int. register name -> need rex vs. no rex.
    local reg_list = {}            -- Canonical list of int. register names.

    -- Type name -> { ctype, reg }
    local map_type = {}
    -- Type number (for _PTx macros).
    local ctypenum = 0

    -- Operand size code used for addresses: qword on x64, dword otherwise.
    local addrsize = x64 and "q" or "d"
    299 
    -- Fill the register tables for one register class.
    --   sz    : operand size code of the class.
    --   cl    : external name of the class itself (maps to "@"..sz with
    --           register number -1).
    --   names : optional list of traditional names for registers 0, 1, ...
    -- Afterwards the generic numbered names are added, too (0..15 on x64
    -- for everything except size "f", 0..7 otherwise). An empty string is
    -- appended to reg_list after each class.
    local function mkrmap(sz, cl, names)
      local addressable = (sz == addrsize or sz == "d")

      -- Enter one internal register name into the per-register tables.
      local function record(iname, extname, num)
        reg_list[#reg_list+1] = iname
        map_reg_rev[iname] = extname
        map_reg_num[iname] = num
        map_reg_opsize[iname] = sz
        if addressable then
          map_reg_valid_base[iname] = true
          map_reg_valid_index[iname] = true
        end
      end

      -- The class itself.
      local cname = format("@%s", sz)
      map_archdef[cl] = cname
      record(cname, cl, -1)

      -- Traditional names, if any.
      if names then
        for idx = 1, #names do
          local extname = names[idx]
          local iname = format("@%s%x", sz, idx-1)
          map_archdef[extname] = iname
          record(iname, extname, idx-1)
          if sz == "b" and idx > 4 then map_reg_needrex[iname] = false end
        end
      end

      -- Generic numbered names.
      local top = (x64 and sz ~= "f") and 15 or 7
      for i = 0, top do
        local needrex = sz == "b" and i > 3
        local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
        if needrex then map_reg_needrex[iname] = true end
        local extname
        if sz == "o" or sz == "y" then
          extname = format("%s%d", cl, i)
        elseif sz == "f" then
          extname = format("st%d", i)
        else
          extname = format("r%d%s", i, sz == addrsize and "" or sz)
        end
        map_archdef[extname] = iname
        -- Only record internal names not already entered above.
        if not map_reg_rev[iname] then record(iname, extname, i) end
      end

      reg_list[#reg_list+1] = ""
    end
    349 
    -- Integer registers (qword only on x64, then dword, word and byte).
    if x64 then
      mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
    end
    mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
    mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
    mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})

    -- Adjustments: esp (and rsp on x64) are no valid index registers.
    map_reg_valid_index[map_archdef.esp] = false
    if x64 then
      map_reg_valid_index[map_archdef.rsp] = false
      map_reg_needrex[map_archdef.Rb] = true
    end
    -- Ra is the address-sized register class.
    map_archdef["Ra"] = "@"..addrsize

    -- FP registers (internally tword sized, but use "f" as operand size).
    mkrmap("f", "Rf")

    -- SSE registers (oword sized, but qword and dword accessible).
    mkrmap("o", "xmm")

    -- AVX registers (yword sized, but oword, qword and dword accessible).
    mkrmap("y", "ymm")
    370 
    -- Operand size keyword -> size code.
    local map_opsize = {
      byte  = "b",
      word  = "w",
      dword = "d",
      qword = "q",
      oword = "o",
      yword = "y",
      tword = "t",
      aword = addrsize,
    }

    -- Size code -> size in bytes.
    local map_opsizenum = {
      b = 1,
      w = 2,
      d = 4,
      q = 8,
      o = 16,
      y = 32,
      t = 10,
    }

    -- Size code -> size keyword (for messages and dumps).
    local map_opsizename = {
      b = "byte",
      w = "word",
      d = "dword",
      q = "qword",
      o = "oword",
      y = "yword",
      t = "tword",
      f = "fpword",
    }

    -- Index register scale factor (as written) -> encoded scale.
    local map_xsc = { ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3 }

    -- Condition code suffix -> condition number (aliases share a number).
    local map_cc = {
      o = 0,   no = 1,
      b = 2,   c = 2,   nae = 2,
      nb = 3,  nc = 3,  ae = 3,
      e = 4,   z = 4,
      ne = 5,  nz = 5,
      be = 6,  na = 6,
      nbe = 7, a = 7,
      s = 8,   ns = 9,
      p = 10,  pe = 10,
      np = 11, po = 11,
      l = 12,  nge = 12,
      nl = 13, ge = 13,
      le = 14, ng = 14,
      nle = 15, g = 15,
    }

    400 
    401 
    -- Replace internal register names ("@" followed by alphanumerics) in `s`
    -- by their external names. Returns both results of gsub(): the new
    -- string and the number of matches.
    function _M.revdef(s)
      local text, nmatch = gsub(s, "@%w+", map_reg_rev)
      return text, nmatch
    end
    406 
    -- Print all registers with their operand size and internal number.
    -- Empty entries in reg_list produce a blank line between classes.
    local function dumpregs(out)
      out:write("Register names, sizes and internal numbers:\n")
      for idx = 1, #reg_list do
        local reg = reg_list[idx]
        if reg ~= "" then
          local extname = map_reg_rev[reg]
          local num = map_reg_num[reg]
          local szname = map_opsizename[map_reg_opsize[reg]]
          -- Negative numbers denote register classes.
          local shown = num < 0 and "(variable)" or num
          out:write(format("  %-5s %-8s %s\n", extname, szname, shown))
        else
          out:write("\n")
        end
      end
    end
    422 
    423 ------------------------------------------------------------------------------
    424 
    -- Put the action for a label argument (IMM_LG, IMM_PC, REL_LG, REL_PC).
    --   aprefix : "IMM_" or "REL_".
    --   imm     : a number selects a local/global label (>= 0) or an extern
    --             label (< 0); anything else is passed on as the argument
    --             of the ..PC action.
    --   num     : number of buffer positions, handed to waction().
    local function wputlabel(aprefix, imm, num)
      if type(imm) ~= "number" then
        waction(aprefix.."PC", imm, num)
        return
      end
      if imm < 0 then
        -- Extern label: a type byte (0 for IMM_, 1 otherwise), then the index.
        waction("EXTERN")
        wputxb(aprefix == "IMM_" and 0 or 1)
        imm = -imm-1
      else
        waction(aprefix.."LG", nil, num)
      end
      wputxb(imm)
    end
    440 
    -- Put a signed byte: numbers are range-checked (-128..127) and emitted
    -- directly, anything else becomes the argument of an IMM_S action.
    local function wputsbarg(n)
      if type(n) ~= "number" then
        waction("IMM_S", n)
        return
      end
      if n < -128 or n > 127 then
        werror("signed immediate byte out of range")
      end
      -- Negative values are stored as their unsigned byte equivalent.
      if n < 0 then n = n + 256 end
      wputb(n)
    end
    451 
    -- Put an unsigned byte: numbers are range-checked (0..255) and emitted
    -- directly, anything else becomes the argument of an IMM_B action.
    local function wputbarg(n)
      if type(n) ~= "number" then
        waction("IMM_B", n)
        return
      end
      if n < 0 or n > 255 then
        werror("unsigned immediate byte out of range")
      end
      wputb(n)
    end
    461 
    -- Put an unsigned word: numbers must fit into 16 bits and are emitted
    -- low byte first, anything else becomes the argument of an IMM_W action.
    local function wputwarg(n)
      if type(n) ~= "number" then
        waction("IMM_W", n)
        return
      end
      if shr(n, 16) ~= 0 then
        werror("unsigned immediate word out of range")
      end
      local lo, hi = band(n, 255), shr(n, 8)
      wputb(lo)
      wputb(hi)
    end
    471 
    -- Put a signed or unsigned dword.
    --   number : emitted directly as four bytes, lowest byte first.
    --   table  : its first element is emitted as an IMM_ label reference.
    --   other  : becomes the argument of an IMM_D action.
    local function wputdarg(n)
      local kind = type(n)
      if kind == "table" then
        wputlabel("IMM_", n[1], 1)
      elseif kind ~= "number" then
        waction("IMM_D", n)
      else
        for shift = 0, 16, 8 do
          wputb(band(shr(n, shift), 255))
        end
        wputb(shr(n, 24))
      end
    end
    486 
    -- Put a number or argument whose width depends on the operand size code.
    -- A missing size, "d" and "q" all emit a dword; "w", "b" and "s" emit a
    -- word, an unsigned byte and a signed byte; anything else is an error.
    local function wputszarg(sz, n)
      if sz == "w" then
        wputwarg(n)
      elseif sz == "b" then
        wputbarg(n)
      elseif sz == "s" then
        wputsbarg(n)
      elseif not sz or sz == "d" or sz == "q" then
        wputdarg(n)
      else
        werror("bad operand size")
      end
    end
    495 
    496 -- Put multi-byte opcode with operand-size dependent modifications.
        --
        -- Emits, through wputb(), an optional VEX or REX prefix followed by the
        -- opcode bytes, most significant byte first.
        --   sz     : operand size code. "w" emits a 102 (0x66) prefix byte, "b"
        --            lowers the final opcode byte by one and "y" adds 4 to the
        --            last VEX byte.
        --   op     : opcode as a number; may need more than 32 bits (see below).
        --   rex    : prefix bit mask. Without vex, a non-zero value is emitted as
        --            the byte 64 + band(rex, 15) and is an error when not x64.
        --   vex    : optional table; the fields m, p, l and v (with v.reg and
        --            v.vreg) are read here.
        --   vregr, vregxb : truthy flags; presumably set by the caller when
        --            variable registers are involved -- confirm against callers.
        -- Returns psz, sk. wputmrmsib() below takes parameters with these names
        -- and hands them to wvreg(). NOTE(review): psz looks like a byte distance
        -- back to the prefix -- confirm against the C encoder.
    497 local function wputop(sz, op, rex, vex, vregr, vregxb)
    498   local psz, sk = 0, nil
    499   if vex then
    500     local tail
            -- The short form starting with 0xc5 is only considered when vex.m
            -- is 1 and the rex bits with values 1, 2 and 8 are all clear.
    501     if vex.m == 1 and band(rex, 11) == 0 then
    502       if x64 and vregxb then
                -- tail stays nil here, so the 0xc4 form below is emitted; only
                -- the threshold for wvreg() is recorded.
    503 	sk = map_vreg["modrm.reg"]
    504       else
    505 	wputb(0xc5)
    506       tail = shl(bxor(band(rex, 4), 4), 5)
    507       psz = 3
    508       end
    509     end
            -- Long form starting with 0xc4: the low three rex bits go, inverted,
            -- into the top bits of the second byte next to vex.m.
    510     if not tail then
    511       wputb(0xc4)
    512       wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
    513       tail = shl(band(rex, 8), 4)
    514       psz = 4
    515     end
            -- A negative vex.v.reg denotes a variable register: 0 is encoded
            -- and a "vex.v" VREG action is added for vex.v.vreg.
    516     local reg, vreg = 0, nil
    517     if vex.v then
    518       reg = vex.v.reg
    519       if not reg then werror("bad vex operand") end
    520       if reg < 0 then reg = 0; vreg = vex.v.vreg end
    521     end
    522     if sz == "y" or vex.l then tail = tail + 4 end
    523     wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    524     wvreg("vex.v", vreg)
            -- rex has been folded into the VEX bytes; no REX byte is emitted.
    525     rex = 0
    526     if op >= 256 then werror("bad vex opcode") end
    527   else
    528     if rex ~= 0 then
    529       if not x64 then werror("bad operand size") end
    530     elseif (vregr or vregxb) and x64 then
              -- 0x10 is non-zero but has no low bits set, so the code below
              -- emits the byte 64 + band(0x10, 15) = 64.
    531       rex = 0x10
    532       sk = map_vreg["vex.v"]
    533     end
    534   end
    535   local r
    536   if sz == "w" then wputb(102) end
    537   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
    538   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
    539   if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
    540   if op >= 65536 then
    541     if rex ~= 0 then
              -- For opcodes of the form 0f 3a xx / 0f 38 xx the REX byte is
              -- emitted before the first of the three opcode bytes.
    542       local opc3 = band(op, 0xffff00)
    543       if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
    544 	wputb(64 + band(rex, 15)); rex = 0; psz = 2
    545       end
    546     end
    547     wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
    548   end
    549   if op >= 256 then
    550     local b = shr(op, 8)
            -- A pending REX byte is emitted in front of a 0x0f (15) byte.
    551     if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    552     wputb(b); op = band(op, 255); psz = psz + 1
    553   end
          -- Otherwise a pending REX byte directly precedes the last opcode byte.
    554   if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
    555   if sz == "b" then op = op - 1 end
    556   wputb(op)
    557   return psz, sk
    558 end
    559 
    -- Put one ModRM- or SIB-formatted byte: m in the top two bits, the low
    -- three bits of s in the middle and the low three bits of rm at the
    -- bottom. The vs and vrm parameters are not used by this function.
    local function wputmodrm(m, s, rm, vs, vrm)
      assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
      local top = shl(m, 6)
      local mid = shl(band(s, 7), 3)
      local low = band(rm, 7)
      wputb(top + mid + low)
    end
    565 
    566 -- Put ModRM/SIB plus optional displacement.
        --
        --   t     : operand table; this function reads t.mode, t.reg, t.xreg,
        --           t.vreg, t.vxreg, t.xsc and t.disp.
        --   imark : immediate marker. "I" requests a MARK action at the points
        --           shown below; it is also used as a pattern against "UWSiI"
        --           in the rip-relative case.
        --   s     : value for the middle field of the ModRM byte; a negative
        --           value is treated as 0.
        --   vsreg : variable register expression for the "modrm.reg" field,
        --           or nil.
        --   psz, sk : passed through to wvreg() (psz with 1 or 2 added).
        -- A negative t.reg / t.xreg selects the variable register expression in
        -- t.vreg / t.vxreg; the register number put into the byte is then 0.
    567 local function wputmrmsib(t, imark, s, vsreg, psz, sk)
    568   local vreg, vxreg
    569   local reg, xreg = t.reg, t.xreg
    570   if reg and reg < 0 then reg = 0; vreg = t.vreg end
    571   if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
    572   if s < 0 then s = 0 end
    573 
    574   -- Register mode.
    575   if sub(t.mode, 1, 1) == "r" then
    576     wputmodrm(3, s, reg)
    577     wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    578     wvreg("modrm.rm.r", vreg, psz+1, sk)
    579     return
    580   end
    581 
    582   local disp = t.disp
    583   local tdisp = type(disp)
    584   -- No base register?
    585   if not reg then
    586     local riprel = false
    587     if xreg then
    588       -- Indexed mode with index register only.
    589       -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
    590       wputmodrm(0, s, 4)
    591       if imark == "I" then waction("MARK") end
    592       wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
    593       wputmodrm(t.xsc, xreg, 5)
    594       wvreg("sib.index", vxreg, psz+2, sk)
    595     else
    596       -- Pure 32 bit displacement.
    597       if x64 and tdisp ~= "table" then
    598 	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
    599 	wvreg("modrm.reg", vsreg, psz+1, sk)
    600 	if imark == "I" then waction("MARK") end
    601 	wputmodrm(0, 4, 5)
    602       else
                -- On x64 this branch is only reached with a table-typed disp,
                -- which is emitted as a REL_ label reference below.
    603 	riprel = x64
    604 	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
    605 	wvreg("modrm.reg", vsreg, psz+1, sk)
    606 	if imark == "I" then waction("MARK") end
    607       end
    608     end
    609     if riprel then -- Emit rip-relative displacement.
    610       if match("UWSiI", imark) then
    611 	werror("NYI: rip-relative displacement followed by immediate")
    612       end
    613       -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
    614       wputlabel("REL_", disp[1], 2)
    615     else
    616       wputdarg(disp)
    617     end
    618     return
    619   end
    620 
          -- m becomes the first argument of wputmodrm(): 0 = no displacement,
          -- 1 = signed byte, 2 = dword. It stays nil when the displacement is
          -- not known here; then 2 is encoded and a DISP action is added at
          -- the end of this function.
    621   local m
    622   if tdisp == "number" then -- Check displacement size at assembly time.
    623     if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
    624       if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    625     elseif disp >= -128 and disp <= 127 then m = 1
    626     else m = 2 end
    627   elseif tdisp == "table" then
    628     m = 2
    629   end
    630 
    631   -- Index register present or esp as base register: need SIB encoding.
    632   if xreg or band(reg, 7) == 4 then
    633     wputmodrm(m or 2, s, 4) -- ModRM.
    634     if m == nil or imark == "I" then waction("MARK") end
    635     wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    636     wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    637     wvreg("sib.index", vxreg, psz+2, sk, vreg)
    638     wvreg("sib.base", vreg, psz+2, sk)
    639   else
    640     wputmodrm(m or 2, s, reg) -- ModRM.
    641     if (imark == "I" and (m == 1 or m == 2)) or
    642        (m == nil and (vsreg or vreg)) then waction("MARK") end
    643     wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    644     wvreg("modrm.rm.m", vreg, psz+1, sk)
    645   end
    646 
    647   -- Put displacement.
    648   if m == 1 then wputsbarg(disp)
    649   elseif m == 2 then wputdarg(disp)
    650   elseif m == nil then waction("DISP", disp) end
    651 end
    652 
    653 ------------------------------------------------------------------------------
    654 
    -- Build a readable description of an instruction's operands: the opcode
    -- name followed by, for each operand, the first character of its mode
    -- plus its operand size ("?" if it has none), separated by commas.
    local function opmodestr(op, args)
      local parts = {}
      for idx = 1, #args do
        local operand = args[idx]
        local size = operand.opsize or "?"
        parts[idx] = sub(operand.mode, 1, 1)..size
      end
      return op.." "..concat(parts, ",")
    end
    664 
    -- Convert `expr` to a number if tonumber() accepts it. Returns nothing
    -- otherwise. Fractional values and values outside -2^31 .. 2^32-1 are
    -- reported through werror().
    local function toint(expr)
      local num = tonumber(expr)
      if not num then return end
      local fractional = num % 1 ~= 0
      if fractional or num < -2147483648 or num > 4294967295 then
        werror("bad integer number `"..expr.."'")
      end
      return num
    end
    675 
    -- Parse an immediate expression. Returns a mode string and a value:
    --   &expr        -> "iPJ", the expression cast to ptrdiff_t
    --   =>expr       -> "iJ",  the pc label expression
    --   ->name       -> "iJ",  the global label number
    --   <N or >N     -> "iJ",  the local label number (N is 1..9)
    --   extern name  -> "iJ",  the extern label number
    --   anything else-> "iI",  the unchanged expression
    local function immexpr(expr)
      if sub(expr, 1, 1) == "&" then
        return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
      end

      local lead = sub(expr, 1, 2)
      if lead == "=>" then
        return "iJ", sub(expr, 3)
      elseif lead == "->" then
        return "iJ", map_global[sub(expr, 3)]
      end

      local dir, lnum = match(expr, "^([<>])([1-9])$")
      if dir then
        -- Forward references use 247-255, backward references 1-9.
        local base = (dir == ">") and 246 or 0
        return "iJ", lnum + base
      end

      local extname = match(expr, "^extern%s+(%S+)$")
      if extname then
        return "iJ", map_extern[extname]
      end

      return "iI", expr
    end
    707 
    -- Parse a displacement expression: +-num, +-expr, +-opsize*num.
    -- Returns a number when the value is known here, a one-element table
    -- holding a label reference, or the (signed) expression string.
    local function dispexpr(expr)
      if expr == "" then return 0 end
      local plain = toint(expr)
      if plain then return plain end

      local sign, rest = match(expr, "^([+-])%s*(.+)$")
      if not sign then
        werror("bad displacement expression `"..expr.."'")
      elseif sign == "+" then
        expr = rest -- A leading plus is dropped from the returned string.
      end

      -- +-opsize*num
      local szname, factor = match(rest, "^(%w+)%s*%*%s*(.+)$")
      local szcode, count = map_opsize[szname], toint(factor)
      if szcode and count then
        if sign == "-" then count = -count end
        return count*map_opsizenum[szcode]
      end

      -- Label reference.
      local mode, iexpr = immexpr(rest)
      if mode == "iJ" then
        if sign == "-" then werror("cannot invert label reference") end
        return { iexpr }
      end

      return expr -- Need to return original signed expression.
    end
    731 
    -- Parse a register or type expression.
    -- For a known type name (optionally written as type:@reg to override
    -- its register) this returns the register name, its number and the type
    -- entry. For anything else it returns the expression itself and its
    -- entry in map_reg_num, which is nil if it is not a register.
    local function rtexpr(expr)
      if not expr then return end
      local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
      local tp = map_type[tname or expr]
      if not tp then
        return expr, map_reg_num[expr]
      end
      local reg = ovreg or tp.reg
      local rnum = map_reg_num[reg]
      if not rnum then
        werror("type `"..(tname or expr).."' needs a register override")
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
      end
      return reg, rnum, tp
    end
    750 
    751 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
        --
        -- Besides the fields listed above, the returned table may also carry
        -- vreg / vxreg (the parenthesized expression following a register whose
        -- number is -1) and needrex (copied from map_reg_needrex for plain
        -- register operands).
        -- The mode strings assigned directly in this function are "xm"/"xmO" for
        -- memory operands, "i" with optional "1" and "S" suffixes for numeric
        -- immediates, and "f"/"fF" or "rm"/"rmR"/"rmC" for registers; any other
        -- mode comes from immexpr().
        -- The repeat ... until true wrapper only exists so that `break` can be
        -- used to leave the parsing early.
    752 local function parseoperand(param)
    753   local t = {}
    754 
          -- A leading word that is a key of map_opsize is taken as an operand
          -- size override and removed from the expression.
    755   local expr = param
    756   local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
    757   if opsize then
    758     t.opsize = map_opsize[opsize]
    759     if t.opsize then expr = tailops end
    760   end
    761 
          -- br is the text between square brackets, or nil for other operands.
    762   local br = match(expr, "^%[%s*(.-)%s*%]$")
    763   repeat
    764     if br then
    765       t.mode = "xm"
    766 
    767       -- [disp]
    768       t.disp = toint(br)
    769       if t.disp then
    770 	t.mode = x64 and "xm" or "xmO"
    771 	break
    772       end
    773 
    774       -- [reg...]
    775       local tp
    776       local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
    777       reg, t.reg, tp = rtexpr(reg)
    778       if not t.reg then
    779 	-- [expr]
    780 	t.mode = x64 and "xm" or "xmO"
    781 	t.disp = dispexpr("+"..br)
    782 	break
    783       end
    784 
              -- Register number -1: a parenthesized expression must follow.
    785       if t.reg == -1 then
    786 	t.vreg, tailr = match(tailr, "^(%b())(.*)$")
    787 	if not t.vreg then werror("bad variable register expression") end
    788       end
    789 
    790       -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
    791       local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
    792       if xsc then
    793 	if not map_reg_valid_index[reg] then
    794 	  werror("bad index register `"..map_reg_rev[reg].."'")
    795 	end
    	-- The register parsed so far turns out to be the index register.
    796 	t.xsc = map_xsc[xsc]
    797 	t.xreg = t.reg
    798 	t.vxreg = t.vreg
    799 	t.reg = nil
    800 	t.vreg = nil
    801 	t.disp = dispexpr(tailsc)
    802 	break
    803       end
    804       if not map_reg_valid_base[reg] then
    805 	werror("bad base register `"..map_reg_rev[reg].."'")
    806       end
    807 
    808       -- [reg] or [reg+-disp]
    809       t.disp = toint(tailr) or (tailr == "" and 0)
    810       if t.disp then break end
    811 
    812       -- [reg+xreg...]
    813       local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
    814       xreg, t.xreg, tp = rtexpr(xreg)
    815       if not t.xreg then
    816 	-- [reg+-expr]
    817 	t.disp = dispexpr(tailr)
    818 	break
    819       end
    820       if not map_reg_valid_index[xreg] then
    821 	werror("bad index register `"..map_reg_rev[xreg].."'")
    822       end
    823 
    824       if t.xreg == -1 then
    825 	t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
    826 	if not t.vxreg then werror("bad variable register expression") end
    827       end
    828 
    829       -- [reg+xreg*xsc...]
    830       local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
    831       if xsc then
    832 	t.xsc = map_xsc[xsc]
    833 	tailx = tailsc
    834       end
    835 
    836       -- [...] or [...+-disp] or [...+-expr]
    837       t.disp = dispexpr(tailx)
    838     else
    839       -- imm or opsize*imm
    840       local imm = toint(expr)
    841       if not imm and sub(expr, 1, 1) == "*" and t.opsize then
    842 	imm = toint(sub(expr, 2))
    843 	if imm then
    844 	  imm = imm * map_opsizenum[t.opsize]
    845 	  t.opsize = nil
    846 	end
    847       end
    848       if imm then
    849 	if t.opsize then werror("bad operand size override") end
    850 	local m = "i"
    851 	if imm == 1 then m = m.."1" end
    	-- Values 0xffffff80..0xffffffff are folded to -128..-1.
    852 	if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
    853 	if imm >= -128 and imm <= 127 then m = m.."S" end
    854 	t.imm = imm
    855 	t.mode = m
    856 	break
    857       end
    858 
    859       local tp
    860       local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
    861       reg, t.reg, tp = rtexpr(reg)
    862       if t.reg then
    863 	if t.reg == -1 then
    864 	  t.vreg, tailr = match(tailr, "^(%b())(.*)$")
    865 	  if not t.vreg then werror("bad variable register expression") end
    866 	end
    867 	-- reg
    868 	if tailr == "" then
    869 	  if t.opsize then werror("bad operand size override") end
    870 	  t.opsize = map_reg_opsize[reg]
    871 	  if t.opsize == "f" then
    872 	    t.mode = t.reg == 0 and "fF" or "f"
    873 	  else
    874 	    if reg == "@w4" or (x64 and reg == "@d4") then
    875 	      wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
    876 	    end
    877 	    t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
    878 	  end
    879 	  t.needrex = map_reg_needrex[reg]
    880 	  break
    881 	end
    882 
    883 	-- type[idx], type[idx].field, type->field -> [reg+offset_expr]
    884 	if not tp then werror("bad operand `"..param.."'") end
    885 	t.mode = "xm"
    886 	t.disp = format(tp.ctypefmt, tailr)
    887       else
    888 	t.mode, t.imm = immexpr(expr)
    	-- Modes ending in "J" (label or pointer references) are always
    	-- address-sized.
    889 	if sub(t.mode, -1) == "J" then
    890 	  if t.opsize and t.opsize ~= addrsize then
    891 	    werror("bad operand size override")
    892 	  end
    893 	  t.opsize = addrsize
    894 	end
    895       end
    896     end
    897   until true
    898   return t
    899 end
    900 
    901 ------------------------------------------------------------------------------
    902 -- x86 Template String Description
    903 -- ===============================
    904 --
    905 -- Each template string is a list of [match:]pattern pairs,
    906 -- separated by "|". The first match wins. No match means a
    907 -- bad or unsupported combination of operand modes or sizes.
    908 --
    909 -- The match part and the ":" is omitted if the operation has
    910 -- no operands. Otherwise the first N characters are matched
    911 -- against the mode strings of each of the N operands.
    912 --
    913 -- The mode string for each operand type is (see parseoperand()):
    914 --   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
    915 --   FP register:      "f",  +"F" for st0
    916 --   Index operand:    "xm", +"O" for [disp] (pure offset)
    917 --   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
    918 --                     +"I" for arg, +"P" for pointer
    919 --   Any:              +"J" for valid jump targets
    920 --
    921 -- So a match character "m" (mixed) matches both an integer register
    922 -- and an index operand (to be encoded with the ModRM/SIB scheme).
    923 -- But "r" matches only a register and "x" only an index operand
    924 -- (e.g. for FP memory access operations).
    925 --
    926 -- The operand size match string starts right after the mode match
    927 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
    928 -- The effective data size of the operation is matched against this list.
    929 --
    930 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
    931 -- present, then all operands must be the same size. Unspecified sizes
    932 -- are ignored, but at least one operand must have a size or the pattern
    933 -- won't match (use the "byte", "word", "dword", "qword", "tword"
    934 -- operand size overrides. E.g.: mov dword [eax], 1).
    935 --
    936 -- If the list has a "1" or "2" prefix, the operand size is taken
    937 -- from the respective operand and any other operand sizes are ignored.
    938 -- If the list contains only ".", all operand sizes are ignored.
    939 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
    940 -- are compared to the match.
    941 --
    942 -- E.g. "rrdw" matches for either two dword registers or two word
    943 -- registers. "Fx2dq" matches an st0 operand plus an index operand
    944 -- pointing to a dword (float) or qword (double).
    945 --
    946 -- Every character after the ":" is part of the pattern string:
    947 --   Hex chars are accumulated to form the opcode (left to right).
    948 --   "n"       disables the standard opcode mods
    949 --             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
    950 --   "X"       Force REX.W.
    951 --   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
    952 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
    953 --             The spare 3 bits are either filled with the last hex digit or
    954 --             the result from a previous "r"/"R". The opcode is restored.
    955 --   "u"       Use VEX encoding, vvvv unused.
    956 --   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
    957 --             removed from the list used by future characters).
    958 --   "L"       Force VEX.L
    959 --
    960 -- All of the following characters force a flush of the opcode:
    961 --   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
    962 --   "s"       stores a 4 bit immediate from the last register operand,
    963 --             followed by 4 zero bits.
    964 --   "S"       stores a signed 8 bit immediate from the last operand.
    965 --   "U"       stores an unsigned 8 bit immediate from the last operand.
    966 --   "W"       stores an unsigned 16 bit immediate from the last operand.
    967 --   "i"       stores an operand sized immediate from the last operand.
    968 --   "I"       ditto, but generates an action code to optionally modify
    969 --             the opcode (+2) for a signed 8 bit immediate.
    970 --   "J"       generates one of the REL action codes from the last operand.
    971 --
    972 ------------------------------------------------------------------------------
    973 
    974 -- Template strings for x86 instructions. Ordered by first opcode byte.
    975 -- Unimplemented opcodes (deliberate omissions) are marked with *.
    976 local map_op = {
    977   -- 00-05: add...
    978   -- 06: *push es
    979   -- 07: *pop es
    980   -- 08-0D: or...
    981   -- 0E: *push cs
    982   -- 0F: two byte opcode prefix
    983   -- 10-15: adc...
    984   -- 16: *push ss
    985   -- 17: *pop ss
    986   -- 18-1D: sbb...
    987   -- 1E: *push ds
    988   -- 1F: *pop ds
    989   -- 20-25: and...
    990   es_0 =	"26",
    991   -- 27: *daa
    992   -- 28-2D: sub...
    993   cs_0 =	"2E",
    994   -- 2F: *das
    995   -- 30-35: xor...
    996   ss_0 =	"36",
    997   -- 37: *aaa
    998   -- 38-3D: cmp...
    999   ds_0 =	"3E",
   1000   -- 3F: *aas
   1001   inc_1 =	x64 and "m:FF0m" or "rdw:40r|m:FF0m",
   1002   dec_1 =	x64 and "m:FF1m" or "rdw:48r|m:FF1m",
   1003   push_1 =	(x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
   1004 			 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
   1005   pop_1 =	x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
   1006   -- 60: *pusha, *pushad, *pushaw
   1007   -- 61: *popa, *popad, *popaw
   1008   -- 62: *bound rdw,x
   1009   -- 63: x86: *arpl mw,rw
   1010   movsxd_2 =	x64 and "rm/qd:63rM",
   1011   fs_0 =	"64",
   1012   gs_0 =	"65",
   1013   o16_0 =	"66",
   1014   a16_0 =	not x64 and "67" or nil,
   1015   a32_0 =	x64 and "67",
   1016   -- 68: push idw
   1017   -- 69: imul rdw,mdw,idw
   1018   -- 6A: push ib
   1019   -- 6B: imul rdw,mdw,S
   1020   -- 6C: *insb
   1021   -- 6D: *insd, *insw
   1022   -- 6E: *outsb
   1023   -- 6F: *outsd, *outsw
   1024   -- 70-7F: jcc lb
   1025   -- 80: add... mb,i
   1026   -- 81: add... mdw,i
   1027   -- 82: *undefined
   1028   -- 83: add... mdw,S
   1029   test_2 =	"mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
   1030   -- 86: xchg rb,mb
   1031   -- 87: xchg rdw,mdw
   1032   -- 88: mov mb,r
   1033   -- 89: mov mdw,r
   1034   -- 8A: mov r,mb
   1035   -- 8B: mov r,mdw
   1036   -- 8C: *mov mdw,seg
   1037   lea_2 =	"rx1dq:8DrM",
   1038   -- 8E: *mov seg,mdw
   1039   -- 8F: pop mdw
   1040   nop_0 =	"90",
   1041   xchg_2 =	"Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
   1042   cbw_0 =	"6698",
   1043   cwde_0 =	"98",
   1044   cdqe_0 =	"4898",
   1045   cwd_0 =	"6699",
   1046   cdq_0 =	"99",
   1047   cqo_0 =	"4899",
   1048   -- 9A: *call iw:idw
   1049   wait_0 =	"9B",
   1050   fwait_0 =	"9B",
   1051   pushf_0 =	"9C",
   1052   pushfd_0 =	not x64 and "9C",
   1053   pushfq_0 =	x64 and "9C",
   1054   popf_0 =	"9D",
   1055   popfd_0 =	not x64 and "9D",
   1056   popfq_0 =	x64 and "9D",
   1057   sahf_0 =	"9E",
   1058   lahf_0 =	"9F",
   1059   mov_2 =	"OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
   1060   movsb_0 =	"A4",
   1061   movsw_0 =	"66A5",
   1062   movsd_0 =	"A5",
   1063   cmpsb_0 =	"A6",
   1064   cmpsw_0 =	"66A7",
   1065   cmpsd_0 =	"A7",
   1066   -- A8: test Rb,i
   1067   -- A9: test Rdw,i
   1068   stosb_0 =	"AA",
   1069   stosw_0 =	"66AB",
   1070   stosd_0 =	"AB",
   1071   lodsb_0 =	"AC",
   1072   lodsw_0 =	"66AD",
   1073   lodsd_0 =	"AD",
   1074   scasb_0 =	"AE",
   1075   scasw_0 =	"66AF",
   1076   scasd_0 =	"AF",
   1077   -- B0-B7: mov rb,i
   1078   -- B8-BF: mov rdw,i
   1079   -- C0: rol... mb,i
   1080   -- C1: rol... mdw,i
   1081   ret_1 =	"i.:nC2W",
   1082   ret_0 =	"C3",
   1083   -- C4: *les rdw,mq
   1084   -- C5: *lds rdw,mq
   1085   -- C6: mov mb,i
   1086   -- C7: mov mdw,i
   1087   -- C8: *enter iw,ib
   1088   leave_0 =	"C9",
   1089   -- CA: *retf iw
   1090   -- CB: *retf
   1091   int3_0 =	"CC",
   1092   int_1 =	"i.:nCDU",
   1093   into_0 =	"CE",
   1094   -- CF: *iret
   1095   -- D0: rol... mb,1
   1096   -- D1: rol... mdw,1
   1097   -- D2: rol... mb,cl
   1098   -- D3: rol... mb,cl
   1099   -- D4: *aam ib
   1100   -- D5: *aad ib
   1101   -- D6: *salc
   1102   -- D7: *xlat
   1103   -- D8-DF: floating point ops
   1104   -- E0: *loopne
   1105   -- E1: *loope
   1106   -- E2: *loop
   1107   -- E3: *jcxz, *jecxz
   1108   -- E4: *in Rb,ib
   1109   -- E5: *in Rdw,ib
   1110   -- E6: *out ib,Rb
   1111   -- E7: *out ib,Rdw
   1112   call_1 =	x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
   1113   jmp_1 =	x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
   1114   -- EA: *jmp iw:idw
   1115   -- EB: jmp ib
   1116   -- EC: *in Rb,dx
   1117   -- ED: *in Rdw,dx
   1118   -- EE: *out dx,Rb
   1119   -- EF: *out dx,Rdw
   1120   lock_0 =	"F0",
   1121   int1_0 =	"F1",
   1122   repne_0 =	"F2",
   1123   repnz_0 =	"F2",
   1124   rep_0 =	"F3",
   1125   repe_0 =	"F3",
   1126   repz_0 =	"F3",
   1127   -- F4: *hlt
   1128   cmc_0 =	"F5",
   1129   -- F6: test... mb,i; div... mb
   1130   -- F7: test... mdw,i; div... mdw
   1131   clc_0 =	"F8",
   1132   stc_0 =	"F9",
   1133   -- FA: *cli
   1134   cld_0 =	"FC",
   1135   std_0 =	"FD",
   1136   -- FE: inc... mb
   1137   -- FF: inc... mdw
   1138 
   1139   -- misc ops
   1140   not_1 =	"m:F72m",
   1141   neg_1 =	"m:F73m",
   1142   mul_1 =	"m:F74m",
   1143   imul_1 =	"m:F75m",
   1144   div_1 =	"m:F76m",
   1145   idiv_1 =	"m:F77m",
   1146 
   1147   imul_2 =	"rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
   1148   imul_3 =	"rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
   1149 
   1150   movzx_2 =	"rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
   1151   movsx_2 =	"rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
   1152 
   1153   bswap_1 =	"rqd:0FC8r",
   1154   bsf_2 =	"rmqdw:0FBCrM",
   1155   bsr_2 =	"rmqdw:0FBDrM",
   1156   bt_2 =	"mrqdw:0FA3Rm|miqdw:0FBA4mU",
   1157   btc_2 =	"mrqdw:0FBBRm|miqdw:0FBA7mU",
   1158   btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
   1159   bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",
   1160 
   1161   shld_3 =	"mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
   1162   shrd_3 =	"mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
   1163 
   1164   rdtsc_0 =	"0F31", -- P1+
   1165   rdpmc_0 =	"0F33", -- P6+
   1166   cpuid_0 =	"0FA2", -- P1+
   1167 
   1168   -- floating point ops
   1169   fst_1 =	"ff:DDD0r|xd:D92m|xq:nDD2m",
   1170   fstp_1 =	"ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
   1171   fld_1 =	"ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
   1172 
   1173   fpop_0 =	"DDD8", -- Alias for fstp st0.
   1174 
   1175   fist_1 =	"xw:nDF2m|xd:DB2m",
   1176   fistp_1 =	"xw:nDF3m|xd:DB3m|xq:nDF7m",
   1177   fild_1 =	"xw:nDF0m|xd:DB0m|xq:nDF5m",
   1178 
   1179   fxch_0 =	"D9C9",
   1180   fxch_1 =	"ff:D9C8r",
   1181   fxch_2 =	"fFf:D9C8r|Fff:D9C8R",
   1182 
   1183   fucom_1 =	"ff:DDE0r",
   1184   fucom_2 =	"Fff:DDE0R",
   1185   fucomp_1 =	"ff:DDE8r",
   1186   fucomp_2 =	"Fff:DDE8R",
   1187   fucomi_1 =	"ff:DBE8r", -- P6+
   1188   fucomi_2 =	"Fff:DBE8R", -- P6+
   1189   fucomip_1 =	"ff:DFE8r", -- P6+
   1190   fucomip_2 =	"Fff:DFE8R", -- P6+
   1191   fcomi_1 =	"ff:DBF0r", -- P6+
   1192   fcomi_2 =	"Fff:DBF0R", -- P6+
   1193   fcomip_1 =	"ff:DFF0r", -- P6+
   1194   fcomip_2 =	"Fff:DFF0R", -- P6+
   1195   fucompp_0 =	"DAE9",
   1196   fcompp_0 =	"DED9",
   1197 
   1198   fldenv_1 =	"x.:D94m",
   1199   fnstenv_1 =	"x.:D96m",
   1200   fstenv_1 =	"x.:9BD96m",
   1201   fldcw_1 =	"xw:nD95m",
   1202   fstcw_1 =	"xw:n9BD97m",
   1203   fnstcw_1 =	"xw:nD97m",
   1204   fstsw_1 =	"Rw:n9BDFE0|xw:n9BDD7m",
   1205   fnstsw_1 =	"Rw:nDFE0|xw:nDD7m",
   1206   fclex_0 =	"9BDBE2",
   1207   fnclex_0 =	"DBE2",
   1208 
   1209   fnop_0 =	"D9D0",
   1210   -- D9D1-D9DF: unassigned
   1211 
   1212   fchs_0 =	"D9E0",
   1213   fabs_0 =	"D9E1",
   1214   -- D9E2: unassigned
   1215   -- D9E3: unassigned
   1216   ftst_0 =	"D9E4",
   1217   fxam_0 =	"D9E5",
   1218   -- D9E6: unassigned
   1219   -- D9E7: unassigned
   1220   fld1_0 =	"D9E8",
   1221   fldl2t_0 =	"D9E9",
   1222   fldl2e_0 =	"D9EA",
   1223   fldpi_0 =	"D9EB",
   1224   fldlg2_0 =	"D9EC",
   1225   fldln2_0 =	"D9ED",
   1226   fldz_0 =	"D9EE",
   1227   -- D9EF: unassigned
   1228 
   1229   f2xm1_0 =	"D9F0",
   1230   fyl2x_0 =	"D9F1",
   1231   fptan_0 =	"D9F2",
   1232   fpatan_0 =	"D9F3",
   1233   fxtract_0 =	"D9F4",
   1234   fprem1_0 =	"D9F5",
   1235   fdecstp_0 =	"D9F6",
   1236   fincstp_0 =	"D9F7",
   1237   fprem_0 =	"D9F8",
   1238   fyl2xp1_0 =	"D9F9",
   1239   fsqrt_0 =	"D9FA",
   1240   fsincos_0 =	"D9FB",
   1241   frndint_0 =	"D9FC",
   1242   fscale_0 =	"D9FD",
   1243   fsin_0 =	"D9FE",
   1244   fcos_0 =	"D9FF",
   1245 
   1246   -- SSE, SSE2
   1247   andnpd_2 =	"rmo:660F55rM",
   1248   andnps_2 =	"rmo:0F55rM",
   1249   andpd_2 =	"rmo:660F54rM",
   1250   andps_2 =	"rmo:0F54rM",
   1251   clflush_1 =	"x.:0FAE7m",
   1252   cmppd_3 =	"rmio:660FC2rMU",
   1253   cmpps_3 =	"rmio:0FC2rMU",
   1254   cmpsd_3 =	"rrio:F20FC2rMU|rxi/oq:",
   1255   cmpss_3 =	"rrio:F30FC2rMU|rxi/od:",
   1256   comisd_2 =	"rro:660F2FrM|rx/oq:",
   1257   comiss_2 =	"rro:0F2FrM|rx/od:",
   1258   cvtdq2pd_2 =	"rro:F30FE6rM|rx/oq:",
   1259   cvtdq2ps_2 =	"rmo:0F5BrM",
   1260   cvtpd2dq_2 =	"rmo:F20FE6rM",
   1261   cvtpd2ps_2 =	"rmo:660F5ArM",
   1262   cvtpi2pd_2 =	"rx/oq:660F2ArM",
   1263   cvtpi2ps_2 =	"rx/oq:0F2ArM",
   1264   cvtps2dq_2 =	"rmo:660F5BrM",
   1265   cvtps2pd_2 =	"rro:0F5ArM|rx/oq:",
   1266   cvtsd2si_2 =	"rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
   1267   cvtsd2ss_2 =	"rro:F20F5ArM|rx/oq:",
   1268   cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
   1269   cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
   1270   cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
   1271   cvtss2si_2 =	"rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
   1272   cvttpd2dq_2 =	"rmo:660FE6rM",
   1273   cvttps2dq_2 =	"rmo:F30F5BrM",
   1274   cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
   1275   cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
   1276   fxsave_1 =	"x.:0FAE0m",
   1277   fxrstor_1 =	"x.:0FAE1m",
   1278   ldmxcsr_1 =	"xd:0FAE2m",
   1279   lfence_0 =	"0FAEE8",
   1280   maskmovdqu_2 = "rro:660FF7rM",
   1281   mfence_0 =	"0FAEF0",
   1282   movapd_2 =	"rmo:660F28rM|mro:660F29Rm",
   1283   movaps_2 =	"rmo:0F28rM|mro:0F29Rm",
   1284   movd_2 =	"rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
   1285   movdqa_2 =	"rmo:660F6FrM|mro:660F7FRm",
   1286   movdqu_2 =	"rmo:F30F6FrM|mro:F30F7FRm",
   1287   movhlps_2 =	"rro:0F12rM",
   1288   movhpd_2 =	"rx/oq:660F16rM|xr/qo:n660F17Rm",
   1289   movhps_2 =	"rx/oq:0F16rM|xr/qo:n0F17Rm",
   1290   movlhps_2 =	"rro:0F16rM",
   1291   movlpd_2 =	"rx/oq:660F12rM|xr/qo:n660F13Rm",
   1292   movlps_2 =	"rx/oq:0F12rM|xr/qo:n0F13Rm",
   1293   movmskpd_2 =	"rr/do:660F50rM",
   1294   movmskps_2 =	"rr/do:0F50rM",
   1295   movntdq_2 =	"xro:660FE7Rm",
   1296   movnti_2 =	"xrqd:0FC3Rm",
   1297   movntpd_2 =	"xro:660F2BRm",
   1298   movntps_2 =	"xro:0F2BRm",
   1299   movq_2 =	"rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
   1300   movsd_2 =	"rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
   1301   movss_2 =	"rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
   1302   movupd_2 =	"rmo:660F10rM|mro:660F11Rm",
   1303   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
   1304   orpd_2 =	"rmo:660F56rM",
   1305   orps_2 =	"rmo:0F56rM",
   1306   pause_0 =	"F390",
   1307   pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
   1308   pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
   1309   pmovmskb_2 =	"rr/do:660FD7rM",
   1310   prefetchnta_1 = "xb:n0F180m",
   1311   prefetcht0_1 = "xb:n0F181m",
   1312   prefetcht1_1 = "xb:n0F182m",
   1313   prefetcht2_1 = "xb:n0F183m",
   1314   pshufd_3 =	"rmio:660F70rMU",
   1315   pshufhw_3 =	"rmio:F30F70rMU",
   1316   pshuflw_3 =	"rmio:F20F70rMU",
   1317   pslld_2 =	"rmo:660FF2rM|rio:660F726mU",
   1318   pslldq_2 =	"rio:660F737mU",
   1319   psllq_2 =	"rmo:660FF3rM|rio:660F736mU",
   1320   psllw_2 =	"rmo:660FF1rM|rio:660F716mU",
   1321   psrad_2 =	"rmo:660FE2rM|rio:660F724mU",
   1322   psraw_2 =	"rmo:660FE1rM|rio:660F714mU",
   1323   psrld_2 =	"rmo:660FD2rM|rio:660F722mU",
   1324   psrldq_2 =	"rio:660F733mU",
   1325   psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
   1326   psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
   1327   rcpps_2 =	"rmo:0F53rM",
   1328   rcpss_2 =	"rro:F30F53rM|rx/od:",
   1329   rsqrtps_2 =	"rmo:0F52rM",
   1330   rsqrtss_2 =	"rmo:F30F52rM",
   1331   sfence_0 =	"0FAEF8",
   1332   shufpd_3 =	"rmio:660FC6rMU",
   1333   shufps_3 =	"rmio:0FC6rMU",
   1334   stmxcsr_1 =   "xd:0FAE3m",
   1335   ucomisd_2 =	"rro:660F2ErM|rx/oq:",
   1336   ucomiss_2 =	"rro:0F2ErM|rx/od:",
   1337   unpckhpd_2 =	"rmo:660F15rM",
   1338   unpckhps_2 =	"rmo:0F15rM",
   1339   unpcklpd_2 =	"rmo:660F14rM",
   1340   unpcklps_2 =	"rmo:0F14rM",
   1341   xorpd_2 =	"rmo:660F57rM",
   1342   xorps_2 =	"rmo:0F57rM",
   1343 
   1344   -- SSE3 ops
   1345   fisttp_1 =	"xw:nDF1m|xd:DB1m|xq:nDD1m",
   1346   addsubpd_2 =	"rmo:660FD0rM",
   1347   addsubps_2 =	"rmo:F20FD0rM",
   1348   haddpd_2 =	"rmo:660F7CrM",
   1349   haddps_2 =	"rmo:F20F7CrM",
   1350   hsubpd_2 =	"rmo:660F7DrM",
   1351   hsubps_2 =	"rmo:F20F7DrM",
   1352   lddqu_2 =	"rxo:F20FF0rM",
   1353   movddup_2 =	"rmo:F20F12rM",
   1354   movshdup_2 =	"rmo:F30F16rM",
   1355   movsldup_2 =	"rmo:F30F12rM",
   1356 
   1357   -- SSSE3 ops
   1358   pabsb_2 =	"rmo:660F381CrM",
   1359   pabsd_2 =	"rmo:660F381ErM",
   1360   pabsw_2 =	"rmo:660F381DrM",
   1361   palignr_3 =	"rmio:660F3A0FrMU",
   1362   phaddd_2 =	"rmo:660F3802rM",
   1363   phaddsw_2 =	"rmo:660F3803rM",
   1364   phaddw_2 =	"rmo:660F3801rM",
   1365   phsubd_2 =	"rmo:660F3806rM",
   1366   phsubsw_2 =	"rmo:660F3807rM",
   1367   phsubw_2 =	"rmo:660F3805rM",
   1368   pmaddubsw_2 =	"rmo:660F3804rM",
   1369   pmulhrsw_2 =	"rmo:660F380BrM",
   1370   pshufb_2 =	"rmo:660F3800rM",
   1371   psignb_2 =	"rmo:660F3808rM",
   1372   psignd_2 =	"rmo:660F380ArM",
   1373   psignw_2 =	"rmo:660F3809rM",
   1374 
   1375   -- SSE4.1 ops
   1376   blendpd_3 =	"rmio:660F3A0DrMU",
   1377   blendps_3 =	"rmio:660F3A0CrMU",
   1378   blendvpd_3 =	"rmRo:660F3815rM",
   1379   blendvps_3 =	"rmRo:660F3814rM",
   1380   dppd_3 =	"rmio:660F3A41rMU",
   1381   dpps_3 =	"rmio:660F3A40rMU",
   1382   extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
   1383   insertps_3 =	"rrio:660F3A41rMU|rxi/od:",
   1384   movntdqa_2 =	"rxo:660F382ArM",
   1385   mpsadbw_3 =	"rmio:660F3A42rMU",
   1386   packusdw_2 =	"rmo:660F382BrM",
   1387   pblendvb_3 =	"rmRo:660F3810rM",
   1388   pblendw_3 =	"rmio:660F3A0ErMU",
   1389   pcmpeqq_2 =	"rmo:660F3829rM",
   1390   pextrb_3 =	"rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
   1391   pextrd_3 =	"mri/do:660F3A16RmU",
   1392   pextrq_3 =	"mri/qo:660F3A16RmU",
   1393   -- pextrw is SSE2, mem operand is SSE4.1 only
   1394   phminposuw_2 = "rmo:660F3841rM",
   1395   pinsrb_3 =	"rri/od:660F3A20nrMU|rxi/ob:",
   1396   pinsrd_3 =	"rmi/od:660F3A22rMU",
   1397   pinsrq_3 =	"rmi/oq:660F3A22rXMU",
   1398   pmaxsb_2 =	"rmo:660F383CrM",
   1399   pmaxsd_2 =	"rmo:660F383DrM",
   1400   pmaxud_2 =	"rmo:660F383FrM",
   1401   pmaxuw_2 =	"rmo:660F383ErM",
   1402   pminsb_2 =	"rmo:660F3838rM",
   1403   pminsd_2 =	"rmo:660F3839rM",
   1404   pminud_2 =	"rmo:660F383BrM",
   1405   pminuw_2 =	"rmo:660F383ArM",
   1406   pmovsxbd_2 =	"rro:660F3821rM|rx/od:",
   1407   pmovsxbq_2 =	"rro:660F3822rM|rx/ow:",
   1408   pmovsxbw_2 =	"rro:660F3820rM|rx/oq:",
   1409   pmovsxdq_2 =	"rro:660F3825rM|rx/oq:",
   1410   pmovsxwd_2 =	"rro:660F3823rM|rx/oq:",
   1411   pmovsxwq_2 =	"rro:660F3824rM|rx/od:",
   1412   pmovzxbd_2 =	"rro:660F3831rM|rx/od:",
   1413   pmovzxbq_2 =	"rro:660F3832rM|rx/ow:",
   1414   pmovzxbw_2 =	"rro:660F3830rM|rx/oq:",
   1415   pmovzxdq_2 =	"rro:660F3835rM|rx/oq:",
   1416   pmovzxwd_2 =	"rro:660F3833rM|rx/oq:",
   1417   pmovzxwq_2 =	"rro:660F3834rM|rx/od:",
   1418   pmuldq_2 =	"rmo:660F3828rM",
   1419   pmulld_2 =	"rmo:660F3840rM",
   1420   ptest_2 =	"rmo:660F3817rM",
   1421   roundpd_3 =	"rmio:660F3A09rMU",
   1422   roundps_3 =	"rmio:660F3A08rMU",
   1423   roundsd_3 =	"rrio:660F3A0BrMU|rxi/oq:",
   1424   roundss_3 =	"rrio:660F3A0ArMU|rxi/od:",
   1425 
   1426   -- SSE4.2 ops
   1427   crc32_2 =	"rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
   1428   pcmpestri_3 =	"rmio:660F3A61rMU",
   1429   pcmpestrm_3 =	"rmio:660F3A60rMU",
   1430   pcmpgtq_2 =	"rmo:660F3837rM",
   1431   pcmpistri_3 =	"rmio:660F3A63rMU",
   1432   pcmpistrm_3 =	"rmio:660F3A62rMU",
   1433   popcnt_2 =	"rmqdw:F30FB8rM",
   1434 
   1435   -- SSE4a
   1436   extrq_2 =	"rro:660F79rM",
   1437   extrq_3 =	"riio:660F780mUU",
   1438   insertq_2 =	"rro:F20F79rM",
   1439   insertq_4 =	"rriio:F20F78rMUU",
   1440   lzcnt_2 =	"rmqdw:F30FBDrM",
   1441   movntsd_2 =	"xr/qo:nF20F2BRm",
   1442   movntss_2 =	"xr/do:F30F2BRm",
   1443   -- popcnt is also in SSE4.2
   1444 
   1445   -- AES-NI
   1446   aesdec_2 =	"rmo:660F38DErM",
   1447   aesdeclast_2 = "rmo:660F38DFrM",
   1448   aesenc_2 =	"rmo:660F38DCrM",
   1449   aesenclast_2 = "rmo:660F38DDrM",
   1450   aesimc_2 =	"rmo:660F38DBrM",
   1451   aeskeygenassist_3 = "rmio:660F3ADFrMU",
   1452   pclmulqdq_3 =	"rmio:660F3A44rMU",
   1453 
   1454    -- AVX FP ops
   1455   vaddsubpd_3 =	"rrmoy:660FVD0rM",
   1456   vaddsubps_3 =	"rrmoy:F20FVD0rM",
   1457   vandpd_3 =	"rrmoy:660FV54rM",
   1458   vandps_3 =	"rrmoy:0FV54rM",
   1459   vandnpd_3 =	"rrmoy:660FV55rM",
   1460   vandnps_3 =	"rrmoy:0FV55rM",
   1461   vblendpd_4 =	"rrmioy:660F3AV0DrMU",
   1462   vblendps_4 =	"rrmioy:660F3AV0CrMU",
   1463   vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
   1464   vblendvps_4 =	"rrmroy:660F3AV4ArMs",
   1465   vbroadcastf128_2 = "rx/yo:660F38u1ArM",
   1466   vcmppd_4 =	"rrmioy:660FVC2rMU",
   1467   vcmpps_4 =	"rrmioy:0FVC2rMU",
   1468   vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
   1469   vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
   1470   vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
   1471   vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
   1472   vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
   1473   vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
   1474   vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
   1475   vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
   1476   vcvtps2dq_2 =	"rmoy:660Fu5BrM",
   1477   vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
   1478   vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
   1479   vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
   1480   vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
   1481   vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
   1482   vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
   1483   vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
   1484   vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
   1485   vcvttps2dq_2 = "rmoy:F30Fu5BrM",
   1486   vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
   1487   vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
   1488   vdppd_4 =	"rrmio:660F3AV41rMU",
   1489   vdpps_4 =	"rrmioy:660F3AV40rMU",
   1490   vextractf128_3 = "mri/oy:660F3AuL19RmU",
   1491   vextractps_3 = "mri/do:660F3Au17RmU",
   1492   vhaddpd_3 =	"rrmoy:660FV7CrM",
   1493   vhaddps_3 =	"rrmoy:F20FV7CrM",
   1494   vhsubpd_3 =	"rrmoy:660FV7DrM",
   1495   vhsubps_3 =	"rrmoy:F20FV7DrM",
   1496   vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
   1497   vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
   1498   vldmxcsr_1 =	"xd:0FuAE2m",
   1499   vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
   1500   vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
   1501   vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
   1502   vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
   1503   vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
   1504   vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
   1505   vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
   1506   vmovhlps_3 =	"rrro:0FV12rM",
   1507   vmovhpd_2 =	"xr/qo:660Fu17Rm",
   1508   vmovhpd_3 =	"rrx/ooq:660FV16rM",
   1509   vmovhps_2 =	"xr/qo:0Fu17Rm",
   1510   vmovhps_3 =	"rrx/ooq:0FV16rM",
   1511   vmovlhps_3 =	"rrro:0FV16rM",
   1512   vmovlpd_2 =	"xr/qo:660Fu13Rm",
   1513   vmovlpd_3 =	"rrx/ooq:660FV12rM",
   1514   vmovlps_2 =	"xr/qo:0Fu13Rm",
   1515   vmovlps_3 =	"rrx/ooq:0FV12rM",
   1516   vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
   1517   vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
   1518   vmovntpd_2 =	"xroy:660Fu2BRm",
   1519   vmovntps_2 =	"xroy:0Fu2BRm",
   1520   vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
   1521   vmovsd_3 =	"rrro:F20FV10rM",
   1522   vmovshdup_2 =	"rmoy:F30Fu16rM",
   1523   vmovsldup_2 =	"rmoy:F30Fu12rM",
   1524   vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
   1525   vmovss_3 =	"rrro:F30FV10rM",
   1526   vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
   1527   vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
   1528   vorpd_3 =	"rrmoy:660FV56rM",
   1529   vorps_3 =	"rrmoy:0FV56rM",
   1530   vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
   1531   vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
   1532   vperm2f128_4 = "rrmiy:660F3AV06rMU",
   1533   vptestpd_2 =	"rmoy:660F38u0FrM",
   1534   vptestps_2 =	"rmoy:660F38u0ErM",
   1535   vrcpps_2 =	"rmoy:0Fu53rM",
   1536   vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
   1537   vrsqrtps_2 =	"rmoy:0Fu52rM",
   1538   vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
   1539   vroundpd_3 =	"rmioy:660F3AV09rMU",
   1540   vroundps_3 =	"rmioy:660F3AV08rMU",
   1541   vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
   1542   vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
   1543   vshufpd_4 =	"rrmioy:660FVC6rMU",
   1544   vshufps_4 =	"rrmioy:0FVC6rMU",
   1545   vsqrtps_2 =	"rmoy:0Fu51rM",
   1546   vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
   1547   vsqrtpd_2 =	"rmoy:660Fu51rM",
   1548   vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
   1549   vstmxcsr_1 =	"xd:0FuAE3m",
   1550   vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
   1551   vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
   1552   vunpckhpd_3 =	"rrmoy:660FV15rM",
   1553   vunpckhps_3 =	"rrmoy:0FV15rM",
   1554   vunpcklpd_3 =	"rrmoy:660FV14rM",
   1555   vunpcklps_3 =	"rrmoy:0FV14rM",
   1556   vxorpd_3 =	"rrmoy:660FV57rM",
   1557   vxorps_3 =	"rrmoy:0FV57rM",
   1558   vzeroall_0 =	"0FuL77",
   1559   vzeroupper_0 = "0Fu77",
   1560 
   1561   -- AVX2 FP ops
   1562   vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
   1563   vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
   1564   -- *vgather* (!vsib)
   1565   vpermpd_3 =	"rmiy:660F3AuX01rMU",
   1566   vpermps_3 =	"rrmy:660F38V16rM",
   1567 
   1568   -- AVX, AVX2 integer ops
   1569   -- In general, xmm requires AVX, ymm requires AVX2.
   1570   vaesdec_3 =  "rrmo:660F38VDErM",
   1571   vaesdeclast_3 = "rrmo:660F38VDFrM",
   1572   vaesenc_3 =  "rrmo:660F38VDCrM",
   1573   vaesenclast_3 = "rrmo:660F38VDDrM",
   1574   vaesimc_2 =  "rmo:660F38uDBrM",
   1575   vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
   1576   vlddqu_2 =	"rxoy:F20FuF0rM",
   1577   vmaskmovdqu_2 = "rro:660FuF7rM",
   1578   vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
   1579   vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
   1580   vmovntdq_2 =	"xroy:660FuE7Rm",
   1581   vmovntdqa_2 =	"rxoy:660F38u2ArM",
   1582   vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
   1583   vpabsb_2 =	"rmoy:660F38u1CrM",
   1584   vpabsd_2 =	"rmoy:660F38u1ErM",
   1585   vpabsw_2 =	"rmoy:660F38u1DrM",
   1586   vpackusdw_3 =	"rrmoy:660F38V2BrM",
   1587   vpalignr_4 =	"rrmioy:660F3AV0FrMU",
   1588   vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
   1589   vpblendw_4 =	"rrmioy:660F3AV0ErMU",
   1590   vpclmulqdq_4 = "rrmio:660F3AV44rMU",
   1591   vpcmpeqq_3 =	"rrmoy:660F38V29rM",
   1592   vpcmpestri_3 = "rmio:660F3Au61rMU",
   1593   vpcmpestrm_3 = "rmio:660F3Au60rMU",
   1594   vpcmpgtq_3 =	"rrmoy:660F38V37rM",
   1595   vpcmpistri_3 = "rmio:660F3Au63rMU",
   1596   vpcmpistrm_3 = "rmio:660F3Au62rMU",
   1597   vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
   1598   vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
   1599   vpextrd_3 =	"mri/do:660F3Au16RmU",
   1600   vpextrq_3 =	"mri/qo:660F3Au16RmU",
   1601   vphaddw_3 =	"rrmoy:660F38V01rM",
   1602   vphaddd_3 =	"rrmoy:660F38V02rM",
   1603   vphaddsw_3 =	"rrmoy:660F38V03rM",
   1604   vphminposuw_2 = "rmo:660F38u41rM",
   1605   vphsubw_3 =	"rrmoy:660F38V05rM",
   1606   vphsubd_3 =	"rrmoy:660F38V06rM",
   1607   vphsubsw_3 =	"rrmoy:660F38V07rM",
   1608   vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
   1609   vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
   1610   vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
   1611   vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
   1612   vpmaddubsw_3 = "rrmoy:660F38V04rM",
   1613   vpmaxsb_3 =	"rrmoy:660F38V3CrM",
   1614   vpmaxsd_3 =	"rrmoy:660F38V3DrM",
   1615   vpmaxuw_3 =	"rrmoy:660F38V3ErM",
   1616   vpmaxud_3 =	"rrmoy:660F38V3FrM",
   1617   vpminsb_3 =	"rrmoy:660F38V38rM",
   1618   vpminsd_3 =	"rrmoy:660F38V39rM",
   1619   vpminuw_3 =	"rrmoy:660F38V3ArM",
   1620   vpminud_3 =	"rrmoy:660F38V3BrM",
   1621   vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
   1622   vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
   1623   vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
   1624   vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
   1625   vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
   1626   vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
   1627   vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
   1628   vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
   1629   vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
   1630   vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
   1631   vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
   1632   vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
   1633   vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
   1634   vpmuldq_3 =	"rrmoy:660F38V28rM",
   1635   vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
   1636   vpmulld_3 =	"rrmoy:660F38V40rM",
   1637   vpshufb_3 =	"rrmoy:660F38V00rM",
   1638   vpshufd_3 =	"rmioy:660Fu70rMU",
   1639   vpshufhw_3 =	"rmioy:F30Fu70rMU",
   1640   vpshuflw_3 =	"rmioy:F20Fu70rMU",
   1641   vpsignb_3 =	"rrmoy:660F38V08rM",
   1642   vpsignw_3 =	"rrmoy:660F38V09rM",
   1643   vpsignd_3 =	"rrmoy:660F38V0ArM",
   1644   vpslldq_3 =	"rrioy:660Fv737mU",
   1645   vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
   1646   vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
   1647   vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
   1648   vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
   1649   vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
   1650   vpsrldq_3 =	"rrioy:660Fv733mU",
   1651   vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
   1652   vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
   1653   vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
   1654   vptest_2 =	"rmoy:660F38u17rM",
   1655 
   1656   -- AVX2 integer ops
   1657   vbroadcasti128_2 = "rx/yo:660F38u5ArM",
   1658   vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
   1659   vextracti128_3 = "mri/oy:660F3AuL39RmU",
   1660   vpblendd_4 =	"rrmioy:660F3AV02rMU",
   1661   vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
   1662   vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
   1663   vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
   1664   vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
   1665   vpermd_3 =	"rrmy:660F38V36rM",
   1666   vpermq_3 =	"rmiy:660F3AuX00rMU",
   1667   -- *vpgather* (!vsib)
   1668   vperm2i128_4 = "rrmiy:660F3AV46rMU",
   1669   vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
   1670   vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
   1671   vpsllvd_3 =	"rrmoy:660F38V47rM",
   1672   vpsllvq_3 =	"rrmoy:660F38VX47rM",
   1673   vpsravd_3 =	"rrmoy:660F38V46rM",
   1674   vpsrlvd_3 =	"rrmoy:660F38V45rM",
   1675   vpsrlvq_3 =	"rrmoy:660F38VX45rM",
   1676 
   1677   -- Intel ADX
   1678   adcx_2 =	"rmqd:660F38F6rM",
   1679   adox_2 =	"rmqd:F30F38F6rM",
   1680 }
   1681 
   1682 ------------------------------------------------------------------------------
   1683 
   -- Arithmetic ops.
   -- Every op is identified by a small index. The index itself is the spare
   -- digit of the 81/83 immediate forms; shifted left by 3 it is the base of
   -- the op's own register/memory opcodes (+1, +3) and of the short form that
   -- takes the accumulator (+5).
   do
     local arith_template =
       "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
     local arith_index = {
       add = 0, ["or"] = 1, adc = 2, sbb = 3,
       ["and"] = 4, sub = 5, xor = 6, cmp = 7,
     }
     for mnemonic, idx in pairs(arith_index) do
       local base = shl(idx, 3)
       map_op[mnemonic.."_2"] = format(arith_template,
         base+1, base+3, idx, idx, base+5, idx)
     end
   end
   1692 
   -- Shift and rotate ops.
   -- The value is the spare digit that selects the operation. `sal' shares
   -- digit 4 with `shl'; digit 6 is not assigned here.
   do
     local shift_digit = {
       rol = 0, ror = 1, rcl = 2, rcr = 3,
       shl = 4, sal = 4, shr = 5, sar = 7,
     }
     for mnemonic, digit in pairs(shift_digit) do
       local template = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU",
         digit, digit, digit)
       map_op[mnemonic.."_2"] = template
     end
   end
   1698 
   -- Conditional ops.
   -- One jump, one set and one conditional move template is generated for
   -- every condition code listed in map_cc.
   for cond, code in pairs(map_cc) do
     local jump_tpl = format("J.:n0F8%XJ", code) -- short: 7%X
     local set_tpl = format("mb:n0F9%X2m", code)
     local cmov_tpl = format("rmqdw:0F4%XrM", code) -- P6+
     map_op["j"..cond.."_1"] = jump_tpl
     map_op["set"..cond.."_1"] = set_tpl
     map_op["cmov"..cond.."_2"] = cmov_tpl
   end
   1705 
   -- FP arithmetic ops.
   -- `direct' is the register-form opcode byte of the op itself. `partner' is
   -- the same byte for the opposite-direction op (sub <-> subr, div <-> divr)
   -- and equals `direct' for the first four ops. It is used by the forms
   -- whose second operand is st0 and by the popping forms.
   do
     local fp_index = {
       add = 0, mul = 1, com = 2, comp = 3,
       sub = 4, subr = 5, div = 6, divr = 7,
     }
     for op, idx in pairs(fp_index) do
       local fname = "f"..op
       local direct = 0xc0 + shl(idx, 3)
       local partner = direct
       if idx >= 4 then
         if idx % 2 == 0 then
           partner = direct + 8
         else
           partner = direct - 8
         end
       end

       map_op[fname.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm",
         direct, idx, idx)
       map_op["fi"..op.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", idx, idx)

       local is_compare = (idx == 2 or idx == 3)
       if is_compare then
         -- com/comp: no st0-as-second-operand form and no popping variants.
         map_op[fname.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM",
           direct, idx, idx)
       else
         map_op[fname.."_2"] = format(
           "Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM",
           direct, partner, idx, idx)
         map_op[fname.."p_1"] = format("ff:DE%02Xr", partner)
         map_op[fname.."p_2"] = format("fFf:DE%02Xr", partner)
       end
     end
   end
   1722 
   -- FP conditional moves.
   -- The low two bits of the index select the condition (shifted into bits
   -- 3..4 of the second opcode byte). Bit 2 marks the negated conditions and
   -- moves the first opcode byte from DA to DB.
   do
     local fcmov_index = {
       b = 0, e = 1, be = 2, u = 3,
       nb = 4, ne = 5, nbe = 6, nu = 7,
     }
     for cond, idx in pairs(fcmov_index) do
       local cond_bits = shl(band(idx, 3), 3)
       local negate_bits = shl(band(idx, 4), 6)
       local opcode = 0xdac0 + cond_bits + negate_bits
       map_op["fcmov"..cond.."_1"] = format("ff:%04Xr", opcode) -- P6+
       map_op["fcmov"..cond.."_2"] = format("Fff:%04XR", opcode) -- P6+
     end
   end
   1729 
   -- SSE / AVX FP arithmetic ops.
   -- The value is the low nibble of the 0F 5x opcode. Each op gets the four
   -- two-operand SSE forms; every op except sqrt (nibble 1) additionally gets
   -- the four three-operand `v' forms.
   do
     local sse_forms = {
       ps_2 = "rmo:0F5%XrM",
       ss_2 = "rro:F30F5%XrM|rx/od:",
       pd_2 = "rmo:660F5%XrM",
       sd_2 = "rro:F20F5%XrM|rx/oq:",
     }
     local avx_forms = {
       ps_3 = "rrmoy:0FV5%XrM",
       ss_3 = "rrro:F30FV5%XrM|rrx/ood:",
       pd_3 = "rrmoy:660FV5%XrM",
       sd_3 = "rrro:F20FV5%XrM|rrx/ooq:",
     }
     local fp_nibble = {
       sqrt = 1, add = 8, mul = 9,
       sub = 12, min = 13, div = 14, max = 15,
     }
     for op, nibble in pairs(fp_nibble) do
       for suffix, template in pairs(sse_forms) do
         map_op[op..suffix] = format(template, nibble)
       end
       if nibble ~= 1 then
         for suffix, template in pairs(avx_forms) do
           map_op["v"..op..suffix] = format(template, nibble)
         end
       end
     end
   end
   1744 
   -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
   -- Maps each mnemonic to its final opcode byte. Every entry yields a
   -- two-operand SSE2 template and a three-operand `v' template.
   do
     local int_opcode = {
       -- Pack.
       packssdw = 0x6B, packsswb = 0x63, packuswb = 0x67,
       -- Add (wrapping, signed saturating, unsigned saturating).
       paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
       paddsb = 0xEC, paddsw = 0xED, paddusb = 0xDC, paddusw = 0xDD,
       -- Logic.
       pand = 0xDB, pandn = 0xDF, por = 0xEB, pxor = 0xEF,
       -- Average.
       pavgb = 0xE0, pavgw = 0xE3,
       -- Compare.
       pcmpeqb = 0x74, pcmpeqw = 0x75, pcmpeqd = 0x76,
       pcmpgtb = 0x64, pcmpgtw = 0x65, pcmpgtd = 0x66,
       -- Multiply-add, min/max.
       pmaddwd = 0xF5, pmaxsw = 0xEE, pmaxub = 0xDE,
       pminsw = 0xEA, pminub = 0xDA,
       -- Multiply.
       pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, pmuludq = 0xF4,
       -- Sum of absolute differences.
       psadbw = 0xF6,
       -- Subtract (wrapping, signed saturating, unsigned saturating).
       psubb = 0xF8, psubw = 0xF9, psubd = 0xFA, psubq = 0xFB,
       psubsb = 0xE8, psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
       -- Unpack.
       punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, punpckhqdq = 0x6D,
       punpcklbw = 0x60, punpcklwd = 0x61, punpckldq = 0x62, punpcklqdq = 0x6C,
     }
     for mnemonic, code in pairs(int_opcode) do
       local sse_tpl = format("rmo:660F%02XrM", code)
       local avx_tpl = format("rrmoy:660FV%02XrM", code)
       map_op[mnemonic.."_2"] = sse_tpl
       map_op["v"..mnemonic.."_3"] = avx_tpl
     end
   end
   1766 
   1767 ------------------------------------------------------------------------------
   1768 
   1769 local map_vexarg = { u = false, v = 1, V = 2 }
        -- map_vexarg lists the pattern chars that switch dopattern() to VEX
        -- encoding. A numeric value is the index of the operand that is removed
        -- from args and stored in vex.v; false means no operand is removed.
   1770 
   1771 -- Process pattern string.
        --
        -- Walks `pat' one character at a time, accumulating hex digits into an
        -- opcode value and emitting it (plus ModRM/SIB, displacements and
        -- immediates) through the wput*/waction helpers.
        --
        -- Parameters, as used below:
        --   pat      Encoding pattern string.
        --   args     Array of parsed operand tables. The VEX branch may remove
        --            one entry from it.
        --   sz       Operand size. Passed to wputop() unless the pattern char
        --            `n' clears it, and used for sized immediates.
        --   op       Opcode name, only used in error messages.
        --   needrex  If truthy, 16 is added to rex before the opcode is emitted.
   1772 local function dopattern(pat, args, sz, op, needrex)
   1773   local digit, addin, vex
   1774   local opcode = 0
   1775   local szov = sz
   1776   local narg = 1
   1777   local rex = 0
   1778 
   1779   -- Limit number of section buffer positions used by a single dasm_put().
   1780   -- A single opcode needs a maximum of 6 positions.
   1781   if secpos+6 > maxsecpos then wflush() end
   1782 
   1783   -- Process each character.
          -- A "|" is appended so the final flush/break branch always runs.
   1784   for c in gmatch(pat.."|", ".") do
   1785     if match(c, "%x") then	-- Hex digit.
              -- '0'-'9' give 0..9 directly; 'A'-'F' give 17..22 (minus 7) and
              -- 'a'-'f' give 49..54 (minus 39), both ending up as 10..15.
   1786       digit = byte(c) - 48
   1787       if digit > 48 then digit = digit - 39
   1788       elseif digit > 16 then digit = digit - 7 end
   1789       opcode = opcode*16 + digit
   1790       addin = nil
   1791     elseif c == "n" then	-- Disable operand size mods for opcode.
   1792       szov = nil
   1793     elseif c == "X" then	-- Force REX.W.
   1794       rex = 8
   1795     elseif c == "L" then	-- Force VEX.L.
              -- NOTE(review): indexes `vex', so this assumes a VEX marker char
              -- (see map_vexarg) appeared earlier in the pattern.
   1796       vex.l = true
   1797     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
   1798       addin = args[1]; opcode = opcode + (addin.reg % 8)
   1799       if narg < 2 then narg = 2 end
   1800     elseif c == "R" then	-- Merge 2nd operand regno. into opcode.
   1801       addin = args[2]; opcode = opcode + (addin.reg % 8)
   1802       narg = 3
   1803     elseif c == "m" or c == "M" then	-- Encode ModRM/SIB.
              -- `s' becomes the spare value: either the register merged by a
              -- preceding r/R or the last hex digit of the opcode.
   1804       local s
   1805       if addin then
   1806 	s = addin.reg
   1807 	opcode = opcode - band(s, 7)	-- Undo regno opcode merge.
   1808       else
   1809 	s = band(opcode, 15)	-- Undo last digit.
   1810 	opcode = shr(opcode, 4)
   1811       end
              -- "m" takes the ModRM operand from args[1], "M" from args[2].
   1812       local nn = c == "m" and 1 or 2
   1813       local t = args[nn]
   1814       if narg <= nn then narg = nn + 1 end
              -- Accumulate rex bits: 8 for "q" size (unless rex is already
              -- non-zero), 1 if the operand's reg > 7, 2 if its xreg > 7, 4 if
              -- the spare value > 7 and 16 if the caller passed needrex.
   1815       if szov == "q" and rex == 0 then rex = rex + 8 end
   1816       if t.reg and t.reg > 7 then rex = rex + 1 end
   1817       if t.xreg and t.xreg > 7 then rex = rex + 2 end
   1818       if s > 7 then rex = rex + 4 end
   1819       if needrex then rex = rex + 16 end
   1820       local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
   1821       opcode = nil
   1822       local imark = sub(pat, -1) -- Force a mark (ugly).
   1823       -- Put ModRM/SIB with regno/last digit as spare.
   1824       wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
   1825       addin = nil
   1826     elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
              -- The hex digits seen so far must end in 0F, 0F38 or 0F3A. They
              -- are stripped from `opcode' and recorded as m = 1, 2 or 3.
   1827       local b = band(opcode, 255); opcode = shr(opcode, 8)
   1828       local m = 1
   1829       if b == 0x38 then m = 2
   1830       elseif b == 0x3a then m = 3 end
   1831       if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
   1832       if b ~= 0x0f then
   1833 	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
   1834 	  "' in pattern `"..pat.."' for `"..op.."'")
   1835       end
              -- For "v"/"V" the 1st/2nd operand is taken out of args; the
              -- remaining operands shift down by one position.
   1836       local v = map_vexarg[c]
   1837       if v then v = remove(args, v) end
              -- A preceding 66, F3 or F2 byte is stripped as well and recorded
              -- as p = 1, 2 or 3 (p = 0 if none of them is present).
   1838       b = band(opcode, 255)
   1839       local p = 0
   1840       if b == 0x66 then p = 1
   1841       elseif b == 0xf3 then p = 2
   1842       elseif b == 0xf2 then p = 3 end
   1843       if p ~= 0 then opcode = shr(opcode, 8) end
              -- Whatever is left in front is emitted as-is before the VEX part.
   1844       if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
   1845       vex = { m = m, p = p, v = v }
   1846     else
   1847       if opcode then -- Flush opcode.
   1848 	if szov == "q" and rex == 0 then rex = rex + 8 end
   1849 	if needrex then rex = rex + 16 end
   1850 	if addin and addin.reg == -1 then
        	  -- reg == -1 added (-1 % 8) == 7 to the opcode above; subtract it
        	  -- again and pass the operand's vreg to wvreg() instead.
   1851 	  local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
   1852 	  wvreg("opcode", addin.vreg, psz, sk)
   1853 	else
   1854 	  if addin and addin.reg > 7 then rex = rex + 1 end
   1855 	  wputop(szov, opcode, rex, vex)
   1856 	end
   1857 	opcode = nil
   1858       end
   1859       if c == "|" then break end
   1860       if c == "o" then -- Offset (pure 32 bit displacement).
   1861 	wputdarg(args[1].disp); if narg < 2 then narg = 2 end
   1862       elseif c == "O" then
   1863 	wputdarg(args[2].disp); narg = 3
   1864       else
   1865 	-- Anything else is an immediate operand.
        	-- It consumes the next not yet used operand (args[narg]).
   1866 	local a = args[narg]
   1867 	narg = narg + 1
   1868 	local mode, imm = a.mode, a.imm
        	-- Label operands are only accepted by the pattern chars i, I and J.
   1869 	if mode == "iJ" and not match("iIJ", c) then
   1870 	  werror("bad operand size for label")
   1871 	end
   1872 	if c == "S" then
   1873 	  wputsbarg(imm)
   1874 	elseif c == "U" then
   1875 	  wputbarg(imm)
   1876 	elseif c == "W" then
   1877 	  wputwarg(imm)
   1878 	elseif c == "i" or c == "I" then
   1879 	  if mode == "iJ" then
   1880 	    wputlabel("IMM_", imm, 1)
   1881 	  elseif mode == "iI" and c == "I" then
   1882 	    waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
   1883 	  else
   1884 	    wputszarg(sz, imm)
   1885 	  end
   1886 	elseif c == "J" then
   1887 	  if mode == "iPJ" then
   1888 	    waction("REL_A", imm) -- !x64 (secpos)
   1889 	  else
   1890 	    wputlabel("REL_", imm, 2)
   1891 	  end
   1892 	elseif c == "s" then
        	  -- Register operand placed in the upper 4 bits of a byte. For
        	  -- reg < 0 a zero byte is written and the vreg goes to wvreg().
   1893 	  local reg = a.reg
   1894 	  if reg < 0 then
   1895 	    wputb(0)
   1896 	    wvreg("imm.hi", a.vreg)
   1897 	  else
   1898 	    wputb(shl(reg, 4))
   1899 	  end
   1900 	else
   1901 	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
   1902 	end
   1903       end
   1904     end
   1905   end
   1906 end
   1907 
   1908 ------------------------------------------------------------------------------
   1909 
   -- Short display names for the operand mode chars of a template.
   -- A name of '#' suppresses the whole alternative in the help output.
   local map_modename = {
     r = "reg", R = "eax", C = "cl",
     x = "mem", m = "mrm", i = "imm",
     f = "stx", F = "st0", J = "lbl",
     ["1"] = "1",
     I = "#", S = "#", O = "#",
   }

   -- Build the help text for a template: one "mode, mode, ..." string per
   -- '|'-separated alternative, using only its first nparams mode chars.
   -- Returns "" for zero-operand opcodes, otherwise an array of strings.
   local function templatehelp(template, nparams)
     if nparams == 0 then return "" end
     local lines = {}
     for alt in gmatch(template, "[^%|]+") do
       local desc = map_modename[sub(alt, 1, 1)]
       for modechar in gmatch(sub(alt, 2, nparams), ".") do
         desc = desc..", "..map_modename[modechar]
       end
       -- Drop alternatives that contain a suppressed mode.
       if not match(desc, "#") then
         lines[#lines+1] = desc
       end
     end
     return lines
   end
   1930 
   -- Match operand modes against mode match part of template.
   -- The i-th char of tm must occur in the mode string of the i-th operand.
   -- Returns true if all operands match, nothing otherwise.
   local function matchtm(tm, args)
     local idx, nargs = 1, #args
     while idx <= nargs do
       local modechar = sub(tm, idx, idx)
       if not match(args[idx].mode, modechar) then
         return
       end
       idx = idx + 1
     end
     return true
   end
   1938 
   -- Handle opcodes defined with template strings.
   -- Without params this returns the help text for the template. Otherwise
   -- the operands are parsed, the first alternative whose operand modes and
   -- sizes fit is handed to dopattern(), and an error is raised if none fits.
   map_op[".template__"] = function(params, template, nparams)
     if not params then return templatehelp(template, nparams) end
     local opname = params.op
     local args = {}

     -- Zero-operand opcodes have no match part.
     if #params == 0 then
       dopattern(template, args, "d", opname, nil)
       return
     end

     -- Determine common operand size (coerce undefined size) or flag as mixed.
     -- Also collect the needrex flag, which must agree across all operands.
     local sz, szmix, needrex
     for idx, operand in ipairs(params) do
       local arg = parseoperand(operand)
       args[idx] = arg
       local opsize = arg.opsize
       if opsize then
         if sz == nil or sz == opsize then
           sz = opsize
         else
           szmix = true
         end
       end
       local argrex = arg.needrex
       if argrex ~= nil then
         if needrex == nil then
           needrex = argrex
         elseif needrex ~= argrex then
           werror("bad mix of byte-addressable registers")
         end
       end
     end

     -- Try all match:pattern pairs (separated by '|').
     local nargs = #args
     local gotmatch, lastpat
     for alt in gmatch(template, "[^%|]+") do
       -- Split off size match (starts after mode match) and pattern string.
       local szm, pat = match(alt, "^(.-):(.*)$", nargs+1)
       -- An empty pattern reuses the pattern of the previous alternative.
       if pat == "" then pat = lastpat else lastpat = pat end
       if matchtm(alt, args) then
         gotmatch = true
         local prefix = sub(szm, 1, 1)
         if prefix == "/" then
           -- Exactly match leading operand sizes: char i of the size match
           -- must equal the size of operand i-1.
           local exact = true
           for pos = #szm, 2, -1 do
             if args[pos-1].opsize ~= sub(szm, pos, pos) then
               exact = false
               break
             end
           end
           if exact then
             dopattern(pat, args, sz, opname, needrex) -- Process pattern.
             return
           end
         else
           -- Match common operand size.
           if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
           local szp = sz
           if prefix == "1" then
             -- Size is taken from the 1st operand only.
             szp = args[1].opsize
             szmix = nil
           elseif prefix == "2" then
             -- Size is taken from the 2nd operand only.
             szp = args[2].opsize
             szmix = nil
           end
           if not szmix and (prefix == "." or match(szm, szp or "#")) then
             dopattern(pat, args, szp, opname, needrex) -- Process pattern.
             return
           end
         end
       end
     end

     -- Nothing fit: pick the most specific diagnosis.
     local msg
     if not gotmatch then
       msg = "bad operand mode"
     elseif szmix then
       msg = "mixed operand size"
     elseif sz then
       msg = "bad operand size"
     else
       msg = "missing operand size"
     end

     werror(msg.." in `"..opmodestr(opname, args).."'")
   end
   2010 
   2011 ------------------------------------------------------------------------------
   2012 
   -- x64-specific opcode for 64 bit immediates and displacements.
   --
   -- Accepted operand forms (the same list is returned as help text):
   --   reg, imm      opcode B8 + register, 64 bit register required
   --   reg, [disp]   opcode A1, register operand must have mode "rmR"
   --   [disp], reg   opcode A3, register operand must have mode "rmR"
   -- The 64 bit value is emitted as two IMM_D actions: the expression cast to
   -- unsigned int first, then the expression shifted right by 32.
   if x64 then
     function map_op.mov64_2(params)
       if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
       if secpos+2 > maxsecpos then wflush() end
       -- op64 holds the C expression for the 64 bit value. It is declared
       -- exactly once (it used to be declared twice in this scope).
       local opcode, sz, rex, vreg
       local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
       if op64 then
         -- Form: [disp], reg
         local a = parseoperand(params[2])
         if a.mode ~= "rmR" then werror("bad operand mode") end
         sz = a.opsize
         rex = sz == "q" and 8 or 0
         opcode = 0xa3
       else
         op64 = match(params[2], "^%[%s*(.-)%s*%]$")
         local a = parseoperand(params[1])
         if op64 then
           -- Form: reg, [disp]
           if a.mode ~= "rmR" then werror("bad operand mode") end
           sz = a.opsize
           rex = sz == "q" and 8 or 0
           opcode = 0xa1
         else
           -- Form: reg, imm. Needs a 64 bit register as first operand.
           if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
             werror("bad operand mode")
           end
           op64 = params[2]
           if a.reg == -1 then
             -- reg == -1: nothing is merged into the opcode here, the
             -- operand's vreg is passed on to wputop()/wvreg() below.
             vreg = a.vreg
             opcode = 0xb8
           else
             opcode = 0xb8 + band(a.reg, 7)
           end
           -- Always 8; plus 1 for registers above 7.
           rex = a.reg > 7 and 9 or 8
         end
       end
       local psz, sk = wputop(sz, opcode, rex, nil, vreg)
       wvreg("opcode", vreg, psz, sk)
       waction("IMM_D", format("(unsigned int)(%s)", op64))
       waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
     end
   end
   2054 
   2055 ------------------------------------------------------------------------------
   2056 
   -- Pseudo-opcodes for data storage.
   -- The element size is the 2nd char of the opcode name (".byte" -> "b",
   -- ".word" -> "w", ...); "a" is replaced by the current address size.
   -- Every operand must be an immediate whose explicit size, if any, agrees.
   local function op_data(params)
     if not params then return "imm..." end
     local size = sub(params.op, 2, 2)
     if size == "a" then size = addrsize end
     for _, operand in ipairs(params) do
       local arg = parseoperand(operand)
       local is_imm = sub(arg.mode, 1, 1) == "i"
       local size_ok = not arg.opsize or arg.opsize == size
       if not (is_imm and size_ok) then
         werror("bad mode or size in `"..operand.."'")
       end
       if arg.mode == "iJ" then
         -- Label reference.
         wputlabel("IMM_", arg.imm, 1)
       else
         wputszarg(size, arg.imm)
       end
       -- Keep room in the section buffer for the next element.
       if secpos+2 > maxsecpos then wflush() end
     end
   end
   2075 
   -- All data pseudo-opcodes share the op_data handler; it derives the
   -- element size from the opcode name.
   for _, opname in ipairs{
     ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*",
   } do
     map_op[opname] = op_data
   end
   2081 
   2082 ------------------------------------------------------------------------------
   2083 
   -- Pseudo-opcode to mark the position where the action list is to be emitted.
   -- Queues a deferred output line that calls writeactions() with the given
   -- C variable name.
   map_op[".actionlist_1"] = function(params)
     if params then
       local cvar = params[1] -- No syntax check. You get to keep the pieces.
       wline(function(out) writeactions(out, cvar) end)
       return
     end
     return "cvar"
   end
   2090 
   -- Pseudo-opcode to mark the position where the global enum is to be emitted.
   -- Queues a deferred output line that calls writeglobals() with the given
   -- prefix.
   map_op[".globals_1"] = function(params)
     if params then
       local enum_prefix = params[1] -- No syntax check. You get to keep the pieces.
       wline(function(out) writeglobals(out, enum_prefix) end)
       return
     end
     return "prefix"
   end
   2097 
   -- Pseudo-opcode to mark the position where the global names are to be emitted.
   -- Queues a deferred output line that calls writeglobalnames() with the
   -- given C variable name.
   map_op[".globalnames_1"] = function(params)
     if params then
       local cvar = params[1] -- No syntax check. You get to keep the pieces.
       wline(function(out) writeglobalnames(out, cvar) end)
       return
     end
     return "cvar"
   end
   2104 
   -- Pseudo-opcode to mark the position where the extern names are to be emitted.
   -- Queues a deferred output line that calls writeexternnames() with the
   -- given C variable name.
   map_op[".externnames_1"] = function(params)
     if params then
       local cvar = params[1] -- No syntax check. You get to keep the pieces.
       wline(function(out) writeexternnames(out, cvar) end)
       return
     end
     return "cvar"
   end
   2111 
   2112 ------------------------------------------------------------------------------
   2113 
   -- Label pseudo-opcode (converted from trailing colon form).
   -- Registered for one operand (label only) and two operands (label plus an
   -- address to assign to it).
   do
     local function op_label(params)
       if not params then return "[1-9] | ->global | =>pcexpr  [, addr]" end
       if secpos+2 > maxsecpos then wflush() end

       local label = parseoperand(params[1])
       local mode, imm = label.mode, label.imm
       local is_label = (mode == "iJ")
       if type(imm) == "number" and (is_label or (imm >= 1 and imm <= 9)) then
         -- Local label (1: ... 9:) or global label (->global:).
         waction("LABEL_LG", nil, 1)
         wputxb(imm)
       elseif is_label then
         -- PC label (=>pcexpr:).
         waction("LABEL_PC", imm)
       else
         werror("bad label definition")
       end

       -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
       local addr = params[2]
       if not addr then return end
       local target = parseoperand(addr)
       if target.mode ~= "iPJ" then
         werror("bad label assignment")
       else
         waction("SETLABEL", target.imm)
       end
     end

     map_op[".label_2"] = op_label
     map_op[".label_1"] = op_label
   end
   2142 
   2143 ------------------------------------------------------------------------------
   2144 
   -- Alignment pseudo-opcode.
   -- The operand is either a number or an operand size name. It must resolve
   -- to a power of 2 in the range (2 ... 256).
   map_op[".align_1"] = function(params)
     if not params then return "numpow2" end
     if secpos+1 > maxsecpos then wflush() end
     local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
     if align then
       -- Compare against 2, 4, ..., 256 in turn.
       local pow2 = 2
       for _ = 1, 8 do
         if align == pow2 then
           waction("ALIGN", nil, 1)
           wputxb(align-1) -- Action byte is 2**n-1.
           return
         end
         pow2 = pow2 * 2
       end
     end
     werror("bad alignment")
   end
   2164 
   -- Spacing pseudo-opcode.
   -- params[1] is the amount of space, passed through to the SPACE action.
   -- The optional params[2] is the filler byte and defaults to 0. It must be
   -- an integer in the range 0 ... 255; anything else is a "bad filler".
   map_op[".space_2"] = function(params)
     if not params then return "num [, filler]" end
     if secpos+1 > maxsecpos then wflush() end
     waction("SPACE", params[1])
     local fill = params[2]
     if fill then
       fill = tonumber(fill)
       -- A byte cannot hold a fractional value, so reject non-integers here
       -- with a proper diagnostic instead of passing them on.
       if not fill or fill < 0 or fill > 255 or fill % 1 ~= 0 then
         werror("bad filler")
       end
     end
     wputxb(fill or 0)
   end
   map_op[".space_1"] = map_op[".space_2"]
   2178 
   2179 ------------------------------------------------------------------------------
   2180 
   -- Pseudo-opcode for (primitive) type definitions (map to C types).
   -- Registers the type name with its C type and optional base register,
   -- defines "#name" as sizeof(ctype) and emits a DtN() shortcut macro.
   do
     local function op_type(params, nparams)
       if not params then
         if nparams == 2 then
           return "name, ctype"
         end
         return "name, ctype, reg"
       end

       local name, ctype, reg = params[1], params[2], params[3]
       if not match(name, "^[%a_][%w_]*$") then
         werror("bad type name `"..name.."'")
       end
       if map_type[name] then
         werror("duplicate type `"..name.."'")
       end
       if reg and not map_reg_valid_base[reg] then
         werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
       end

       -- Add #type to defines. A bit unclean to put it in map_archdef.
       map_archdef["#"..name] = "sizeof("..ctype..")"

       -- Add new type and emit shortcut define.
       local typenum = ctypenum + 1
       local entry = {
         ctype = ctype,
         ctypefmt = format("Dt%X(%%s)", typenum),
         reg = reg,
       }
       map_type[name] = entry
       wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)",
         typenum, ctype))
       -- Only advance the counter once the type has been recorded.
       ctypenum = typenum
     end

     map_op[".type_3"] = op_type
     map_op[".type_2"] = op_type
   end
   2210 
   -- Dump type definitions.
   -- Writes a header, one line per type in sorted name order (name, C type,
   -- base register name if any) and a trailing blank line to `out'.
   local function dumptypes(out, lvl)
     local names = {}
     for typename in pairs(map_type) do
       names[#names+1] = typename
     end
     sort(names)

     out:write("Type definitions:\n")
     for idx = 1, #names do
       local typename = names[idx]
       local info = map_type[typename]
       local regname = ""
       if info.reg then
         regname = map_reg_rev[info.reg] or ""
       end
       out:write(format("  %-20s %-20s %s\n", typename, info.ctype, regname))
     end
     out:write("\n")
   end
   2224 
   2225 ------------------------------------------------------------------------------
   2226 
   -- Set the current section.
   -- Emits the SECTION action followed by the section number as its byte
   -- argument, then forces a flush.
   function _M.section(num)
     waction("SECTION")
     wputxb(num)
     -- SECTION is a terminal action.
     wflush(true)
   end
   2233 
   2234 ------------------------------------------------------------------------------
   2235 
   -- Dump architecture description.
   -- Writes a banner built from the module info, then the register and
   -- action dumps, to `out'.
   function _M.dumparch(out)
     local banner = format("DynASM %s version %s, released %s\n\n",
       _info.arch, _info.version, _info.release)
     out:write(banner)
     dumpregs(out)
     dumpactions(out)
   end
   2243 
   -- Dump all user defined elements.
   -- Order of the output: types, globals, externs.
   function _M.dumpdef(out, lvl)
     dumptypes(out, lvl)
     dumpglobals(out, lvl)
     dumpexterns(out, lvl)
   end
   2250 
   2251 ------------------------------------------------------------------------------
   2252 
   -- Pass callbacks from/to the DynASM core.
   -- Stores the four output/diagnostic callbacks of the core in the module's
   -- upvalues and hands the module's flush function back to the core.
   function _M.passcb(wl, we, wf, ww)
     wline = wl
     werror = we
     wfatal = wf
     wwarn = ww
     return wflush
   end
   2258 
   -- Setup the arch-specific module.
   -- Remembers the architecture and the option table passed by the core.
   function _M.setup(arch, opt)
     g_arch = arch
     g_opt = opt
   end
   2263 
   -- Merge the core maps and the arch-specific maps.
   -- Lookups that miss in map_op fall back to the core opcode map; lookups
   -- that miss in the core define map fall back to map_archdef.
   -- Returns the opcode map and the define map.
   function _M.mergemaps(map_coreop, map_def)
     local op_fallback = { __index = map_coreop }
     local def_fallback = { __index = map_archdef }
     setmetatable(map_op, op_fallback)
     setmetatable(map_def, def_fallback)
     return map_op, map_def
   end

   return _M
   2272 
   2273 ------------------------------------------------------------------------------
   2274