mirror of
https://github.com/rehlds/metamod-r.git
synced 2025-01-28 06:27:55 +03:00
8969 lines
596 KiB
C++
8969 lines
596 KiB
C++
// Copyright (c) 2009-2011, Hikaru Inoue, Akihiro Yamasaki,
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following
|
|
// disclaimer in the documentation and/or other materials provided
|
|
// with the distribution.
|
|
// * The names of the contributors may not be used to endorse or promote
|
|
// products derived from this software without specific prior written
|
|
// permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#pragma once
|
|
#ifndef JITASM_H
|
|
#define JITASM_H
|
|
|
|
#if defined(_WIN32)
|
|
#define JITASM_WIN // Windows
|
|
#endif
|
|
|
|
#if (defined(_WIN64) && (defined(_M_AMD64) || defined(_M_X64))) || defined(__x86_64__)
|
|
#define JITASM64
|
|
#endif
|
|
|
|
#if defined(__GNUC__)
|
|
#define JITASM_GCC
|
|
#endif
|
|
|
|
#if !defined(JITASM_MMINTRIN)
|
|
#if !defined(__GNUC__) || defined(__MMX__)
|
|
#define JITASM_MMINTRIN 1
|
|
#else
|
|
#define JITASM_MMINTRIN 0
|
|
#endif
|
|
#endif
|
|
#if !defined(JITASM_XMMINTRIN)
|
|
#if !defined(__GNUC__) || defined(__SSE__)
|
|
#define JITASM_XMMINTRIN 1
|
|
#else
|
|
#define JITASM_XMMINTRIN 0
|
|
#endif
|
|
#endif
|
|
#if !defined(JITASM_EMMINTRIN)
|
|
#if !defined(__GNUC__) || defined(__SSE2__)
|
|
#define JITASM_EMMINTRIN 1
|
|
#else
|
|
#define JITASM_EMMINTRIN 0
|
|
#endif
|
|
#endif
|
|
|
|
|
|
#include <string>
|
|
#include <deque>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <string.h>
|
|
|
|
#if defined(JITASM_WIN)
|
|
#include <windows.h>
|
|
#else
|
|
#include <unistd.h>
|
|
#include <sys/types.h>
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#if JITASM_MMINTRIN
|
|
#include <mmintrin.h>
|
|
#endif
|
|
#if JITASM_XMMINTRIN
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
#if JITASM_EMMINTRIN
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
#if _MSC_VER >= 1400 // VC8 or later
|
|
#include <intrin.h>
|
|
#endif
|
|
|
|
#if defined(_MSC_VER)
|
|
#pragma warning( push )
|
|
#pragma warning( disable : 4127 ) // conditional expression is constant.
|
|
#pragma warning( disable : 4201 ) // nonstandard extension used : nameless struct/union
|
|
#endif
|
|
|
|
#ifdef ASSERT
|
|
#define JITASM_ASSERT ASSERT
|
|
#else
|
|
#include <assert.h>
|
|
#define JITASM_ASSERT assert
|
|
#endif
|
|
|
|
//#define JITASM_DEBUG_DUMP
|
|
#ifdef JITASM_DEBUG_DUMP
|
|
#if defined(JITASM_GCC)
|
|
#include <stdio.h>
|
|
#define JITASM_TRACE printf
|
|
#else
|
|
#define JITASM_TRACE jitasm::detail::Trace
|
|
#endif
|
|
#elif defined(JITASM_GCC)
|
|
#define JITASM_TRACE(...) ((void)0)
|
|
#else
|
|
#define JITASM_TRACE __noop
|
|
#endif
|
|
|
|
namespace jitasm
|
|
{
|
|
|
|
typedef signed char sint8;
|
|
typedef signed short sint16;
|
|
typedef signed int sint32;
|
|
typedef unsigned char uint8;
|
|
typedef unsigned short uint16;
|
|
typedef unsigned int uint32;
|
|
#if defined(JITASM_GCC)
|
|
typedef signed long long sint64;
|
|
typedef unsigned long long uint64;
|
|
#else
|
|
typedef signed __int64 sint64;
|
|
typedef unsigned __int64 uint64;
|
|
#endif
|
|
|
|
template<typename T> inline void avoid_unused_warn(const T&) {}
|
|
|
|
namespace detail
|
|
{
|
|
#if defined(JITASM_GCC)
|
|
inline long interlocked_increment(long *addend) { return __sync_add_and_fetch(addend, 1); }
|
|
inline long interlocked_decrement(long *addend) { return __sync_sub_and_fetch(addend, 1); }
|
|
inline long interlocked_exchange(long *target, long value) { return __sync_lock_test_and_set(target, value); }
|
|
#elif defined(JITASM_WIN)
|
|
inline long interlocked_increment(long *addend) { return _InterlockedIncrement(addend); }
|
|
inline long interlocked_decrement(long *addend) { return _InterlockedDecrement(addend); }
|
|
inline long interlocked_exchange(long *target, long value) { return _InterlockedExchange(target, value); }
|
|
#endif
|
|
} // namespace detail
|
|
|
|
/// Physical register ID
|
|
enum PhysicalRegID
|
|
{
|
|
INVALID=-1,
|
|
EAX=0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
|
|
AL=0, CL, DL, BL, AH, CH, DH, BH, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
|
|
AX=0, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
|
|
RAX=0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
|
|
ST0=0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
|
|
MM0=0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
|
XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
|
YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,
|
|
};
|
|
|
|
enum
|
|
{
|
|
/** \var NUM_OF_PHYSICAL_REG
|
|
* Number of physical register
|
|
*/
|
|
/** \var SIZE_OF_GP_REG
|
|
* Size of general-purpose register
|
|
*/
|
|
#ifdef JITASM64
|
|
NUM_OF_PHYSICAL_REG = 16,
|
|
SIZE_OF_GP_REG = 8
|
|
#else
|
|
NUM_OF_PHYSICAL_REG = 8,
|
|
SIZE_OF_GP_REG = 4
|
|
#endif
|
|
};
|
|
|
|
/// Register type
|
|
enum RegType
|
|
{
|
|
R_TYPE_GP, ///< General purpose register
|
|
R_TYPE_MMX, ///< MMX register
|
|
R_TYPE_XMM, ///< XMM register
|
|
R_TYPE_YMM, ///< YMM register
|
|
R_TYPE_FPU, ///< FPU register
|
|
R_TYPE_SYMBOLIC_GP, ///< Symbolic general purpose register
|
|
R_TYPE_SYMBOLIC_MMX, ///< Symbolic MMX register
|
|
R_TYPE_SYMBOLIC_XMM, ///< Symbolic XMM register
|
|
R_TYPE_SYMBOLIC_YMM ///< Symbolic YMM register
|
|
};
|
|
|
|
/// Register identifier
|
|
struct RegID
|
|
{
|
|
RegType type;
|
|
int id; ///< PhysicalRegID or symbolic register id
|
|
|
|
bool operator==(const RegID& rhs) const {return type == rhs.type && id == rhs.id;}
|
|
bool operator!=(const RegID& rhs) const {return !(*this == rhs);}
|
|
bool operator<(const RegID& rhs) const {return type != rhs.type ? type < rhs.type : id < rhs.id;}
|
|
bool IsInvalid() const {return type == R_TYPE_GP && id == INVALID;}
|
|
bool IsSymbolic() const {return type == R_TYPE_SYMBOLIC_GP || type == R_TYPE_SYMBOLIC_MMX || type == R_TYPE_SYMBOLIC_XMM;}
|
|
|
|
static RegID Invalid() {
|
|
RegID reg;
|
|
reg.type = R_TYPE_GP;
|
|
reg.id = INVALID;
|
|
return reg;
|
|
}
|
|
static RegID CreatePhysicalRegID(RegType type_, PhysicalRegID id_) {
|
|
RegID reg;
|
|
reg.type = type_;
|
|
reg.id = id_;
|
|
return reg;
|
|
}
|
|
static RegID CreateSymbolicRegID(RegType type_) {
|
|
static long s_id = 0;
|
|
RegID reg;
|
|
reg.type = type_;
|
|
reg.id = static_cast<int>(detail::interlocked_increment(&s_id));
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
/// Operand type
|
|
enum OpdType
|
|
{
|
|
O_TYPE_NONE,
|
|
O_TYPE_REG,
|
|
O_TYPE_MEM,
|
|
O_TYPE_IMM,
|
|
O_TYPE_TYPE_MASK = 0x0F,
|
|
|
|
O_TYPE_DUMMY = 1 << 8, ///< The operand which has this flag is not encoded. This is for register allocator.
|
|
O_TYPE_READ = 1 << 9, ///< The operand is used for reading.
|
|
O_TYPE_WRITE = 1 << 10 ///< The operand is used for writing.
|
|
};
|
|
|
|
/// Operand size
|
|
enum OpdSize
|
|
{
|
|
O_SIZE_8 = 8,
|
|
O_SIZE_16 = 16,
|
|
O_SIZE_32 = 32,
|
|
O_SIZE_64 = 64,
|
|
O_SIZE_80 = 80,
|
|
O_SIZE_128 = 128,
|
|
O_SIZE_224 = 224,
|
|
O_SIZE_256 = 256,
|
|
O_SIZE_864 = 864,
|
|
O_SIZE_4096 = 4096,
|
|
};
|
|
|
|
namespace detail
|
|
{
|
|
/// Operand base class
|
|
struct Opd
|
|
{
|
|
OpdType opdtype_;
|
|
OpdSize opdsize_;
|
|
|
|
union {
|
|
// REG
|
|
struct {
|
|
RegID reg_;
|
|
uint32 reg_assignable_;
|
|
};
|
|
// MEM
|
|
struct {
|
|
RegID base_;
|
|
RegID index_;
|
|
sint64 scale_;
|
|
sint64 disp_;
|
|
OpdSize addrsize_;
|
|
};
|
|
// IMM
|
|
sint64 imm_;
|
|
};
|
|
|
|
/// NONE
|
|
Opd() : opdtype_(O_TYPE_NONE) {}
|
|
/// REG
|
|
Opd(OpdSize opdsize, const RegID& reg, uint32 reg_assignable = 0xFFFFFFFF) : opdtype_(O_TYPE_REG), opdsize_(opdsize), reg_(reg), reg_assignable_(reg_assignable) {}
|
|
/// MEM
|
|
Opd(OpdSize opdsize, OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp)
|
|
: opdtype_(O_TYPE_MEM), opdsize_(opdsize), base_(base), index_(index), scale_(scale), disp_(disp), addrsize_(addrsize) {}
|
|
protected:
|
|
/// IMM
|
|
explicit Opd(OpdSize opdsize, sint64 imm) : opdtype_(O_TYPE_IMM), opdsize_(opdsize), imm_(imm) {}
|
|
|
|
public:
|
|
bool IsNone() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_NONE;}
|
|
bool IsReg() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_REG;}
|
|
bool IsGpReg() const {return IsReg() && (reg_.type == R_TYPE_GP || reg_.type == R_TYPE_SYMBOLIC_GP);}
|
|
bool IsFpuReg() const {return IsReg() && reg_.type == R_TYPE_FPU;}
|
|
bool IsMmxReg() const {return IsReg() && (reg_.type == R_TYPE_MMX || reg_.type == R_TYPE_SYMBOLIC_MMX);}
|
|
bool IsXmmReg() const {return IsReg() && (reg_.type == R_TYPE_XMM || reg_.type == R_TYPE_SYMBOLIC_XMM);}
|
|
bool IsYmmReg() const {return IsReg() && (reg_.type == R_TYPE_YMM || reg_.type == R_TYPE_SYMBOLIC_YMM);}
|
|
bool IsMem() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_MEM;}
|
|
bool IsImm() const {return (opdtype_ & O_TYPE_TYPE_MASK) == O_TYPE_IMM;}
|
|
bool IsDummy() const {return (opdtype_ & O_TYPE_DUMMY) != 0;}
|
|
|
|
OpdSize GetSize() const {return opdsize_;}
|
|
OpdSize GetAddressSize() const {return addrsize_;}
|
|
RegID GetReg() const {JITASM_ASSERT(IsReg()); return reg_;}
|
|
RegID GetBase() const {JITASM_ASSERT(IsMem()); return base_;}
|
|
RegID GetIndex() const {JITASM_ASSERT(IsMem()); return index_;}
|
|
sint64 GetScale() const {JITASM_ASSERT(IsMem()); return scale_;}
|
|
sint64 GetDisp() const {JITASM_ASSERT(IsMem()); return disp_;}
|
|
sint64 GetImm() const {JITASM_ASSERT(IsImm()); return imm_;}
|
|
|
|
bool operator==(const Opd& rhs) const
|
|
{
|
|
if ((opdtype_ & O_TYPE_TYPE_MASK) != (rhs.opdtype_ & O_TYPE_TYPE_MASK) || rhs.opdsize_ != opdsize_) {return false;}
|
|
if (IsReg()) {return reg_ == rhs.reg_ && reg_assignable_ == rhs.reg_assignable_;}
|
|
if (IsMem()) {return base_ == rhs.base_ && index_ == rhs.index_ && scale_ == rhs.scale_ && disp_ == rhs.disp_ && addrsize_ == rhs.addrsize_;}
|
|
if (IsImm()) {return imm_ == rhs.imm_;}
|
|
return true;
|
|
}
|
|
bool operator!=(const Opd& rhs) const {return !(*this == rhs);}
|
|
};
|
|
|
|
/// Add O_TYPE_DUMMY to the specified operand
|
|
inline Opd Dummy(const Opd& opd)
|
|
{
|
|
Opd o(opd);
|
|
o.opdtype_ = static_cast<OpdType>(static_cast<int>(o.opdtype_) | O_TYPE_DUMMY);
|
|
return o;
|
|
}
|
|
|
|
/// Add O_TYPE_DUMMY to the specified operand and constraint of register assignment
|
|
inline Opd Dummy(const Opd& opd, const Opd& constraint)
|
|
{
|
|
JITASM_ASSERT(opd.IsReg() && (opd.opdtype_ & O_TYPE_TYPE_MASK) == (constraint.opdtype_ & O_TYPE_TYPE_MASK) && !constraint.GetReg().IsSymbolic());
|
|
Opd o(opd);
|
|
o.opdtype_ = static_cast<OpdType>(static_cast<int>(o.opdtype_) | O_TYPE_DUMMY);
|
|
o.reg_assignable_ = (1 << constraint.reg_.id);
|
|
return o;
|
|
}
|
|
|
|
/// Add O_TYPE_READ to the specified operand
|
|
inline Opd R(const Opd& opd)
|
|
{
|
|
Opd o(opd);
|
|
o.opdtype_ = static_cast<OpdType>(static_cast<int>(o.opdtype_) | O_TYPE_READ);
|
|
return o;
|
|
}
|
|
|
|
/// Add O_TYPE_WRITE to the specified operand
|
|
inline Opd W(const Opd& opd)
|
|
{
|
|
Opd o(opd);
|
|
o.opdtype_ = static_cast<OpdType>(static_cast<int>(o.opdtype_) | O_TYPE_WRITE);
|
|
return o;
|
|
}
|
|
|
|
/// Add O_TYPE_READ | O_TYPE_WRITE to the specified operand
|
|
inline Opd RW(const Opd& opd)
|
|
{
|
|
Opd o(opd);
|
|
o.opdtype_ = static_cast<OpdType>(static_cast<int>(o.opdtype_) | O_TYPE_READ | O_TYPE_WRITE);
|
|
return o;
|
|
}
|
|
|
|
template<int Size>
|
|
struct OpdT : Opd
|
|
{
|
|
/// NONE
|
|
OpdT() : Opd() {}
|
|
/// REG
|
|
explicit OpdT(const RegID& reg, uint32 reg_assignable = 0xFFFFFFFF) : Opd(static_cast<OpdSize>(Size), reg, reg_assignable) {}
|
|
/// MEM
|
|
OpdT(OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp)
|
|
: Opd(static_cast<OpdSize>(Size), addrsize, base, index, scale, disp) {}
|
|
protected:
|
|
/// IMM
|
|
OpdT(sint64 imm) : Opd(static_cast<OpdSize>(Size), imm) {}
|
|
};
|
|
|
|
} // namespace detail
|
|
|
|
typedef detail::OpdT<O_SIZE_8> Opd8;
|
|
typedef detail::OpdT<O_SIZE_16> Opd16;
|
|
typedef detail::OpdT<O_SIZE_32> Opd32;
|
|
typedef detail::OpdT<O_SIZE_64> Opd64;
|
|
typedef detail::OpdT<O_SIZE_80> Opd80;
|
|
typedef detail::OpdT<O_SIZE_128> Opd128;
|
|
typedef detail::OpdT<O_SIZE_224> Opd224; // FPU environment
|
|
typedef detail::OpdT<O_SIZE_256> Opd256;
|
|
typedef detail::OpdT<O_SIZE_864> Opd864; // FPU state
|
|
typedef detail::OpdT<O_SIZE_4096> Opd4096; // FPU, MMX, XMM, MXCSR state
|
|
|
|
/// 8bit general purpose register
|
|
struct Reg8 : Opd8 {
|
|
Reg8() : Opd8(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP), 0xFFFFFF0F) {}
|
|
explicit Reg8(PhysicalRegID id) : Opd8(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {}
|
|
};
|
|
/// 16bit general purpose register
|
|
struct Reg16 : Opd16 {
|
|
Reg16() : Opd16(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {}
|
|
explicit Reg16(PhysicalRegID id) : Opd16(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {}
|
|
};
|
|
/// 32bit general purpose register
|
|
struct Reg32 : Opd32 {
|
|
Reg32() : Opd32(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {}
|
|
explicit Reg32(PhysicalRegID id) : Opd32(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {}
|
|
};
|
|
#ifdef JITASM64
|
|
/// 64bit general purpose register
|
|
struct Reg64 : Opd64 {
|
|
Reg64() : Opd64(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_GP)) {}
|
|
explicit Reg64(PhysicalRegID id) : Opd64(RegID::CreatePhysicalRegID(R_TYPE_GP, id)) {}
|
|
};
|
|
typedef Reg64 Reg;
|
|
#else
|
|
typedef Reg32 Reg;
|
|
#endif
|
|
/// FPU register
|
|
struct FpuReg : Opd80 {
|
|
explicit FpuReg(PhysicalRegID id) : Opd80(RegID::CreatePhysicalRegID(R_TYPE_FPU, id)) {}
|
|
};
|
|
/// MMX register
|
|
struct MmxReg : Opd64 {
|
|
MmxReg() : Opd64(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_MMX)) {}
|
|
explicit MmxReg(PhysicalRegID id) : Opd64(RegID::CreatePhysicalRegID(R_TYPE_MMX, id)) {}
|
|
};
|
|
/// XMM register
|
|
struct XmmReg : Opd128 {
|
|
XmmReg() : Opd128(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_XMM)) {}
|
|
explicit XmmReg(PhysicalRegID id) : Opd128(RegID::CreatePhysicalRegID(R_TYPE_XMM, id)) {}
|
|
};
|
|
/// YMM register
|
|
struct YmmReg : Opd256 {
|
|
YmmReg() : Opd256(RegID::CreateSymbolicRegID(R_TYPE_SYMBOLIC_YMM)) {}
|
|
explicit YmmReg(PhysicalRegID id) : Opd256(RegID::CreatePhysicalRegID(R_TYPE_YMM, id)) {}
|
|
};
|
|
|
|
struct Reg8_al : Reg8 {Reg8_al() : Reg8(AL) {}};
|
|
struct Reg8_cl : Reg8 {Reg8_cl() : Reg8(CL) {}};
|
|
struct Reg16_ax : Reg16 {Reg16_ax() : Reg16(AX) {}};
|
|
struct Reg16_dx : Reg16 {Reg16_dx() : Reg16(DX) {}};
|
|
struct Reg32_eax : Reg32 {Reg32_eax() : Reg32(EAX) {}};
|
|
#ifdef JITASM64
|
|
struct Reg64_rax : Reg64 {Reg64_rax() : Reg64(RAX) {}};
|
|
#endif
|
|
struct FpuReg_st0 : FpuReg {FpuReg_st0() : FpuReg(ST0) {}};
|
|
|
|
template<class OpdN>
|
|
struct MemT : OpdN
|
|
{
|
|
MemT(OpdSize addrsize, const RegID& base, const RegID& index, sint64 scale, sint64 disp) : OpdN(addrsize, base, index, scale, disp) {}
|
|
};
|
|
typedef MemT<Opd8> Mem8;
|
|
typedef MemT<Opd16> Mem16;
|
|
typedef MemT<Opd32> Mem32;
|
|
typedef MemT<Opd64> Mem64;
|
|
typedef MemT<Opd80> Mem80;
|
|
typedef MemT<Opd128> Mem128;
|
|
typedef MemT<Opd224> Mem224; // FPU environment
|
|
typedef MemT<Opd256> Mem256;
|
|
typedef MemT<Opd864> Mem864; // FPU state
|
|
typedef MemT<Opd4096> Mem4096; // FPU, MMX, XMM, MXCSR state
|
|
|
|
struct MemOffset64
|
|
{
|
|
sint64 offset_;
|
|
explicit MemOffset64(sint64 offset) : offset_(offset) {}
|
|
sint64 GetOffset() const {return offset_;}
|
|
};
|
|
|
|
template<class OpdN, class U, class S>
|
|
struct ImmT : OpdN
|
|
{
|
|
ImmT(U imm) : OpdN((S) imm) {}
|
|
};
|
|
typedef ImmT<Opd8, uint8, sint8> Imm8; ///< 1 byte immediate
|
|
typedef ImmT<Opd16, uint16, sint16> Imm16; ///< 2 byte immediate
|
|
typedef ImmT<Opd32, uint32, sint32> Imm32; ///< 4 byte immediate
|
|
typedef ImmT<Opd64, uint64, sint64> Imm64; ///< 8 byte immediate
|
|
|
|
namespace detail
|
|
{
|
|
inline bool IsInt8(sint64 n) {return (sint8) n == n;}
|
|
inline bool IsInt16(sint64 n) {return (sint16) n == n;}
|
|
inline bool IsInt32(sint64 n) {return (sint32) n == n;}
|
|
inline Opd ImmXor8(const Imm16& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;}
|
|
inline Opd ImmXor8(const Imm32& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;}
|
|
inline Opd ImmXor8(const Imm64& imm) {return IsInt8(imm.GetImm()) ? (Opd) Imm8((sint8) imm.GetImm()) : (Opd) imm;}
|
|
} // namespace detail
|
|
|
|
|
|
/// 32bit address (base, displacement)
|
|
struct Addr32
|
|
{
|
|
RegID reg_;
|
|
sint64 disp_;
|
|
Addr32(const Reg32& obj) : reg_(obj.reg_), disp_(0) {} // implicit
|
|
Addr32(const RegID& reg, sint64 disp) : reg_(reg), disp_(disp) {}
|
|
};
|
|
inline Addr32 operator+(const Reg32& lhs, sint64 rhs) {return Addr32(lhs.reg_, rhs);}
|
|
inline Addr32 operator+(sint64 lhs, const Reg32& rhs) {return rhs + lhs;}
|
|
inline Addr32 operator-(const Reg32& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
inline Addr32 operator+(const Addr32& lhs, sint64 rhs) {return Addr32(lhs.reg_, lhs.disp_ + rhs);}
|
|
inline Addr32 operator+(sint64 lhs, const Addr32& rhs) {return rhs + lhs;}
|
|
inline Addr32 operator-(const Addr32& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 32bit address (base, index, displacement)
|
|
struct Addr32BI
|
|
{
|
|
RegID base_;
|
|
RegID index_;
|
|
sint64 disp_;
|
|
Addr32BI(const RegID& base, const RegID& index, sint64 disp) : base_(base), index_(index), disp_(disp) {}
|
|
};
|
|
inline Addr32BI operator+(const Addr32& lhs, const Addr32& rhs) {return Addr32BI(rhs.reg_, lhs.reg_, lhs.disp_ + rhs.disp_);}
|
|
inline Addr32BI operator+(const Addr32BI& lhs, sint64 rhs) {return Addr32BI(lhs.base_, lhs.index_, lhs.disp_ + rhs);}
|
|
inline Addr32BI operator+(sint64 lhs, const Addr32BI& rhs) {return rhs + lhs;}
|
|
inline Addr32BI operator-(const Addr32BI& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 32bit address (index, scale, displacement)
|
|
struct Addr32SI
|
|
{
|
|
RegID index_;
|
|
sint64 scale_;
|
|
sint64 disp_;
|
|
Addr32SI(const RegID& index, sint64 scale, sint64 disp) : index_(index), scale_(scale), disp_(disp) {}
|
|
};
|
|
inline Addr32SI operator*(const Reg32& lhs, sint64 rhs) {return Addr32SI(lhs.reg_, rhs, 0);}
|
|
inline Addr32SI operator*(sint64 lhs, const Reg32& rhs) {return rhs * lhs;}
|
|
inline Addr32SI operator*(const Addr32SI& lhs, sint64 rhs) {return Addr32SI(lhs.index_, lhs.scale_ * rhs, lhs.disp_);}
|
|
inline Addr32SI operator*(sint64 lhs, const Addr32SI& rhs) {return rhs * lhs;}
|
|
inline Addr32SI operator+(const Addr32SI& lhs, sint64 rhs) {return Addr32SI(lhs.index_, lhs.scale_, lhs.disp_ + rhs);}
|
|
inline Addr32SI operator+(sint64 lhs, const Addr32SI& rhs) {return rhs + lhs;}
|
|
inline Addr32SI operator-(const Addr32SI& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 32bit address (base, index, scale, displacement)
|
|
struct Addr32SIB
|
|
{
|
|
RegID base_;
|
|
RegID index_;
|
|
sint64 scale_;
|
|
sint64 disp_;
|
|
Addr32SIB(const RegID& base, const RegID& index, sint64 scale, sint64 disp) : base_(base), index_(index), scale_(scale), disp_(disp) {}
|
|
};
|
|
inline Addr32SIB operator+(const Addr32& lhs, const Addr32SI& rhs) {return Addr32SIB(lhs.reg_, rhs.index_, rhs.scale_, lhs.disp_ + rhs.disp_);}
|
|
inline Addr32SIB operator+(const Addr32SI& lhs, const Addr32& rhs) {return rhs + lhs;}
|
|
inline Addr32SIB operator+(const Addr32SIB& lhs, sint64 rhs) {return Addr32SIB(lhs.base_, lhs.index_, lhs.scale_, lhs.disp_ + rhs);}
|
|
inline Addr32SIB operator+(sint64 lhs, const Addr32SIB& rhs) {return rhs + lhs;}
|
|
inline Addr32SIB operator-(const Addr32SIB& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
#ifdef JITASM64
|
|
/// 64bit address (base, displacement)
|
|
struct Addr64
|
|
{
|
|
RegID reg_;
|
|
sint64 disp_;
|
|
Addr64(const Reg64& obj) : reg_(obj.reg_), disp_(0) {} // implicit
|
|
Addr64(const RegID& reg, sint64 disp) : reg_(reg), disp_(disp) {}
|
|
};
|
|
inline Addr64 operator+(const Reg64& lhs, sint64 rhs) {return Addr64(lhs.reg_, rhs);}
|
|
inline Addr64 operator+(sint64 lhs, const Reg64& rhs) {return rhs + lhs;}
|
|
inline Addr64 operator-(const Reg64& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
inline Addr64 operator+(const Addr64& lhs, sint64 rhs) {return Addr64(lhs.reg_, lhs.disp_ + rhs);}
|
|
inline Addr64 operator+(sint64 lhs, const Addr64& rhs) {return rhs + lhs;}
|
|
inline Addr64 operator-(const Addr64& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 64bit address (base, index, displacement)
|
|
struct Addr64BI
|
|
{
|
|
RegID base_;
|
|
RegID index_;
|
|
sint64 disp_;
|
|
Addr64BI(const RegID& base, const RegID& index, sint64 disp) : base_(base), index_(index), disp_(disp) {}
|
|
};
|
|
inline Addr64BI operator+(const Addr64& lhs, const Addr64& rhs) {return Addr64BI(rhs.reg_, lhs.reg_, lhs.disp_ + rhs.disp_);}
|
|
inline Addr64BI operator+(const Addr64BI& lhs, sint64 rhs) {return Addr64BI(lhs.base_, lhs.index_, lhs.disp_ + rhs);}
|
|
inline Addr64BI operator+(sint64 lhs, const Addr64BI& rhs) {return rhs + lhs;}
|
|
inline Addr64BI operator-(const Addr64BI& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 64bit address (index, scale, displacement)
|
|
struct Addr64SI
|
|
{
|
|
RegID index_;
|
|
sint64 scale_;
|
|
sint64 disp_;
|
|
Addr64SI(const RegID& index, sint64 scale, sint64 disp) : index_(index), scale_(scale), disp_(disp) {}
|
|
};
|
|
inline Addr64SI operator*(const Reg64& lhs, sint64 rhs) {return Addr64SI(lhs.reg_, rhs, 0);}
|
|
inline Addr64SI operator*(sint64 lhs, const Reg64& rhs) {return rhs * lhs;}
|
|
inline Addr64SI operator*(const Addr64SI& lhs, sint64 rhs) {return Addr64SI(lhs.index_, lhs.scale_ * rhs, lhs.disp_);}
|
|
inline Addr64SI operator*(sint64 lhs, const Addr64SI& rhs) {return rhs * lhs;}
|
|
inline Addr64SI operator+(const Addr64SI& lhs, sint64 rhs) {return Addr64SI(lhs.index_, lhs.scale_, lhs.disp_ + rhs);}
|
|
inline Addr64SI operator+(sint64 lhs, const Addr64SI& rhs) {return rhs + lhs;}
|
|
inline Addr64SI operator-(const Addr64SI& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
/// 64bit address (base, index, scale, displacement)
|
|
struct Addr64SIB
|
|
{
|
|
RegID base_;
|
|
RegID index_;
|
|
sint64 scale_;
|
|
sint64 disp_;
|
|
Addr64SIB(const RegID& base, const RegID& index, sint64 scale, sint64 disp) : base_(base), index_(index), scale_(scale), disp_(disp) {}
|
|
};
|
|
inline Addr64SIB operator+(const Addr64& lhs, const Addr64SI& rhs) {return Addr64SIB(lhs.reg_, rhs.index_, rhs.scale_, lhs.disp_ + rhs.disp_);}
|
|
inline Addr64SIB operator+(const Addr64SI& lhs, const Addr64& rhs) {return rhs + lhs;}
|
|
inline Addr64SIB operator+(const Addr64SIB& lhs, sint64 rhs) {return Addr64SIB(lhs.base_, lhs.index_, lhs.scale_, lhs.disp_ + rhs);}
|
|
inline Addr64SIB operator+(sint64 lhs, const Addr64SIB& rhs) {return rhs + lhs;}
|
|
inline Addr64SIB operator-(const Addr64SIB& lhs, sint64 rhs) {return lhs + -rhs;}
|
|
|
|
typedef Addr64 Addr;
|
|
typedef Addr64BI AddrBI;
|
|
typedef Addr64SI AddrSI;
|
|
typedef Addr64SIB AddrSIB;
|
|
#else
|
|
typedef Addr32 Addr;
|
|
typedef Addr32BI AddrBI;
|
|
typedef Addr32SI AddrSI;
|
|
typedef Addr32SIB AddrSIB;
|
|
#endif
|
|
|
|
template<typename OpdN>
|
|
struct AddressingPtr
|
|
{
|
|
// 32bit-Addressing
|
|
MemT<OpdN> operator[](const Addr32& obj) {return MemT<OpdN>(O_SIZE_32, obj.reg_, RegID::Invalid(), 0, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr32BI& obj) {return MemT<OpdN>(O_SIZE_32, obj.base_, obj.index_, 0, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr32SI& obj) {return MemT<OpdN>(O_SIZE_32, RegID::Invalid(), obj.index_, obj.scale_, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr32SIB& obj) {return MemT<OpdN>(O_SIZE_32, obj.base_, obj.index_, obj.scale_, obj.disp_);}
|
|
#ifdef JITASM64
|
|
MemT<OpdN> operator[](sint32 disp) {return MemT<OpdN>(O_SIZE_64, RegID::Invalid(), RegID::Invalid(), 0, disp);}
|
|
MemT<OpdN> operator[](uint32 disp) {return MemT<OpdN>(O_SIZE_64, RegID::Invalid(), RegID::Invalid(), 0, (sint32) disp);}
|
|
#else
|
|
MemT<OpdN> operator[](sint32 disp) {return MemT<OpdN>(O_SIZE_32, RegID::Invalid(), RegID::Invalid(), 0, disp);}
|
|
MemT<OpdN> operator[](uint32 disp) {return MemT<OpdN>(O_SIZE_32, RegID::Invalid(), RegID::Invalid(), 0, (sint32) disp);}
|
|
#endif
|
|
|
|
#ifdef JITASM64
|
|
// 64bit-Addressing
|
|
MemT<OpdN> operator[](const Addr64& obj) {return MemT<OpdN>(O_SIZE_64, obj.reg_, RegID::Invalid(), 0, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr64BI& obj) {return MemT<OpdN>(O_SIZE_64, obj.base_, obj.index_, 0, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr64SI& obj) {return MemT<OpdN>(O_SIZE_64, RegID::Invalid(), obj.index_, obj.scale_, obj.disp_);}
|
|
MemT<OpdN> operator[](const Addr64SIB& obj) {return MemT<OpdN>(O_SIZE_64, obj.base_, obj.index_, obj.scale_, obj.disp_);}
|
|
MemOffset64 operator[](sint64 offset) {return MemOffset64(offset);}
|
|
MemOffset64 operator[](uint64 offset) {return MemOffset64((sint64) offset);}
|
|
#endif
|
|
};
|
|
|
|
/// Instruction ID
|
|
enum InstrID
|
|
{
|
|
I_ADC, I_ADD, I_AND,
|
|
I_BSF, I_BSR, I_BSWAP, I_BT, I_BTC, I_BTR, I_BTS,
|
|
I_CALL, I_CBW, I_CLC, I_CLD, I_CLI, I_CLTS, I_CMC, I_CMOVCC, I_CMP, I_CMPS_B, I_CMPS_W, I_CMPS_D, I_CMPS_Q, I_CMPXCHG,
|
|
I_CMPXCHG8B, I_CMPXCHG16B, I_CPUID, I_CWD, I_CDQ, I_CQO,
|
|
I_DEC, I_DIV,
|
|
I_ENTER,
|
|
I_HLT,
|
|
I_IDIV, I_IMUL, I_IN, I_INC, I_INS_B, I_INS_W, I_INS_D, I_INVD, I_INVLPG, I_INT3, I_INTN, I_INTO, I_IRET, I_IRETD, I_IRETQ,
|
|
I_JMP, I_JCC,
|
|
I_LAR, I_LEA, I_LEAVE, I_LLDT, I_LMSW, I_LSL, I_LTR, I_LODS_B, I_LODS_W, I_LODS_D, I_LODS_Q, I_LOOP,
|
|
I_MOV, I_MOVBE, I_MOVS_B, I_MOVS_W, I_MOVS_D, I_MOVS_Q, I_MOVZX, I_MOVSX, I_MOVSXD, I_MUL,
|
|
I_NEG, I_NOP, I_NOT,
|
|
I_OR, I_OUT, I_OUTS_B, I_OUTS_W, I_OUTS_D,
|
|
I_POP, I_POPAD, I_POPF, I_POPFD, I_POPFQ, I_PUSH, I_PUSHAD, I_PUSHF, I_PUSHFD, I_PUSHFQ,
|
|
I_RDMSR, I_RDPMC, I_RDTSC, I_RET, I_RCL, I_RCR, I_ROL, I_ROR, I_RSM,
|
|
I_SAR, I_SHL, I_SHR, I_SBB, I_SCAS_B, I_SCAS_W, I_SCAS_D, I_SCAS_Q, I_SETCC, I_SHLD, I_SHRD, I_SGDT, I_SIDT, I_SLDT, I_SMSW, I_STC, I_STD, I_STI,
|
|
I_STOS_B, I_STOS_W, I_STOS_D, I_STOS_Q, I_SUB, I_SWAPGS, I_SYSCALL, I_SYSENTER, I_SYSEXIT, I_SYSRET,
|
|
I_TEST,
|
|
I_UD2,
|
|
I_VERR, I_VERW,
|
|
I_WAIT, I_WBINVD, I_WRMSR,
|
|
I_XADD, I_XCHG, I_XGETBV, I_XLATB, I_XOR,
|
|
|
|
I_F2XM1, I_FABS, I_FADD, I_FADDP, I_FIADD,
|
|
I_FBLD, I_FBSTP, I_FCHS, I_FCLEX, I_FNCLEX, I_FCMOVCC, I_FCOM, I_FCOMP, I_FCOMPP, I_FCOMI, I_FCOMIP, I_FCOS,
|
|
I_FDECSTP, I_FDIV, I_FDIVP, I_FIDIV, I_FDIVR, I_FDIVRP, I_FIDIVR,
|
|
I_FFREE,
|
|
I_FICOM, I_FICOMP, I_FILD, I_FINCSTP, I_FINIT, I_FNINIT, I_FIST, I_FISTP,
|
|
I_FLD, I_FLD1, I_FLDCW, I_FLDENV, I_FLDL2E, I_FLDL2T, I_FLDLG2, I_FLDLN2, I_FLDPI, I_FLDZ,
|
|
I_FMUL, I_FMULP, I_FIMUL,
|
|
I_FNOP,
|
|
I_FPATAN, I_FPREM, I_FPREM1, I_FPTAN,
|
|
I_FRNDINT, I_FRSTOR,
|
|
I_FSAVE, I_FNSAVE, I_FSCALE, I_FSIN, I_FSINCOS, I_FSQRT, I_FST, I_FSTP, I_FSTCW, I_FNSTCW, I_FSTENV, I_FNSTENV, I_FSTSW, I_FNSTSW,
|
|
I_FSUB, I_FSUBP, I_FISUB, I_FSUBR, I_FSUBRP, I_FISUBR,
|
|
I_FTST,
|
|
I_FUCOM, I_FUCOMP, I_FUCOMPP, I_FUCOMI, I_FUCOMIP,
|
|
I_FXAM, I_FXCH, I_FXRSTOR, I_FXSAVE, I_FXTRACT,
|
|
I_FYL2X, I_FYL2XP1,
|
|
|
|
I_ADDPS, I_ADDSS, I_ADDPD, I_ADDSD, I_ADDSUBPS, I_ADDSUBPD, I_ANDPS, I_ANDPD, I_ANDNPS, I_ANDNPD,
|
|
I_BLENDPS, I_BLENDPD, I_BLENDVPS, I_BLENDVPD,
|
|
I_CLFLUSH, I_CMPPS, I_CMPSS, I_CMPPD, I_CMPSD, I_COMISS, I_COMISD, I_CRC32,
|
|
I_CVTDQ2PD, I_CVTDQ2PS, I_CVTPD2DQ, I_CVTPD2PI, I_CVTPD2PS, I_CVTPI2PD, I_CVTPI2PS, I_CVTPS2DQ, I_CVTPS2PD, I_CVTPS2PI, I_CVTSD2SI,
|
|
I_CVTSD2SS, I_CVTSI2SD, I_CVTSI2SS, I_CVTSS2SD, I_CVTSS2SI, I_CVTTPD2DQ, I_CVTTPD2PI, I_CVTTPS2DQ, I_CVTTPS2PI, I_CVTTSD2SI, I_CVTTSS2SI,
|
|
I_DIVPS, I_DIVSS, I_DIVPD, I_DIVSD, I_DPPS, I_DPPD,
|
|
I_EMMS, I_EXTRACTPS,
|
|
I_FISTTP,
|
|
I_HADDPS, I_HADDPD, I_HSUBPS, I_HSUBPD,
|
|
I_INSERTPS,
|
|
I_LDDQU, I_LDMXCSR, I_LFENCE,
|
|
I_MASKMOVDQU, I_MASKMOVQ, I_MAXPS, I_MAXSS, I_MAXPD, I_MAXSD, I_MFENCE, I_MINPS, I_MINSS, I_MINPD, I_MINSD, I_MONITOR,
|
|
I_MOVAPD, I_MOVAPS, I_MOVD, I_MOVDDUP, I_MOVDQA, I_MOVDQU, I_MOVDQ2Q, I_MOVHLPS, I_MOVLHPS, I_MOVHPS, I_MOVHPD, I_MOVLPS, I_MOVLPD,
|
|
I_MOVMSKPS, I_MOVMSKPD, I_MOVNTDQ, I_MOVNTDQA, I_MOVNTI, I_MOVNTPD, I_MOVNTPS, I_MOVNTQ, I_MOVQ, I_MOVQ2DQ, I_MOVSD, I_MOVSS,
|
|
I_MOVSHDUP, I_MOVSLDUP, I_MOVUPS, I_MOVUPD, I_MPSADBW, I_MULPS, I_MULSS, I_MULPD, I_MULSD, I_MWAIT,
|
|
I_ORPS, I_ORPD,
|
|
I_PABSB, I_PABSD, I_PABSW, I_PACKSSDW, I_PACKSSWB, I_PACKUSDW, I_PACKUSWB, I_PADDB, I_PADDD, I_PADDQ, I_PADDSB, I_PADDSW, I_PADDUSB,
|
|
I_PADDUSW, I_PADDW, I_PALIGNR, I_PAND, I_PANDN, I_PAUSE, I_PAVGB, I_PAVGW,
|
|
I_PBLENDVB, I_PBLENDW,
|
|
I_PCMPEQB, I_PCMPEQW, I_PCMPEQD, I_PCMPEQQ, I_PCMPESTRI, I_PCMPESTRM, I_PCMPISTRI, I_PCMPISTRM, I_PCMPGTB, I_PCMPGTW, I_PCMPGTD, I_PCMPGTQ,
|
|
I_PEXTRB, I_PEXTRW, I_PEXTRD, I_PEXTRQ,
|
|
I_PHADDW, I_PHADDD, I_PHADDSW, I_PHMINPOSUW, I_PHSUBW, I_PHSUBD, I_PHSUBSW,
|
|
I_PINSRB, I_PINSRW, I_PINSRD, I_PINSRQ,
|
|
I_PMADDUBSW, I_PMADDWD, I_PMAXSB, I_PMAXSW, I_PMAXSD, I_PMAXUB, I_PMAXUW, I_PMAXUD, I_PMINSB, I_PMINSW, I_PMINSD, I_PMINUB, I_PMINUW,
|
|
I_PMINUD, I_PMOVMSKB, I_PMOVSXBW, I_PMOVSXBD, I_PMOVSXBQ, I_PMOVSXWD, I_PMOVSXWQ, I_PMOVSXDQ, I_PMOVZXBW, I_PMOVZXBD, I_PMOVZXBQ, I_PMOVZXWD,
|
|
I_PMOVZXWQ, I_PMOVZXDQ, I_PMULDQ, I_PMULHRSW, I_PMULHUW, I_PMULHW, I_PMULLW, I_PMULLD, I_PMULUDQ,
|
|
I_POPCNT, I_POR,
|
|
I_PREFETCH,
|
|
I_PSADBW, I_PSHUFB, I_PSHUFD, I_PSHUFHW, I_PSHUFLW, I_PSHUFW, I_PSIGNB, I_PSIGNW, I_PSIGND, I_PSLLW, I_PSLLD, I_PSLLQ, I_PSLLDQ, I_PSRAW,
|
|
I_PSRAD, I_PSRLW, I_PSRLD, I_PSRLQ, I_PSRLDQ, I_PSUBB, I_PSUBW, I_PSUBD, I_PSUBQ, I_PSUBSB, I_PSUBSW, I_PSUBUSB, I_PSUBUSW,
|
|
I_PTEST,
|
|
I_PUNPCKHBW, I_PUNPCKHWD, I_PUNPCKHDQ, I_PUNPCKHQDQ, I_PUNPCKLBW, I_PUNPCKLWD, I_PUNPCKLDQ, I_PUNPCKLQDQ,
|
|
I_PXOR,
|
|
I_RCPPS, I_RCPSS, I_ROUNDPS, I_ROUNDPD, I_ROUNDSS, I_ROUNDSD, I_RSQRTPS, I_RSQRTSS,
|
|
I_SFENCE, I_SHUFPS, I_SHUFPD, I_SQRTPS, I_SQRTSS, I_SQRTPD, I_SQRTSD, I_STMXCSR, I_SUBPS, I_SUBSS, I_SUBPD, I_SUBSD,
|
|
I_UCOMISS, I_UCOMISD, I_UNPCKHPS, I_UNPCKHPD, I_UNPCKLPS, I_UNPCKLPD,
|
|
I_XORPS, I_XORPD,
|
|
|
|
I_VBROADCASTSS, I_VBROADCASTSD, I_VBROADCASTF128,
|
|
I_VEXTRACTF128,
|
|
I_VINSERTF128,
|
|
I_VMASKMOVPS, I_VMASKMOVPD,
|
|
I_VPERMILPD, I_VPERMILPS, I_VPERM2F128,
|
|
I_VTESTPS, I_VTESTPD,
|
|
I_VZEROALL, I_VZEROUPPER,
|
|
|
|
I_AESENC, I_AESENCLAST, I_AESDEC, I_AESDECLAST, I_AESIMC, I_AESKEYGENASSIST,
|
|
I_PCLMULQDQ,
|
|
|
|
// FMA
|
|
I_VFMADD132PD, I_VFMADD213PD, I_VFMADD231PD, I_VFMADD132PS, I_VFMADD213PS, I_VFMADD231PS,
|
|
I_VFMADD132SD, I_VFMADD213SD, I_VFMADD231SD, I_VFMADD132SS, I_VFMADD213SS, I_VFMADD231SS,
|
|
I_VFMADDSUB132PD, I_VFMADDSUB213PD, I_VFMADDSUB231PD, I_VFMADDSUB132PS, I_VFMADDSUB213PS, I_VFMADDSUB231PS,
|
|
I_VFMSUBADD132PD, I_VFMSUBADD213PD, I_VFMSUBADD231PD, I_VFMSUBADD132PS, I_VFMSUBADD213PS, I_VFMSUBADD231PS,
|
|
I_VFMSUB132PD, I_VFMSUB213PD, I_VFMSUB231PD, I_VFMSUB132PS, I_VFMSUB213PS, I_VFMSUB231PS,
|
|
I_VFMSUB132SD, I_VFMSUB213SD, I_VFMSUB231SD, I_VFMSUB132SS, I_VFMSUB213SS, I_VFMSUB231SS,
|
|
I_VFNMADD132PD, I_VFNMADD213PD, I_VFNMADD231PD, I_VFNMADD132PS, I_VFNMADD213PS, I_VFNMADD231PS,
|
|
I_VFNMADD132SD, I_VFNMADD213SD, I_VFNMADD231SD, I_VFNMADD132SS, I_VFNMADD213SS, I_VFNMADD231SS,
|
|
I_VFNMSUB132PD, I_VFNMSUB213PD, I_VFNMSUB231PD, I_VFNMSUB132PS, I_VFNMSUB213PS, I_VFNMSUB231PS,
|
|
I_VFNMSUB132SD, I_VFNMSUB213SD, I_VFNMSUB231SD, I_VFNMSUB132SS, I_VFNMSUB213SS, I_VFNMSUB231SS,
|
|
|
|
// F16C
|
|
I_RDFSBASE, I_RDGSBASE, I_RDRAND, I_WRFSBASE, I_WRGSBASE, I_VCVTPH2PS, I_VCVTPS2PH,
|
|
|
|
// XOP
|
|
I_VFRCZPD, I_VFRCZPS, I_VFRCZSD, I_VFRCZSS,
|
|
I_VPCMOV, I_VPCOMB, I_VPCOMD, I_VPCOMQ, I_VPCOMUB, I_VPCOMUD, I_VPCOMUQ, I_VPCOMUW, I_VPCOMW, I_VPERMIL2PD, I_VPERMIL2PS,
|
|
I_VPHADDBD, I_VPHADDBQ, I_VPHADDBW, I_VPHADDDQ, I_VPHADDUBD, I_VPHADDUBQ, I_VPHADDUBW, I_VPHADDUDQ, I_VPHADDUWD, I_VPHADDUWQ,
|
|
I_VPHADDWD, I_VPHADDWQ, I_VPHSUBBW, I_VPHSUBDQ, I_VPHSUBWD,
|
|
I_VPMACSDD, I_VPMACSDQH, I_VPMACSDQL, I_VPMACSSDD, I_VPMACSSDQH, I_VPMACSSDQL, I_VPMACSSWD, I_VPMACSSWW, I_VPMACSWD, I_VPMACSWW,
|
|
I_VPMADCSSWD, I_VPMADCSWD,
|
|
I_VPPERM, I_VPROTB, I_VPROTD, I_VPROTQ, I_VPROTW, I_VPSHAB, I_VPSHAD, I_VPSHAQ, I_VPSHAW, I_VPSHLB, I_VPSHLD, I_VPSHLQ, I_VPSHLW,
|
|
|
|
// FMA4
|
|
I_VFMADDPD, I_VFMADDPS, I_VFMADDSD, I_VFMADDSS,
|
|
I_VFMADDSUBPD, I_VFMADDSUBPS,
|
|
I_VFMSUBADDPD, I_VFMSUBADDPS,
|
|
I_VFMSUBPD, I_VFMSUBPS, I_VFMSUBSD, I_VFMSUBSS,
|
|
I_VFNMADDPD, I_VFNMADDPS, I_VFNMADDSD, I_VFNMADDSS,
|
|
I_VFNMSUBPD, I_VFNMSUBPS, I_VFNMSUBSD, I_VFNMSUBSS,
|
|
|
|
// jitasm compiler instructions
|
|
I_COMPILER_DECLARE_REG_ARG, ///< Declare register argument
|
|
I_COMPILER_DECLARE_STACK_ARG, ///< Declare stack argument
|
|
I_COMPILER_DECLARE_RESULT_REG, ///< Declare result register (eax/rax/xmm0)
|
|
I_COMPILER_PROLOG, ///< Function prolog
|
|
I_COMPILER_EPILOG ///< Function epilog
|
|
};
|
|
|
|
enum JumpCondition
|
|
{
|
|
JCC_O, JCC_NO, JCC_B, JCC_AE, JCC_E, JCC_NE, JCC_BE, JCC_A, JCC_S, JCC_NS, JCC_P, JCC_NP, JCC_L, JCC_GE, JCC_LE, JCC_G,
|
|
JCC_CXZ, JCC_ECXZ, JCC_RCXZ,
|
|
};
|
|
|
|
enum EncodingFlags
|
|
{
|
|
E_SPECIAL = 1 << 0,
|
|
E_OPERAND_SIZE_PREFIX = 1 << 1, ///< Operand-size override prefix
|
|
E_REP_PREFIX = 1 << 2, ///< REP prefix
|
|
E_REXW_PREFIX = 1 << 3, ///< REX.W
|
|
E_MANDATORY_PREFIX_66 = 1 << 4, ///< Mandatory prefix 66
|
|
E_MANDATORY_PREFIX_F2 = 1 << 5, ///< Mandatory prefix F2
|
|
E_MANDATORY_PREFIX_F3 = 1 << 6, ///< Mandatory prefix F3
|
|
E_VEX = 1 << 7,
|
|
E_XOP = 1 << 8,
|
|
E_VEX_L = 1 << 9,
|
|
E_VEX_W = 1 << 10,
|
|
E_VEX_MMMMM_SHIFT = 11,
|
|
E_VEX_MMMMM_MASK = 0x1F << E_VEX_MMMMM_SHIFT,
|
|
E_VEX_0F = 1 << E_VEX_MMMMM_SHIFT,
|
|
E_VEX_0F38 = 2 << E_VEX_MMMMM_SHIFT,
|
|
E_VEX_0F3A = 3 << E_VEX_MMMMM_SHIFT,
|
|
E_XOP_M00011 = 3 << E_VEX_MMMMM_SHIFT,
|
|
E_XOP_M01000 = 8 << E_VEX_MMMMM_SHIFT,
|
|
E_XOP_M01001 = 9 << E_VEX_MMMMM_SHIFT,
|
|
E_VEX_PP_SHIFT = 16,
|
|
E_VEX_PP_MASK = 0x3 << E_VEX_PP_SHIFT,
|
|
E_VEX_66 = 1 << E_VEX_PP_SHIFT,
|
|
E_VEX_F3 = 2 << E_VEX_PP_SHIFT,
|
|
E_VEX_F2 = 3 << E_VEX_PP_SHIFT,
|
|
E_XOP_P00 = 0 << E_VEX_PP_SHIFT,
|
|
E_XOP_P01 = 1 << E_VEX_PP_SHIFT,
|
|
|
|
E_VEX_128 = E_VEX,
|
|
E_VEX_256 = E_VEX | E_VEX_L,
|
|
E_VEX_LIG = E_VEX,
|
|
E_VEX_LZ = E_VEX,
|
|
E_VEX_66_0F = E_VEX_66 | E_VEX_0F,
|
|
E_VEX_66_0F38 = E_VEX_66 | E_VEX_0F38,
|
|
E_VEX_66_0F3A = E_VEX_66 | E_VEX_0F3A,
|
|
E_VEX_F2_0F = E_VEX_F2 | E_VEX_0F,
|
|
E_VEX_F2_0F38 = E_VEX_F2 | E_VEX_0F38,
|
|
E_VEX_F2_0F3A = E_VEX_F2 | E_VEX_0F3A,
|
|
E_VEX_F3_0F = E_VEX_F3 | E_VEX_0F,
|
|
E_VEX_F3_0F38 = E_VEX_F3 | E_VEX_0F38,
|
|
E_VEX_F3_0F3A = E_VEX_F3 | E_VEX_0F3A,
|
|
E_VEX_W0 = 0,
|
|
E_VEX_W1 = E_VEX_W,
|
|
E_VEX_WIG = 0,
|
|
E_XOP_128 = E_XOP,
|
|
E_XOP_256 = E_XOP | E_VEX_L,
|
|
E_XOP_W0 = 0,
|
|
E_XOP_W1 = E_VEX_W,
|
|
|
|
// Aliases
|
|
E_VEX_128_0F_WIG = E_VEX_128 | E_VEX_0F | E_VEX_WIG,
|
|
E_VEX_256_0F_WIG = E_VEX_256 | E_VEX_0F | E_VEX_WIG,
|
|
E_VEX_128_66_0F_WIG = E_VEX_128 | E_VEX_66_0F | E_VEX_WIG,
|
|
E_VEX_256_66_0F_WIG = E_VEX_256 | E_VEX_66_0F | E_VEX_WIG,
|
|
E_VEX_128_66_0F38_WIG = E_VEX_128 | E_VEX_66_0F38 | E_VEX_WIG,
|
|
E_VEX_256_66_0F38_WIG = E_VEX_256 | E_VEX_66_0F38 | E_VEX_WIG,
|
|
E_VEX_128_66_0F38_W0 = E_VEX_128 | E_VEX_66_0F38 | E_VEX_W0,
|
|
E_VEX_256_66_0F38_W0 = E_VEX_256 | E_VEX_66_0F38 | E_VEX_W0,
|
|
E_VEX_128_66_0F38_W1 = E_VEX_128 | E_VEX_66_0F38 | E_VEX_W1,
|
|
E_VEX_256_66_0F38_W1 = E_VEX_256 | E_VEX_66_0F38 | E_VEX_W1,
|
|
E_VEX_128_66_0F3A_W0 = E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0,
|
|
E_VEX_256_66_0F3A_W0 = E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0,
|
|
};
|
|
|
|
/// Instruction
|
|
struct Instr
|
|
{
|
|
static const size_t MAX_OPERAND_COUNT = 6;
|
|
|
|
InstrID id_; ///< Instruction ID
|
|
uint32 opcode_; ///< Opcode
|
|
uint32 encoding_flag_; ///< EncodingFlags
|
|
detail::Opd opd_[MAX_OPERAND_COUNT]; ///< Operands
|
|
|
|
Instr(InstrID id, uint32 opcode, uint32 encoding_flag, const detail::Opd& opd1 = detail::Opd(), const detail::Opd& opd2 = detail::Opd(), const detail::Opd& opd3 = detail::Opd(), const detail::Opd& opd4 = detail::Opd(), const detail::Opd& opd5 = detail::Opd(), const detail::Opd& opd6 = detail::Opd())
|
|
: id_(id), opcode_(opcode), encoding_flag_(encoding_flag) {opd_[0] = opd1, opd_[1] = opd2, opd_[2] = opd3, opd_[3] = opd4, opd_[4] = opd5, opd_[5] = opd6;}
|
|
|
|
InstrID GetID() const {return id_;}
|
|
const detail::Opd& GetOpd(size_t index) const {return opd_[index];}
|
|
detail::Opd& GetOpd(size_t index) {return opd_[index];}
|
|
};
|
|
|
|
/// Assembler backend
|
|
struct Backend
|
|
{
|
|
uint8* pbuff_;
|
|
size_t buffsize_;
|
|
size_t size_;
|
|
|
|
Backend(void* pbuff = NULL, size_t buffsize = 0) : pbuff_((uint8*) pbuff), buffsize_(buffsize), size_(0)
|
|
{
|
|
memset(pbuff, 0xCC, buffsize); // INT3
|
|
}
|
|
|
|
size_t GetSize() const
|
|
{
|
|
return size_;
|
|
}
|
|
|
|
void put_bytes(void* p, size_t n)
|
|
{
|
|
uint8* pb = (uint8*) p;
|
|
while (n--) {
|
|
if (pbuff_) {
|
|
if (size_ == buffsize_) JITASM_ASSERT(0);
|
|
pbuff_[size_] = *pb++;
|
|
}
|
|
size_++;
|
|
}
|
|
}
|
|
void db(uint64 b) {put_bytes(&b, 1);}
|
|
void dw(uint64 w) {put_bytes(&w, 2);}
|
|
void dd(uint64 d) {put_bytes(&d, 4);}
|
|
void dq(uint64 q) {put_bytes(&q, 8);}
|
|
|
|
uint8 GetWRXB(int w, const detail::Opd& reg, const detail::Opd& r_m)
|
|
{
|
|
uint8 wrxb = w ? 8 : 0;
|
|
if (reg.IsReg()) {
|
|
if (!reg.GetReg().IsInvalid() && reg.GetReg().id >= R8) wrxb |= 4;
|
|
}
|
|
if (r_m.IsReg()) {
|
|
if (r_m.GetReg().id >= R8) wrxb |= 1;
|
|
}
|
|
if (r_m.IsMem()) {
|
|
if (!r_m.GetIndex().IsInvalid() && r_m.GetIndex().id >= R8) wrxb |= 2;
|
|
if (!r_m.GetBase().IsInvalid() && r_m.GetBase().id >= R8) wrxb |= 1;
|
|
}
|
|
return wrxb;
|
|
}
|
|
|
|
void EncodePrefixes(uint32 flag, const detail::Opd& reg, const detail::Opd& r_m, const detail::Opd& vex)
|
|
{
|
|
if (flag & (E_VEX | E_XOP)) {
|
|
// Encode VEX prefix
|
|
#ifdef JITASM64
|
|
if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67);
|
|
#endif
|
|
uint8 vvvv = vex.IsReg() ? 0xF - (uint8) vex.GetReg().id : 0xF;
|
|
uint8 mmmmm = (flag & E_VEX_MMMMM_MASK) >> E_VEX_MMMMM_SHIFT;
|
|
uint8 pp = static_cast<uint8>((flag & E_VEX_PP_MASK) >> E_VEX_PP_SHIFT);
|
|
uint8 wrxb = GetWRXB(flag & E_VEX_W, reg, r_m);
|
|
if (flag & E_XOP) {
|
|
db(0x8F);
|
|
db((~wrxb & 7) << 5 | mmmmm);
|
|
db((wrxb & 8) << 4 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp);
|
|
} else if (wrxb & 0xB || (flag & E_VEX_MMMMM_MASK) == E_VEX_0F38 || (flag & E_VEX_MMMMM_MASK) == E_VEX_0F3A) {
|
|
db(0xC4);
|
|
db((~wrxb & 7) << 5 | mmmmm);
|
|
db((wrxb & 8) << 4 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp);
|
|
} else {
|
|
db(0xC5);
|
|
db((~wrxb & 4) << 5 | vvvv << 3 | (flag & E_VEX_L ? 4 : 0) | pp);
|
|
}
|
|
} else {
|
|
uint8 wrxb = GetWRXB(flag & E_REXW_PREFIX, reg, r_m);
|
|
if (wrxb) {
|
|
// Encode REX prefix
|
|
JITASM_ASSERT(!reg.IsReg() || reg.GetSize() != O_SIZE_8 || reg.GetReg().id < AH || reg.GetReg().id >= R8B); // AH, BH, CH, or DH may not be used with REX.
|
|
JITASM_ASSERT(!r_m.IsReg() || r_m.GetSize() != O_SIZE_8 || r_m.GetReg().id < AH || r_m.GetReg().id >= R8B); // AH, BH, CH, or DH may not be used with REX.
|
|
|
|
if (flag & E_REP_PREFIX) db(0xF3);
|
|
#ifdef JITASM64
|
|
if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67);
|
|
#endif
|
|
if (flag & E_OPERAND_SIZE_PREFIX) db(0x66);
|
|
|
|
if (flag & E_MANDATORY_PREFIX_66) db(0x66);
|
|
else if (flag & E_MANDATORY_PREFIX_F2) db(0xF2);
|
|
else if (flag & E_MANDATORY_PREFIX_F3) db(0xF3);
|
|
|
|
db(0x40 | wrxb);
|
|
} else {
|
|
if (flag & E_MANDATORY_PREFIX_66) db(0x66);
|
|
else if (flag & E_MANDATORY_PREFIX_F2) db(0xF2);
|
|
else if (flag & E_MANDATORY_PREFIX_F3) db(0xF3);
|
|
|
|
if (flag & E_REP_PREFIX) db(0xF3);
|
|
#ifdef JITASM64
|
|
if (r_m.IsMem() && r_m.GetAddressSize() != O_SIZE_64) db(0x67);
|
|
#endif
|
|
if (flag & E_OPERAND_SIZE_PREFIX) db(0x66);
|
|
}
|
|
}
|
|
}
|
|
|
|
void EncodeModRM(uint8 reg, const detail::Opd& r_m)
|
|
{
|
|
reg &= 0x7;
|
|
|
|
if (r_m.IsReg()) {
|
|
db(0xC0 | (reg << 3) | (r_m.GetReg().id & 0x7));
|
|
} else if (r_m.IsMem()) {
|
|
JITASM_ASSERT(r_m.GetBase().type == R_TYPE_GP && r_m.GetIndex().type == R_TYPE_GP);
|
|
int base = r_m.GetBase().id; if (base != INVALID) base &= 0x7;
|
|
int index = r_m.GetIndex().id; if (index != INVALID) index &= 0x7;
|
|
|
|
if (base == INVALID && index == INVALID) {
|
|
#ifdef JITASM64
|
|
db(reg << 3 | 4);
|
|
db(0x25);
|
|
#else
|
|
db(reg << 3 | 5);
|
|
#endif
|
|
dd(r_m.GetDisp());
|
|
} else {
|
|
JITASM_ASSERT(base != ESP || index != ESP);
|
|
JITASM_ASSERT(index != ESP || r_m.GetScale() == 0);
|
|
|
|
if (index == ESP) {
|
|
index = base;
|
|
base = ESP;
|
|
}
|
|
bool sib = index != INVALID || r_m.GetScale() || base == ESP;
|
|
|
|
// ModR/M
|
|
uint8 mod = 0;
|
|
if (r_m.GetDisp() == 0 || (sib && base == INVALID)) mod = base != EBP ? 0 : 1;
|
|
else if (detail::IsInt8(r_m.GetDisp())) mod = 1;
|
|
else if (detail::IsInt32(r_m.GetDisp())) mod = 2;
|
|
else JITASM_ASSERT(0);
|
|
db(mod << 6 | reg << 3 | (sib ? 4 : base));
|
|
|
|
// SIB
|
|
if (sib) {
|
|
uint8 ss = 0;
|
|
if (r_m.GetScale() == 0) ss = 0;
|
|
else if (r_m.GetScale() == 2) ss = 1;
|
|
else if (r_m.GetScale() == 4) ss = 2;
|
|
else if (r_m.GetScale() == 8) ss = 3;
|
|
else JITASM_ASSERT(0);
|
|
if (index != INVALID && base != INVALID) {
|
|
db(ss << 6 | index << 3 | base);
|
|
} else if (base != INVALID) {
|
|
db(ss << 6 | 4 << 3 | base);
|
|
} else if (index != INVALID) {
|
|
db(ss << 6 | index << 3 | 5);
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
// Displacement
|
|
if (mod == 0 && sib && base == INVALID) dd(r_m.GetDisp());
|
|
if (mod == 1) db(r_m.GetDisp());
|
|
if (mod == 2) dd(r_m.GetDisp());
|
|
}
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
void EncodeOpcode(uint32 opcode)
|
|
{
|
|
if (opcode & 0xFF000000) db((opcode >> 24) & 0xFF);
|
|
if (opcode & 0xFFFF0000) db((opcode >> 16) & 0xFF);
|
|
if (opcode & 0xFFFFFF00) db((opcode >> 8) & 0xFF);
|
|
db(opcode & 0xFF);
|
|
}
|
|
|
|
void EncodeImm(const detail::Opd& imm)
|
|
{
|
|
const OpdSize size = imm.GetSize();
|
|
if (size == O_SIZE_8) db(imm.GetImm());
|
|
else if (size == O_SIZE_16) dw(imm.GetImm());
|
|
else if (size == O_SIZE_32) dd(imm.GetImm());
|
|
else if (size == O_SIZE_64) dq(imm.GetImm());
|
|
else JITASM_ASSERT(0);
|
|
}
|
|
|
|
void Encode(const Instr& instr)
|
|
{
|
|
uint32 opcode = instr.opcode_;
|
|
|
|
const detail::Opd& opd1 = instr.GetOpd(0).IsDummy() ? detail::Opd() : instr.GetOpd(0); JITASM_ASSERT(!(opd1.IsReg() && opd1.GetReg().IsSymbolic()));
|
|
const detail::Opd& opd2 = instr.GetOpd(1).IsDummy() ? detail::Opd() : instr.GetOpd(1); JITASM_ASSERT(!(opd2.IsReg() && opd2.GetReg().IsSymbolic()));
|
|
const detail::Opd& opd3 = instr.GetOpd(2).IsDummy() ? detail::Opd() : instr.GetOpd(2); JITASM_ASSERT(!(opd3.IsReg() && opd3.GetReg().IsSymbolic()));
|
|
const detail::Opd& opd4 = instr.GetOpd(3).IsDummy() ? detail::Opd() : instr.GetOpd(3); JITASM_ASSERT(!(opd4.IsReg() && opd4.GetReg().IsSymbolic()));
|
|
|
|
// +rb, +rw, +rd, +ro
|
|
if (opd1.IsReg() && (opd2.IsNone() || opd2.IsImm())) {
|
|
opcode += opd1.GetReg().id & 0x7;
|
|
}
|
|
|
|
if ((opd1.IsImm() || opd1.IsReg()) && (opd2.IsReg() || opd2.IsMem())) { // ModR/M
|
|
const detail::Opd& reg = opd1;
|
|
const detail::Opd& r_m = opd2;
|
|
const detail::Opd& vex = opd3;
|
|
EncodePrefixes(instr.encoding_flag_, reg, r_m, vex);
|
|
EncodeOpcode(opcode);
|
|
EncodeModRM((uint8) (reg.IsImm() ? reg.GetImm() : reg.GetReg().id), r_m);
|
|
|
|
// /is4
|
|
if (opd4.IsReg()) {
|
|
EncodeImm(Imm8(static_cast<uint8>(opd4.GetReg().id << 4)));
|
|
}
|
|
} else {
|
|
const detail::Opd& reg = detail::Opd();
|
|
const detail::Opd& r_m = opd1.IsReg() ? opd1 : detail::Opd();
|
|
const detail::Opd& vex = detail::Opd();
|
|
EncodePrefixes(instr.encoding_flag_, reg, r_m, vex);
|
|
EncodeOpcode(opcode);
|
|
}
|
|
|
|
if (opd1.IsImm() && !opd2.IsReg() && !opd2.IsMem()) EncodeImm(opd1);
|
|
if (opd2.IsImm()) EncodeImm(opd2);
|
|
if (opd3.IsImm()) EncodeImm(opd3);
|
|
if (opd4.IsImm()) EncodeImm(opd4);
|
|
}
|
|
|
|
void EncodeALU(const Instr& instr, uint32 opcode)
|
|
{
|
|
const detail::Opd& reg = instr.GetOpd(1);
|
|
const detail::Opd& imm = instr.GetOpd(2);
|
|
JITASM_ASSERT(instr.GetOpd(0).IsImm() && reg.IsReg() && imm.IsImm());
|
|
|
|
if (reg.GetReg().id == EAX && (reg.GetSize() == O_SIZE_8 || !detail::IsInt8(imm.GetImm()))) {
|
|
opcode |= (reg.GetSize() == O_SIZE_8 ? 0 : 1);
|
|
Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, reg, imm));
|
|
} else {
|
|
Encode(instr);
|
|
}
|
|
}
|
|
|
|
void EncodeJMP(const Instr& instr)
|
|
{
|
|
const detail::Opd& imm = instr.GetOpd(0);
|
|
if (instr.GetID() == I_JMP) {
|
|
Encode(Instr(instr.GetID(), imm.GetSize() == O_SIZE_8 ? 0xEB : 0xE9, instr.encoding_flag_, imm));
|
|
} else if (instr.GetID() == I_JCC) {
|
|
#ifndef JITASM64
|
|
uint32 tttn = instr.opcode_;
|
|
if (tttn == JCC_CXZ) Encode(Instr(instr.GetID(), 0x67E3, instr.encoding_flag_, imm));
|
|
else if (tttn == JCC_ECXZ) Encode(Instr(instr.GetID(), 0xE3, instr.encoding_flag_, imm));
|
|
else Encode(Instr(instr.GetID(), (imm.GetSize() == O_SIZE_8 ? 0x70 : 0x0F80) | tttn, instr.encoding_flag_, imm));
|
|
#else
|
|
uint32 tttn = instr.opcode_;
|
|
if (tttn == JCC_ECXZ) Encode(Instr(instr.GetID(), 0x67E3, instr.encoding_flag_, imm));
|
|
else if (tttn == JCC_RCXZ) Encode(Instr(instr.GetID(), 0xE3, instr.encoding_flag_, imm));
|
|
else Encode(Instr(instr.GetID(), (imm.GetSize() == O_SIZE_8 ? 0x70 : 0x0F80) | tttn, instr.encoding_flag_, imm));
|
|
#endif
|
|
} else if (instr.GetID() == I_LOOP) {
|
|
Encode(Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, imm));
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
void EncodeMOV(const Instr& instr)
|
|
{
|
|
#ifndef JITASM64
|
|
const detail::Opd& reg = instr.GetOpd(0);
|
|
const detail::Opd& mem = instr.GetOpd(1);
|
|
JITASM_ASSERT(reg.IsReg() && mem.IsMem());
|
|
|
|
if (reg.GetReg().id == EAX && mem.GetBase().IsInvalid() && mem.GetIndex().IsInvalid()) {
|
|
uint32 opcode = 0xA0 | (~instr.opcode_ & 0x2) | (instr.opcode_ & 1);
|
|
Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, Imm32((sint32) mem.GetDisp())));
|
|
} else {
|
|
Encode(instr);
|
|
}
|
|
#else
|
|
Encode(instr);
|
|
#endif
|
|
}
|
|
|
|
void EncodeTEST(const Instr& instr)
|
|
{
|
|
const detail::Opd& reg = instr.GetOpd(1);
|
|
const detail::Opd& imm = instr.GetOpd(2);
|
|
JITASM_ASSERT(instr.GetOpd(0).IsImm() && reg.IsReg() && imm.IsImm());
|
|
|
|
if (reg.GetReg().id == EAX) {
|
|
uint32 opcode = 0xA8 | (reg.GetSize() == O_SIZE_8 ? 0 : 1);
|
|
Encode(Instr(instr.GetID(), opcode, instr.encoding_flag_, reg, imm));
|
|
} else {
|
|
Encode(instr);
|
|
}
|
|
}
|
|
|
|
void EncodeXCHG(const Instr& instr)
|
|
{
|
|
const detail::Opd& dst = instr.GetOpd(0);
|
|
const detail::Opd& src = instr.GetOpd(1);
|
|
JITASM_ASSERT(dst.IsReg() && src.IsReg());
|
|
|
|
if (dst.GetReg().id == EAX) {
|
|
Encode(Instr(instr.GetID(), 0x90, instr.encoding_flag_, src));
|
|
} else if (src.GetReg().id == EAX) {
|
|
Encode(Instr(instr.GetID(), 0x90, instr.encoding_flag_, dst));
|
|
} else {
|
|
Encode(instr);
|
|
}
|
|
}
|
|
|
|
void Assemble(const Instr& instr)
|
|
{
|
|
if (instr.encoding_flag_ & E_SPECIAL) {
|
|
switch (instr.GetID()) {
|
|
case I_ADD: EncodeALU(instr, 0x04); break;
|
|
case I_OR: EncodeALU(instr, 0x0C); break;
|
|
case I_ADC: EncodeALU(instr, 0x14); break;
|
|
case I_SBB: EncodeALU(instr, 0x1C); break;
|
|
case I_AND: EncodeALU(instr, 0x24); break;
|
|
case I_SUB: EncodeALU(instr, 0x2C); break;
|
|
case I_XOR: EncodeALU(instr, 0x34); break;
|
|
case I_CMP: EncodeALU(instr, 0x3C); break;
|
|
case I_JMP: EncodeJMP(instr); break;
|
|
case I_JCC: EncodeJMP(instr); break;
|
|
case I_LOOP: EncodeJMP(instr); break;
|
|
case I_MOV: EncodeMOV(instr); break;
|
|
case I_TEST: EncodeTEST(instr); break;
|
|
case I_XCHG: EncodeXCHG(instr); break;
|
|
default: JITASM_ASSERT(0); break;
|
|
}
|
|
} else {
|
|
Encode(instr);
|
|
}
|
|
}
|
|
|
|
static size_t GetInstrCodeSize(const Instr& instr)
|
|
{
|
|
Backend backend;
|
|
backend.Assemble(instr);
|
|
return backend.GetSize();
|
|
}
|
|
};
|
|
|
|
namespace detail
|
|
{
|
|
/// Counting 1-Bits
|
|
inline uint32 Count1Bits(uint32 x)
|
|
{
|
|
x = x - ((x >> 1) & 0x55555555);
|
|
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
|
|
x = (x + (x >> 4)) & 0x0F0F0F0F;
|
|
x = x + (x >> 8);
|
|
x = x + (x >> 16);
|
|
return x & 0x0000003F;
|
|
}
|
|
|
|
/// The bit position of the first bit 1.
|
|
inline uint32 bit_scan_forward(uint32 x)
|
|
{
|
|
JITASM_ASSERT(x != 0);
|
|
#if defined(JITASM_GCC)
|
|
return __builtin_ctz(x);
|
|
#else
|
|
unsigned long index;
|
|
_BitScanForward(&index, x);
|
|
return index;
|
|
#endif
|
|
}
|
|
|
|
/// The bit position of the last bit 1.
|
|
inline uint32 bit_scan_reverse(uint32 x)
|
|
{
|
|
JITASM_ASSERT(x != 0);
|
|
#if defined(JITASM_GCC)
|
|
return 31 - __builtin_clz(x);
|
|
#else
|
|
unsigned long index;
|
|
_BitScanReverse(&index, x);
|
|
return index;
|
|
#endif
|
|
}
|
|
|
|
/// Prior iterator
|
|
template<class It> It prior(const It &it) {
|
|
It i = it;
|
|
return --i;
|
|
}
|
|
|
|
/// Next iterator
|
|
template<class It> It next(const It &it) {
|
|
It i = it;
|
|
return ++i;
|
|
}
|
|
|
|
/// Iterator range
|
|
template<class T, class It = typename T::iterator> struct Range : std::pair<It, It> {
|
|
typedef It Iterator;
|
|
Range() : std::pair<It, It>() {}
|
|
Range(const It& f, const It& s) : std::pair<It, It>(f, s) {}
|
|
Range(T& container) : std::pair<It, It>(container.begin(), container.end()) {}
|
|
bool empty() const {return this->first == this->second;}
|
|
size_t size() const {return std::distance(this->first, this->second);}
|
|
};
|
|
|
|
/// Const iterator range
|
|
template<class T> struct ConstRange : Range<T, typename T::const_iterator> {
|
|
ConstRange() : Range<T, typename T::const_iterator>() {}
|
|
ConstRange(const typename T::const_iterator& f, const typename T::const_iterator& s) : Range<T, typename T::const_iterator>(f, s) {}
|
|
ConstRange(const T& container) : Range<T, typename T::const_iterator>(container.begin(), container.end()) {}
|
|
};
|
|
|
|
inline void append_num(std::string& str, size_t num)
|
|
{
|
|
if (num >= 10)
|
|
append_num(str, num / 10);
|
|
str.append(1, static_cast<char>('0' + num % 10));
|
|
}
|
|
|
|
#if defined(JITASM_WIN)
|
|
/// Debug trace
|
|
inline void Trace(const char *format, ...)
|
|
{
|
|
char szBuf[256];
|
|
va_list args;
|
|
va_start(args, format);
|
|
#if _MSC_VER >= 1400 // VC8 or later
|
|
_vsnprintf_s(szBuf, sizeof(szBuf) / sizeof(char), format, args);
|
|
#else
|
|
vsnprintf(szBuf, sizeof(szBuf) / sizeof(char), format, args);
|
|
#endif
|
|
va_end(args);
|
|
::OutputDebugStringA(szBuf);
|
|
}
|
|
#endif
|
|
|
|
/// Executable code buffer
|
|
class CodeBuffer
|
|
{
|
|
void* pbuff_;
|
|
size_t codesize_;
|
|
size_t buffsize_;
|
|
|
|
public:
|
|
CodeBuffer() : pbuff_(NULL), codesize_(0), buffsize_(0) {}
|
|
~CodeBuffer() {Reset(0);}
|
|
|
|
void* GetPointer() const {return pbuff_;}
|
|
size_t GetCodeSize() const {return codesize_;}
|
|
size_t GetBufferSize() const {return buffsize_;}
|
|
|
|
bool Reset(size_t codesize)
|
|
{
|
|
if (pbuff_) {
|
|
#if defined(JITASM_WIN)
|
|
::VirtualFree(pbuff_, 0, MEM_RELEASE);
|
|
#else
|
|
munmap(pbuff_, buffsize_);
|
|
#endif
|
|
pbuff_ = NULL;
|
|
codesize_ = 0;
|
|
buffsize_ = 0;
|
|
}
|
|
if (codesize) {
|
|
#if defined(JITASM_WIN)
|
|
void* pbuff = ::VirtualAlloc(NULL, codesize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
|
|
if (!pbuff) {
|
|
JITASM_ASSERT(0);
|
|
return false;
|
|
}
|
|
MEMORY_BASIC_INFORMATION info;
|
|
::VirtualQuery(pbuff, &info, sizeof(info));
|
|
buffsize_ = info.RegionSize;
|
|
#else
|
|
int pagesize = getpagesize();
|
|
size_t buffsize = (codesize + pagesize - 1) / pagesize * pagesize;
|
|
void* pbuff = mmap(NULL, buffsize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
|
|
if (!pbuff) {
|
|
JITASM_ASSERT(0);
|
|
return false;
|
|
}
|
|
buffsize_ = buffsize;
|
|
#endif
|
|
|
|
pbuff_ = pbuff;
|
|
codesize_ = codesize;
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
/// Stack manager
|
|
/**
|
|
* <b>Stack layout</b>
|
|
* \verbatim
|
|
* +-----------------------+
|
|
* | Caller return address |
|
|
* +=======================+========
|
|
* | ebp (rbp) |
|
|
* +-----------------------+ <-- ebp (rbp)
|
|
* | Saved gp registers |
|
|
* +-----------------------+
|
|
* | Padding for alignment |
|
|
* +-----------------------+ <-- Stack base
|
|
* | Spill slots and |
|
|
* | local variable |
|
|
* +-----------------------+ <-- esp (rsp)
|
|
* \endverbatim
|
|
*/
|
|
class StackManager
|
|
{
|
|
private:
|
|
Addr stack_base_;
|
|
uint32 stack_size_;
|
|
|
|
public:
|
|
StackManager() : stack_base_(RegID::CreatePhysicalRegID(R_TYPE_GP, EBX), 0), stack_size_(0) {}
|
|
|
|
/// Get allocated stack size
|
|
uint32 GetSize() const {return (stack_size_ + 15) / 16 * 16; /* 16 bytes aligned*/}
|
|
|
|
/// Get stack base
|
|
Addr GetStackBase() const {return stack_base_;}
|
|
|
|
/// Set stack base
|
|
void SetStackBase(const Addr& stack_base) {stack_base_ = stack_base;}
|
|
|
|
/// Allocate stack
|
|
Addr Alloc(uint32 size, uint32 alignment)
|
|
{
|
|
stack_size_ = (stack_size_ + alignment - 1) / alignment * alignment;
|
|
stack_size_ += size;
|
|
return stack_base_ - stack_size_;
|
|
}
|
|
};
|
|
|
|
/// Spin lock
|
|
class SpinLock
|
|
{
|
|
long lock_;
|
|
public:
|
|
SpinLock() : lock_(0) {}
|
|
void Lock() {while (interlocked_exchange(&lock_, 1));}
|
|
void Unlock() {interlocked_exchange(&lock_, 0);}
|
|
};
|
|
|
|
template<class Ty>
|
|
class ScopedLock
|
|
{
|
|
Ty& lock_;
|
|
ScopedLock<Ty>& operator=(const ScopedLock<Ty>&);
|
|
public:
|
|
ScopedLock(Ty& lock) : lock_(lock) {lock.Lock();}
|
|
~ScopedLock() {lock_.Unlock();}
|
|
};
|
|
} // namespace detail
|
|
|
|
// compiler prototype declaration
|
|
struct Frontend;
|
|
namespace compiler {
|
|
void Compile(Frontend& f);
|
|
}
|
|
|
|
/// jitasm frontend
|
|
struct Frontend
|
|
{
|
|
typedef jitasm::Addr Addr;
|
|
typedef jitasm::Reg Reg;
|
|
typedef jitasm::Reg8 Reg8;
|
|
typedef jitasm::Reg16 Reg16;
|
|
typedef jitasm::Reg32 Reg32;
|
|
#ifdef JITASM64
|
|
typedef jitasm::Reg64 Reg64;
|
|
#endif
|
|
typedef jitasm::MmxReg MmxReg;
|
|
typedef jitasm::XmmReg XmmReg;
|
|
typedef jitasm::YmmReg YmmReg;
|
|
|
|
Reg8_al al;
|
|
Reg8_cl cl;
|
|
Reg8 dl, bl, ah, ch, dh, bh;
|
|
Reg16_ax ax;
|
|
Reg16_dx dx;
|
|
Reg16 cx, bx, sp, bp, si, di;
|
|
Reg32_eax eax;
|
|
Reg32 ecx, edx, ebx, esp, ebp, esi, edi;
|
|
FpuReg_st0 st0;
|
|
FpuReg st1, st2, st3, st4, st5, st6, st7;
|
|
MmxReg mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
|
|
XmmReg xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
|
|
YmmReg ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
|
|
#ifdef JITASM64
|
|
Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b;
|
|
Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w;
|
|
Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d;
|
|
Reg64_rax rax;
|
|
Reg64 rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
|
|
XmmReg xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15;
|
|
YmmReg ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
|
|
#endif
|
|
|
|
AddressingPtr<Opd8> byte_ptr;
|
|
AddressingPtr<Opd16> word_ptr;
|
|
AddressingPtr<Opd32> dword_ptr;
|
|
AddressingPtr<Opd64> qword_ptr;
|
|
AddressingPtr<Opd64> mmword_ptr;
|
|
AddressingPtr<Opd128> xmmword_ptr;
|
|
AddressingPtr<Opd256> ymmword_ptr;
|
|
AddressingPtr<Opd32> real4_ptr;
|
|
AddressingPtr<Opd64> real8_ptr;
|
|
AddressingPtr<Opd80> real10_ptr;
|
|
AddressingPtr<Opd16> m2byte_ptr;
|
|
AddressingPtr<Opd224> m28byte_ptr;
|
|
AddressingPtr<Opd864> m108byte_ptr;
|
|
AddressingPtr<Opd4096> m512byte_ptr;
|
|
|
|
Reg zcx, zdx, zbx, zsp, zbp, zsi, zdi;
|
|
#ifdef JITASM64
|
|
Reg64_rax zax;
|
|
AddressingPtr<Opd64> ptr;
|
|
#else
|
|
Reg32_eax zax;
|
|
AddressingPtr<Opd32> ptr;
|
|
#endif
|
|
|
|
Frontend()
|
|
: dl(DL), bl(BL), ah(AH), ch(CH), dh(DH), bh(BH),
|
|
cx(CX), bx(BX), sp(SP), bp(BP), si(SI), di(DI),
|
|
ecx(ECX), edx(EDX), ebx(EBX), esp(ESP), ebp(EBP), esi(ESI), edi(EDI),
|
|
st1(ST1), st2(ST2), st3(ST3), st4(ST4), st5(ST5), st6(ST6), st7(ST7),
|
|
mm0(MM0), mm1(MM1), mm2(MM2), mm3(MM3), mm4(MM4), mm5(MM5), mm6(MM6), mm7(MM7),
|
|
xmm0(XMM0), xmm1(XMM1), xmm2(XMM2), xmm3(XMM3), xmm4(XMM4), xmm5(XMM5), xmm6(XMM6), xmm7(XMM7),
|
|
ymm0(YMM0), ymm1(YMM1), ymm2(YMM2), ymm3(YMM3), ymm4(YMM4), ymm5(YMM5), ymm6(YMM6), ymm7(YMM7),
|
|
#ifdef JITASM64
|
|
r8b(R8B), r9b(R9B), r10b(R10B), r11b(R11B), r12b(R12B), r13b(R13B), r14b(R14B), r15b(R15B),
|
|
r8w(R8W), r9w(R9W), r10w(R10W), r11w(R11W), r12w(R12W), r13w(R13W), r14w(R14W), r15w(R15W),
|
|
r8d(R8D), r9d(R9D), r10d(R10D), r11d(R11D), r12d(R12D), r13d(R13D), r14d(R14D), r15d(R15D),
|
|
rcx(RCX), rdx(RDX), rbx(RBX), rsp(RSP), rbp(RBP), rsi(RSI), rdi(RDI),
|
|
r8(R8), r9(R9), r10(R10), r11(R11), r12(R12), r13(R13), r14(R14), r15(R15),
|
|
xmm8(XMM8), xmm9(XMM9), xmm10(XMM10), xmm11(XMM11), xmm12(XMM12), xmm13(XMM13), xmm14(XMM14), xmm15(XMM15),
|
|
ymm8(YMM8), ymm9(YMM9), ymm10(YMM10), ymm11(YMM11), ymm12(YMM12), ymm13(YMM13), ymm14(YMM14), ymm15(YMM15),
|
|
zcx(RCX), zdx(RDX), zbx(RBX), zsp(RSP), zbp(RBP), zsi(RSI), zdi(RDI),
|
|
#else
|
|
zcx(ECX), zdx(EDX), zbx(EBX), zsp(ESP), zbp(EBP), zsi(ESI), zdi(EDI),
|
|
#endif
|
|
assembled_(false)
|
|
{
|
|
}
|
|
|
|
virtual ~Frontend() {}
|
|
|
|
typedef std::vector<Instr> InstrList;
|
|
InstrList instrs_;
|
|
bool assembled_;
|
|
detail::CodeBuffer codebuff_;
|
|
detail::SpinLock codelock_;
|
|
detail::StackManager stack_manager_;
|
|
|
|
struct Label
|
|
{
|
|
std::string name;
|
|
size_t instr_number;
|
|
explicit Label(const std::string& name_) : name(name_), instr_number(0) {}
|
|
};
|
|
typedef std::deque<Label> LabelList;
|
|
LabelList labels_;
|
|
|
|
virtual void InternalMain() = 0;
|
|
|
|
/// Declare variable of the function argument on register
|
|
void DeclareRegArg(const detail::Opd& var, const detail::Opd& arg, const detail::Opd& spill_slot = detail::Opd())
|
|
{
|
|
JITASM_ASSERT(var.IsReg() && arg.IsReg());
|
|
// Insert special instruction after Prolog
|
|
InstrList::iterator it = instrs_.begin();
|
|
if (!instrs_.empty() && instrs_[0].GetID() == I_COMPILER_PROLOG) ++it;
|
|
// The arg is passed as register constraint of the var.
|
|
instrs_.insert(it, Instr(I_COMPILER_DECLARE_REG_ARG, 0, E_SPECIAL, Dummy(W(var), arg), spill_slot));
|
|
}
|
|
|
|
/// Declare variable of the function argument on stack
|
|
void DeclareStackArg(const detail::Opd& var, const detail::Opd& arg)
|
|
{
|
|
JITASM_ASSERT(var.IsReg() && arg.IsMem());
|
|
// Insert special instruction after Prolog
|
|
InstrList::iterator it = instrs_.begin();
|
|
if (!instrs_.empty() && instrs_[0].GetID() == I_COMPILER_PROLOG) ++it;
|
|
instrs_.insert(it, Instr(I_COMPILER_DECLARE_STACK_ARG, 0, E_SPECIAL, W(var), R(arg)));
|
|
}
|
|
|
|
/// Declare variable of the function result on register
|
|
void DeclareResultReg(const detail::Opd& var)
|
|
{
|
|
JITASM_ASSERT(var.IsReg());
|
|
// The result register is passed as register constraint of the var.
|
|
if (var.IsGpReg()) {
|
|
AppendInstr(I_COMPILER_DECLARE_RESULT_REG, 0, E_SPECIAL, Dummy(R(var), zax));
|
|
} else if (var.IsMmxReg()) {
|
|
AppendInstr(I_COMPILER_DECLARE_RESULT_REG, 0, E_SPECIAL, Dummy(R(var), mm0));
|
|
} else if (var.IsXmmReg()) {
|
|
AppendInstr(I_COMPILER_DECLARE_RESULT_REG, 0, E_SPECIAL, Dummy(R(var), xmm0));
|
|
}
|
|
}
|
|
|
|
/// Function prolog
|
|
void Prolog()
|
|
{
|
|
AppendInstr(I_COMPILER_PROLOG, 0, E_SPECIAL);
|
|
}
|
|
|
|
/// Function epilog
|
|
void Epilog()
|
|
{
|
|
AppendInstr(I_COMPILER_EPILOG, 0, E_SPECIAL);
|
|
}
|
|
|
|
static bool IsJump(InstrID id)
|
|
{
|
|
return id == I_JMP || id == I_JCC || id == I_LOOP;
|
|
}
|
|
|
|
size_t GetJumpTo(const Instr& instr) const
|
|
{
|
|
size_t label_id = (size_t) instr.GetOpd(0).GetImm();
|
|
JITASM_ASSERT(labels_[label_id].instr_number != (size_t)-1); // invalid label
|
|
return labels_[label_id].instr_number;
|
|
}
|
|
|
|
// TODO: Return an error when there is no destination.
|
|
void ResolveJump()
|
|
{
|
|
// Replace label indexes with instruncion numbers.
|
|
for (InstrList::iterator it = instrs_.begin(); it != instrs_.end(); ++it) {
|
|
Instr& instr = *it;
|
|
if (IsJump(instr.GetID())) {
|
|
instr = Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, Imm8(0x7F), Imm64(GetJumpTo(instr))); // Opd(0) = max value in sint8, Opd(1) = instruction number
|
|
}
|
|
}
|
|
|
|
// Resolve operand sizes.
|
|
std::vector<int> offsets;
|
|
offsets.reserve(instrs_.size() + 1);
|
|
bool retry;
|
|
do {
|
|
offsets.clear();
|
|
offsets.push_back(0);
|
|
Backend pre;
|
|
for (InstrList::const_iterator it = instrs_.begin(); it != instrs_.end(); ++it) {
|
|
pre.Assemble(*it);
|
|
offsets.push_back((int) pre.GetSize());
|
|
}
|
|
|
|
retry = false;
|
|
for (size_t i = 0; i < instrs_.size(); i++) {
|
|
Instr& instr = instrs_[i];
|
|
if (IsJump(instr.GetID())) {
|
|
size_t d = (size_t) instr.GetOpd(1).GetImm();
|
|
int rel = (int) offsets[d] - offsets[i + 1];
|
|
OpdSize size = instr.GetOpd(0).GetSize();
|
|
if (size == O_SIZE_8) {
|
|
if (!detail::IsInt8(rel)) {
|
|
// jrcxz, jcxz, jecxz, loop, loope, loopne are only for short jump
|
|
uint32 tttn = instr.opcode_;
|
|
if (instr.GetID() == I_JCC && (tttn == JCC_CXZ || tttn == JCC_ECXZ || tttn == JCC_RCXZ)) JITASM_ASSERT(0);
|
|
if (instr.GetID() == I_LOOP) JITASM_ASSERT(0);
|
|
|
|
// Retry with immediate 32
|
|
instr = Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, Imm32(0x7FFFFFFF), Imm64(instr.GetOpd(1).GetImm()));
|
|
retry = true;
|
|
}
|
|
} else if (size == O_SIZE_32) {
|
|
JITASM_ASSERT(detail::IsInt32(rel)); // There is no jump instruction larger than immediate 32.
|
|
}
|
|
}
|
|
}
|
|
} while (retry);
|
|
|
|
// Resolve immediates
|
|
for (size_t i = 0; i < instrs_.size(); i++) {
|
|
Instr& instr = instrs_[i];
|
|
if (IsJump(instr.GetID())) {
|
|
size_t d = (size_t) instr.GetOpd(1).GetImm();
|
|
int rel = (int) offsets[d] - offsets[i + 1];
|
|
OpdSize size = instr.GetOpd(0).GetSize();
|
|
if (size == O_SIZE_8) {
|
|
JITASM_ASSERT(detail::IsInt8(rel));
|
|
instr = Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, Imm8((uint8) rel));
|
|
} else if (size == O_SIZE_32) {
|
|
JITASM_ASSERT(detail::IsInt32(rel));
|
|
instr = Instr(instr.GetID(), instr.opcode_, instr.encoding_flag_, Imm32((uint32) rel));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Assemble
|
|
void Assemble()
|
|
{
|
|
detail::ScopedLock<detail::SpinLock> lock(codelock_);
|
|
if (assembled_) return;
|
|
|
|
instrs_.clear();
|
|
labels_.clear();
|
|
instrs_.reserve(128);
|
|
|
|
InternalMain();
|
|
compiler::Compile(*this);
|
|
|
|
// Resolve jump instructions
|
|
if (!labels_.empty()) {
|
|
ResolveJump();
|
|
}
|
|
|
|
// Count total size of machine code
|
|
Backend pre;
|
|
for (InstrList::const_iterator it = instrs_.begin(); it != instrs_.end(); ++it) {
|
|
pre.Assemble(*it);
|
|
}
|
|
size_t codesize = pre.GetSize();
|
|
|
|
// Write machine code to the buffer
|
|
codebuff_.Reset(codesize);
|
|
Backend backend(codebuff_.GetPointer(), codebuff_.GetBufferSize());
|
|
for (InstrList::const_iterator it = instrs_.begin(); it != instrs_.end(); ++it) {
|
|
backend.Assemble(*it);
|
|
}
|
|
|
|
InstrList().swap(instrs_);
|
|
LabelList().swap(labels_);
|
|
assembled_ = true;
|
|
}
|
|
|
|
/// Get assembled code
|
|
void *GetCode()
|
|
{
|
|
if (!assembled_) {
|
|
Assemble();
|
|
}
|
|
return codebuff_.GetPointer();
|
|
}
|
|
|
|
/// Get total size of machine code
|
|
size_t GetCodeSize() const
|
|
{
|
|
return codebuff_.GetCodeSize();
|
|
}
|
|
|
|
void AppendInstr(InstrID id, uint32 opcode, uint32 encoding_flag, const detail::Opd& opd1 = detail::Opd(), const detail::Opd& opd2 = detail::Opd(), const detail::Opd& opd3 = detail::Opd(), const detail::Opd& opd4 = detail::Opd(), const detail::Opd& opd5 = detail::Opd(), const detail::Opd& opd6 = detail::Opd())
|
|
{
|
|
instrs_.push_back(Instr(id, opcode, encoding_flag, opd1, opd2, opd3, opd4, opd5, opd6));
|
|
}
|
|
|
|
void AppendJmp(size_t label_id)
|
|
{
|
|
AppendInstr(I_JMP, 0, E_SPECIAL, Imm64(label_id));
|
|
}
|
|
|
|
void AppendJcc(JumpCondition jcc, size_t label_id)
|
|
{
|
|
AppendInstr(I_JCC, jcc, E_SPECIAL, Imm64(label_id));
|
|
}
|
|
|
|
/// Change label id of jump instruction
|
|
static void ChangeLabelID(Instr& instr, size_t label_id)
|
|
{
|
|
JITASM_ASSERT(IsJump(instr.id_) && instr.GetOpd(0).IsImm());
|
|
instr.GetOpd(0).imm_ = label_id;
|
|
}
|
|
|
|
size_t NewLabelID(const std::string& label_name)
|
|
{
|
|
labels_.push_back(Label(label_name));
|
|
return labels_.size() - 1;
|
|
}
|
|
|
|
size_t GetLabelID(const std::string& label_name)
|
|
{
|
|
for (size_t i = 0; i < labels_.size(); i++) {
|
|
if (labels_[i].name == label_name) {
|
|
return i;
|
|
}
|
|
}
|
|
return NewLabelID(label_name);
|
|
}
|
|
|
|
void L(size_t label_id)
|
|
{
|
|
labels_[label_id].instr_number = instrs_.size(); // Label current instruction
|
|
}
|
|
|
|
/// Label
|
|
void L(const std::string& label_name)
|
|
{
|
|
JITASM_ASSERT(!label_name.empty());
|
|
L(GetLabelID(label_name));
|
|
}
|
|
|
|
// General-Purpose Instructions
|
|
void adc(const Reg8& dst, const Imm8& imm) {AppendInstr(I_ADC, 0x80, E_SPECIAL, Imm8(2), RW(dst), imm);}
|
|
void adc(const Mem8& dst, const Imm8& imm) {AppendInstr(I_ADC, 0x80, 0, Imm8(2), RW(dst), imm);}
|
|
void adc(const Reg16& dst, const Imm16& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Mem16& dst, const Imm16& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Reg32& dst, const Imm32& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Mem32& dst, const Imm32& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Reg8& dst, const Reg8& src) {AppendInstr(I_ADC, 0x10, 0, R(src), RW(dst));}
|
|
void adc(const Mem8& dst, const Reg8& src) {AppendInstr(I_ADC, 0x10, 0, R(src), RW(dst));}
|
|
void adc(const Reg8& dst, const Mem8& src) {AppendInstr(I_ADC, 0x12, 0, RW(dst), R(src));}
|
|
void adc(const Reg16& dst, const Reg16& src) {AppendInstr(I_ADC, 0x11, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void adc(const Mem16& dst, const Reg16& src) {AppendInstr(I_ADC, 0x11, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void adc(const Reg16& dst, const Mem16& src) {AppendInstr(I_ADC, 0x13, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void adc(const Reg32& dst, const Reg32& src) {AppendInstr(I_ADC, 0x11, 0, R(src), RW(dst));}
|
|
void adc(const Mem32& dst, const Reg32& src) {AppendInstr(I_ADC, 0x11, 0, R(src), RW(dst));}
|
|
void adc(const Reg32& dst, const Mem32& src) {AppendInstr(I_ADC, 0x13, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void adc(const Reg64& dst, const Imm32& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Mem64& dst, const Imm32& imm) {AppendInstr(I_ADC, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(2), RW(dst), detail::ImmXor8(imm));}
|
|
void adc(const Reg64& dst, const Reg64& src) {AppendInstr(I_ADC, 0x11, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void adc(const Mem64& dst, const Reg64& src) {AppendInstr(I_ADC, 0x11, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void adc(const Reg64& dst, const Mem64& src) {AppendInstr(I_ADC, 0x13, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void add(const Reg8& dst, const Imm8& imm) {AppendInstr(I_ADD, 0x80, E_SPECIAL, Imm8(0), RW(dst), imm);}
|
|
void add(const Mem8& dst, const Imm8& imm) {AppendInstr(I_ADD, 0x80, 0, Imm8(0), RW(dst), imm);}
|
|
void add(const Reg16& dst, const Imm16& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Mem16& dst, const Imm16& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Reg32& dst, const Imm32& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Mem32& dst, const Imm32& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Reg8& dst, const Reg8& src) {AppendInstr(I_ADD, 0x00, 0, R(src), RW(dst));}
|
|
void add(const Mem8& dst, const Reg8& src) {AppendInstr(I_ADD, 0x00, 0, R(src), RW(dst));}
|
|
void add(const Reg8& dst, const Mem8& src) {AppendInstr(I_ADD, 0x02, 0, RW(dst), R(src));}
|
|
void add(const Reg16& dst, const Reg16& src) {AppendInstr(I_ADD, 0x01, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void add(const Mem16& dst, const Reg16& src) {AppendInstr(I_ADD, 0x01, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void add(const Reg16& dst, const Mem16& src) {AppendInstr(I_ADD, 0x03, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void add(const Reg32& dst, const Reg32& src) {AppendInstr(I_ADD, 0x01, 0, R(src), RW(dst));}
|
|
void add(const Mem32& dst, const Reg32& src) {AppendInstr(I_ADD, 0x01, 0, R(src), RW(dst));}
|
|
void add(const Reg32& dst, const Mem32& src) {AppendInstr(I_ADD, 0x03, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void add(const Reg64& dst, const Imm32& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Mem64& dst, const Imm32& imm) {AppendInstr(I_ADD, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(0), RW(dst), detail::ImmXor8(imm));}
|
|
void add(const Reg64& dst, const Reg64& src) {AppendInstr(I_ADD, 0x01, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void add(const Mem64& dst, const Reg64& src) {AppendInstr(I_ADD, 0x01, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void add(const Reg64& dst, const Mem64& src) {AppendInstr(I_ADD, 0x03, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
|
|
void and_(const Reg8& dst, const Imm8& imm)
|
|
{AppendInstr(I_AND, 0x80, E_SPECIAL, Imm8(4), RW(dst), imm);}
|
|
void and_(const Mem8& dst, const Imm8& imm) {AppendInstr(I_AND, 0x80, 0, Imm8(4), RW(dst), imm);}
|
|
void and_(const Reg16& dst, const Imm16& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Mem16& dst, const Imm16& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Reg32& dst, const Imm32& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Mem32& dst, const Imm32& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Reg8& dst, const Reg8& src) {AppendInstr(I_AND, 0x20, 0, R(src), RW(dst));}
|
|
void and_(const Mem8& dst, const Reg8& src) {AppendInstr(I_AND, 0x20, 0, R(src), RW(dst));}
|
|
void and_(const Reg8& dst, const Mem8& src) {AppendInstr(I_AND, 0x22, 0, RW(dst), R(src));}
|
|
void and_(const Reg16& dst, const Reg16& src) {AppendInstr(I_AND, 0x21, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void and_(const Mem16& dst, const Reg16& src) {AppendInstr(I_AND, 0x21, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void and_(const Reg16& dst, const Mem16& src) {AppendInstr(I_AND, 0x23, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void and_(const Reg32& dst, const Reg32& src) {AppendInstr(I_AND, 0x21, 0, R(src), RW(dst));}
|
|
void and_(const Mem32& dst, const Reg32& src) {AppendInstr(I_AND, 0x21, 0, R(src), RW(dst));}
|
|
void and_(const Reg32& dst, const Mem32& src) {AppendInstr(I_AND, 0x23, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void and_(const Reg64& dst, const Imm32& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Mem64& dst, const Imm32& imm) {AppendInstr(I_AND, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(4), RW(dst), detail::ImmXor8(imm));}
|
|
void and_(const Reg64& dst, const Reg64& src) {AppendInstr(I_AND, 0x21, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void and_(const Mem64& dst, const Reg64& src) {AppendInstr(I_AND, 0x21, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void and_(const Reg64& dst, const Mem64& src) {AppendInstr(I_AND, 0x23, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void bsf(const Reg16& dst, const Reg16& src) {AppendInstr(I_BSF, 0x0FBC, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void bsf(const Reg16& dst, const Mem16& src) {AppendInstr(I_BSF, 0x0FBC, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void bsf(const Reg32& dst, const Reg32& src) {AppendInstr(I_BSF, 0x0FBC, 0, W(dst), R(src));}
|
|
void bsf(const Reg32& dst, const Mem32& src) {AppendInstr(I_BSF, 0x0FBC, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void bsf(const Reg64& dst, const Reg64& src) {AppendInstr(I_BSF, 0x0FBC, E_REXW_PREFIX, W(dst), R(src));}
|
|
void bsf(const Reg64& dst, const Mem64& src) {AppendInstr(I_BSF, 0x0FBC, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void bsr(const Reg16& dst, const Reg16& src) {AppendInstr(I_BSR, 0x0FBD, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void bsr(const Reg16& dst, const Mem16& src) {AppendInstr(I_BSR, 0x0FBD, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void bsr(const Reg32& dst, const Reg32& src) {AppendInstr(I_BSR, 0x0FBD, 0, W(dst), R(src));}
|
|
void bsr(const Reg32& dst, const Mem32& src) {AppendInstr(I_BSR, 0x0FBD, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void bsr(const Reg64& dst, const Reg64& src) {AppendInstr(I_BSR, 0x0FBD, E_REXW_PREFIX, W(dst), R(src));}
|
|
void bsr(const Reg64& dst, const Mem64& src) {AppendInstr(I_BSR, 0x0FBD, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void bswap(const Reg32& dst) {AppendInstr(I_BSWAP, 0x0FC8, 0, RW(dst));}
|
|
#ifdef JITASM64
|
|
void bswap(const Reg64& dst) {AppendInstr(I_BSWAP, 0x0FC8, E_REXW_PREFIX, RW(dst));}
|
|
#endif
|
|
void bt(const Reg16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BT, 0x0FA3, E_OPERAND_SIZE_PREFIX, R(bitoffset), R(bitbase));}
|
|
void bt(const Mem16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BT, 0x0FA3, E_OPERAND_SIZE_PREFIX, R(bitoffset), R(bitbase));}
|
|
void bt(const Reg32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BT, 0x0FA3, 0, R(bitoffset), R(bitbase));}
|
|
void bt(const Mem32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BT, 0x0FA3, 0, R(bitoffset), R(bitbase));}
|
|
void bt(const Reg16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(4), R(bitbase), bitoffset);}
|
|
void bt(const Mem16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(4), R(bitbase), bitoffset);}
|
|
void bt(const Reg32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, 0, Imm8(4), R(bitbase), bitoffset);}
|
|
void bt(const Mem32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, 0, Imm8(4), R(bitbase), bitoffset);}
|
|
#ifdef JITASM64
|
|
void bt(const Reg64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BT, 0x0FA3, E_REXW_PREFIX, R(bitoffset), R(bitbase));}
|
|
void bt(const Mem64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BT, 0x0FA3, E_REXW_PREFIX, R(bitoffset), R(bitbase));}
|
|
void bt(const Reg64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, E_REXW_PREFIX, Imm8(4), R(bitbase), bitoffset);}
|
|
void bt(const Mem64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BT, 0x0FBA, E_REXW_PREFIX, Imm8(4), R(bitbase), bitoffset);}
|
|
#endif
|
|
void btc(const Reg16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTC, 0x0FBB, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btc(const Mem16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTC, 0x0FBB, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btc(const Reg32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTC, 0x0FBB, 0, R(bitoffset), RW(bitbase));}
|
|
void btc(const Mem32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTC, 0x0FBB, 0, R(bitoffset), RW(bitbase));}
|
|
void btc(const Reg16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(bitbase), bitoffset);}
|
|
void btc(const Mem16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(bitbase), bitoffset);}
|
|
void btc(const Reg32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, 0, Imm8(7), RW(bitbase), bitoffset);}
|
|
void btc(const Mem32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, 0, Imm8(7), RW(bitbase), bitoffset);}
|
|
#ifdef JITASM64
|
|
void btc(const Reg64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTC, 0x0FBB, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btc(const Mem64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTC, 0x0FBB, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btc(const Reg64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, E_REXW_PREFIX, Imm8(7), RW(bitbase), bitoffset);}
|
|
void btc(const Mem64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTC, 0x0FBA, E_REXW_PREFIX, Imm8(7), RW(bitbase), bitoffset);}
|
|
#endif
|
|
void btr(const Reg16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTR, 0x0FB3, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btr(const Mem16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTR, 0x0FB3, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btr(const Reg32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTR, 0x0FB3, 0, R(bitoffset), RW(bitbase));}
|
|
void btr(const Mem32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTR, 0x0FB3, 0, R(bitoffset), RW(bitbase));}
|
|
void btr(const Reg16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(6), RW(bitbase), bitoffset);}
|
|
void btr(const Mem16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(6), RW(bitbase), bitoffset);}
|
|
void btr(const Reg32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, 0, Imm8(6), RW(bitbase), bitoffset);}
|
|
void btr(const Mem32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, 0, Imm8(6), RW(bitbase), bitoffset);}
|
|
#ifdef JITASM64
|
|
void btr(const Reg64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTR, 0x0FB3, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btr(const Mem64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTR, 0x0FB3, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void btr(const Reg64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, E_REXW_PREFIX, Imm8(6), RW(bitbase), bitoffset);}
|
|
void btr(const Mem64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTR, 0x0FBA, E_REXW_PREFIX, Imm8(6), RW(bitbase), bitoffset);}
|
|
#endif
|
|
void bts(const Reg16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTS, 0x0FAB, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void bts(const Mem16& bitbase, const Reg16& bitoffset) {AppendInstr(I_BTS, 0x0FAB, E_OPERAND_SIZE_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void bts(const Reg32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTS, 0x0FAB, 0, R(bitoffset), RW(bitbase));}
|
|
void bts(const Mem32& bitbase, const Reg32& bitoffset) {AppendInstr(I_BTS, 0x0FAB, 0, R(bitoffset), RW(bitbase));}
|
|
void bts(const Reg16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(bitbase), bitoffset);}
|
|
void bts(const Mem16& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(bitbase), bitoffset);}
|
|
void bts(const Reg32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, 0, Imm8(5), RW(bitbase), bitoffset);}
|
|
void bts(const Mem32& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, 0, Imm8(5), RW(bitbase), bitoffset);}
|
|
#ifdef JITASM64
|
|
void bts(const Reg64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTS, 0x0FAB, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void bts(const Mem64& bitbase, const Reg64& bitoffset) {AppendInstr(I_BTS, 0x0FAB, E_REXW_PREFIX, R(bitoffset), RW(bitbase));}
|
|
void bts(const Reg64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, E_REXW_PREFIX, Imm8(5), RW(bitbase), bitoffset);}
|
|
void bts(const Mem64& bitbase, const Imm8& bitoffset) {AppendInstr(I_BTS, 0x0FBA, E_REXW_PREFIX, Imm8(5), RW(bitbase), bitoffset);}
|
|
#endif
|
|
#ifndef JITASM64
|
|
void call(const Reg16& dst) {AppendInstr(I_CALL, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(2), R(dst));}
|
|
void call(const Reg32& dst) {AppendInstr(I_CALL, 0xFF, 0, Imm8(2), R(dst));}
|
|
void call(const Mem32& dst) {AppendInstr(I_CALL, 0xFF, 0, Imm8(2), R(dst));} // Imm8(2) = register/opcode
|
|
#else
|
|
void call(const Reg64& dst) {AppendInstr(I_CALL, 0xFF, 0, Imm8(2), R(dst));}
|
|
#endif
|
|
void cbw() {AppendInstr(I_CBW, 0x98, E_OPERAND_SIZE_PREFIX, Dummy(RW(eax)));}
|
|
void cwde() {AppendInstr(I_CBW, 0x98, 0, Dummy(RW(eax)));}
|
|
#ifdef JITASM64
|
|
void cdqe() {AppendInstr(I_CBW, 0x98, E_REXW_PREFIX, Dummy(RW(eax)));}
|
|
#endif
|
|
void clc() {AppendInstr(I_CLC, 0xF8, 0);}
|
|
void cld() {AppendInstr(I_CLD, 0xFC, 0);}
|
|
void cli() {AppendInstr(I_CLI, 0xFA, 0);}
|
|
#ifdef JITASM64
|
|
void clts() {AppendInstr(I_CLTS, 0x0F06, 0);}
|
|
#endif
|
|
void cmc() {AppendInstr(I_CMC, 0xF5, 0);}
|
|
void cmova(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F47, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmova(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F47, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovae(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F43, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovae(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F43, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovb(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F42, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovb(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F42, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovbe(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F46, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovbe(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F46, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovc(const Reg16& dst, const Reg16& src) {cmovb(dst, src);}
|
|
void cmovc(const Reg16& dst, const Mem16& src) {cmovb(dst, src);}
|
|
void cmove(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F44, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmove(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F44, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovg(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4F, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovg(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4F, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovge(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4D, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovge(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4D, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovl(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4C, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovl(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4C, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovle(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4E, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovle(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4E, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovna(const Reg16& dst, const Reg16& src) {cmovbe(dst, src);}
|
|
void cmovna(const Reg16& dst, const Mem16& src) {cmovbe(dst, src);}
|
|
void cmovnae(const Reg16& dst, const Reg16& src) {cmovb(dst, src);}
|
|
void cmovnae(const Reg16& dst, const Mem16& src) {cmovb(dst, src);}
|
|
void cmovnb(const Reg16& dst, const Reg16& src) {cmovae(dst, src);}
|
|
void cmovnb(const Reg16& dst, const Mem16& src) {cmovae(dst, src);}
|
|
void cmovnbe(const Reg16& dst, const Reg16& src) {cmova(dst, src);}
|
|
void cmovnbe(const Reg16& dst, const Mem16& src) {cmova(dst, src);}
|
|
void cmovnc(const Reg16& dst, const Reg16& src) {cmovae(dst, src);}
|
|
void cmovnc(const Reg16& dst, const Mem16& src) {cmovae(dst, src);}
|
|
void cmovne(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F45, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovne(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F45, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovng(const Reg16& dst, const Reg16& src) {cmovle(dst, src);}
|
|
void cmovng(const Reg16& dst, const Mem16& src) {cmovle(dst, src);}
|
|
void cmovnge(const Reg16& dst, const Reg16& src) {cmovl(dst, src);}
|
|
void cmovnge(const Reg16& dst, const Mem16& src) {cmovl(dst, src);}
|
|
void cmovnl(const Reg16& dst, const Reg16& src) {cmovge(dst, src);}
|
|
void cmovnl(const Reg16& dst, const Mem16& src) {cmovge(dst, src);}
|
|
void cmovnle(const Reg16& dst, const Reg16& src) {cmovg(dst, src);}
|
|
void cmovnle(const Reg16& dst, const Mem16& src) {cmovg(dst, src);}
|
|
void cmovno(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F41, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovno(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F41, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovnp(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4B, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovnp(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4B, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovns(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F49, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovns(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F49, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovnz(const Reg16& dst, const Reg16& src) {cmovne(dst, src);}
|
|
void cmovnz(const Reg16& dst, const Mem16& src) {cmovne(dst, src);}
|
|
void cmovo(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F40, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovo(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F40, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovp(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F4A, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovp(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F4A, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovpe(const Reg16& dst, const Reg16& src) {cmovp(dst, src);}
|
|
void cmovpe(const Reg16& dst, const Mem16& src) {cmovp(dst, src);}
|
|
void cmovpo(const Reg16& dst, const Reg16& src) {cmovnp(dst, src);}
|
|
void cmovpo(const Reg16& dst, const Mem16& src) {cmovnp(dst, src);}
|
|
void cmovs(const Reg16& dst, const Reg16& src) {AppendInstr(I_CMOVCC, 0x0F48, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovs(const Reg16& dst, const Mem16& src) {AppendInstr(I_CMOVCC, 0x0F48, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void cmovz(const Reg16& dst, const Reg16& src) {cmove(dst, src);}
|
|
void cmovz(const Reg16& dst, const Mem16& src) {cmove(dst, src);}
|
|
void cmova(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F47, 0, RW(dst), R(src));}
|
|
void cmova(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F47, 0, RW(dst), R(src));}
|
|
void cmovae(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F43, 0, RW(dst), R(src));}
|
|
void cmovae(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F43, 0, RW(dst), R(src));}
|
|
void cmovb(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F42, 0, RW(dst), R(src));}
|
|
void cmovb(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F42, 0, RW(dst), R(src));}
|
|
void cmovbe(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F46, 0, RW(dst), R(src));}
|
|
void cmovbe(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F46, 0, RW(dst), R(src));}
|
|
void cmovc(const Reg32& dst, const Reg32& src) {cmovb(dst, src);}
|
|
void cmovc(const Reg32& dst, const Mem32& src) {cmovb(dst, src);}
|
|
void cmove(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F44, 0, RW(dst), R(src));}
|
|
void cmove(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F44, 0, RW(dst), R(src));}
|
|
void cmovg(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4F, 0, RW(dst), R(src));}
|
|
void cmovg(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4F, 0, RW(dst), R(src));}
|
|
void cmovge(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4D, 0, RW(dst), R(src));}
|
|
void cmovge(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4D, 0, RW(dst), R(src));}
|
|
void cmovl(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4C, 0, RW(dst), R(src));}
|
|
void cmovl(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4C, 0, RW(dst), R(src));}
|
|
void cmovle(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4E, 0, RW(dst), R(src));}
|
|
void cmovle(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4E, 0, RW(dst), R(src));}
|
|
void cmovna(const Reg32& dst, const Reg32& src) {cmovbe(dst, src);}
|
|
void cmovna(const Reg32& dst, const Mem32& src) {cmovbe(dst, src);}
|
|
void cmovnae(const Reg32& dst, const Reg32& src) {cmovb(dst, src);}
|
|
void cmovnae(const Reg32& dst, const Mem32& src) {cmovb(dst, src);}
|
|
void cmovnb(const Reg32& dst, const Reg32& src) {cmovae(dst, src);}
|
|
void cmovnb(const Reg32& dst, const Mem32& src) {cmovae(dst, src);}
|
|
void cmovnbe(const Reg32& dst, const Reg32& src) {cmova(dst, src);}
|
|
void cmovnbe(const Reg32& dst, const Mem32& src) {cmova(dst, src);}
|
|
void cmovnc(const Reg32& dst, const Reg32& src) {cmovae(dst, src);}
|
|
void cmovnc(const Reg32& dst, const Mem32& src) {cmovae(dst, src);}
|
|
void cmovne(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F45, 0, RW(dst), R(src));}
|
|
void cmovne(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F45, 0, RW(dst), R(src));}
|
|
void cmovng(const Reg32& dst, const Reg32& src) {cmovle(dst, src);}
|
|
void cmovng(const Reg32& dst, const Mem32& src) {cmovle(dst, src);}
|
|
void cmovnge(const Reg32& dst, const Reg32& src) {cmovl(dst, src);}
|
|
void cmovnge(const Reg32& dst, const Mem32& src) {cmovl(dst, src);}
|
|
void cmovnl(const Reg32& dst, const Reg32& src) {cmovge(dst, src);}
|
|
void cmovnl(const Reg32& dst, const Mem32& src) {cmovge(dst, src);}
|
|
void cmovnle(const Reg32& dst, const Reg32& src) {cmovg(dst, src);}
|
|
void cmovnle(const Reg32& dst, const Mem32& src) {cmovg(dst, src);}
|
|
void cmovno(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F41, 0, RW(dst), R(src));}
|
|
void cmovno(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F41, 0, RW(dst), R(src));}
|
|
void cmovnp(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4B, 0, RW(dst), R(src));}
|
|
void cmovnp(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4B, 0, RW(dst), R(src));}
|
|
void cmovns(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F49, 0, RW(dst), R(src));}
|
|
void cmovns(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F49, 0, RW(dst), R(src));}
|
|
void cmovnz(const Reg32& dst, const Reg32& src) {cmovne(dst, src);}
|
|
void cmovnz(const Reg32& dst, const Mem32& src) {cmovne(dst, src);}
|
|
void cmovo(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F40, 0, RW(dst), R(src));}
|
|
void cmovo(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F40, 0, RW(dst), R(src));}
|
|
void cmovp(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F4A, 0, RW(dst), R(src));}
|
|
void cmovp(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F4A, 0, RW(dst), R(src));}
|
|
void cmovpe(const Reg32& dst, const Reg32& src) {cmovp(dst, src);}
|
|
void cmovpe(const Reg32& dst, const Mem32& src) {cmovp(dst, src);}
|
|
void cmovpo(const Reg32& dst, const Reg32& src) {cmovnp(dst, src);}
|
|
void cmovpo(const Reg32& dst, const Mem32& src) {cmovnp(dst, src);}
|
|
void cmovs(const Reg32& dst, const Reg32& src) {AppendInstr(I_CMOVCC, 0x0F48, 0, RW(dst), R(src));}
|
|
void cmovs(const Reg32& dst, const Mem32& src) {AppendInstr(I_CMOVCC, 0x0F48, 0, RW(dst), R(src));}
|
|
void cmovz(const Reg32& dst, const Reg32& src) {cmove(dst, src);}
|
|
void cmovz(const Reg32& dst, const Mem32& src) {cmove(dst, src);}
|
|
#ifdef JITASM64
|
|
void cmova(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F47, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmova(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F47, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovae(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F43, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovae(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F43, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovb(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F42, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovb(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F42, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovbe(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F46, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovbe(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F46, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovc(const Reg64& dst, const Reg64& src) {cmovb(dst, src);}
|
|
void cmovc(const Reg64& dst, const Mem64& src) {cmovb(dst, src);}
|
|
void cmove(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F44, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmove(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F44, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovg(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4F, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovg(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4F, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovge(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4D, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovge(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4D, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovl(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4C, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovl(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4C, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovle(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4E, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovle(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4E, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovna(const Reg64& dst, const Reg64& src) {cmovbe(dst, src);}
|
|
void cmovna(const Reg64& dst, const Mem64& src) {cmovbe(dst, src);}
|
|
void cmovnae(const Reg64& dst, const Reg64& src) {cmovb(dst, src);}
|
|
void cmovnae(const Reg64& dst, const Mem64& src) {cmovb(dst, src);}
|
|
void cmovnb(const Reg64& dst, const Reg64& src) {cmovae(dst, src);}
|
|
void cmovnb(const Reg64& dst, const Mem64& src) {cmovae(dst, src);}
|
|
void cmovnbe(const Reg64& dst, const Reg64& src) {cmova(dst, src);}
|
|
void cmovnbe(const Reg64& dst, const Mem64& src) {cmova(dst, src);}
|
|
void cmovnc(const Reg64& dst, const Reg64& src) {cmovae(dst, src);}
|
|
void cmovnc(const Reg64& dst, const Mem64& src) {cmovae(dst, src);}
|
|
void cmovne(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F45, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovne(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F45, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovng(const Reg64& dst, const Reg64& src) {cmovle(dst, src);}
|
|
void cmovng(const Reg64& dst, const Mem64& src) {cmovle(dst, src);}
|
|
void cmovnge(const Reg64& dst, const Reg64& src) {cmovl(dst, src);}
|
|
void cmovnge(const Reg64& dst, const Mem64& src) {cmovl(dst, src);}
|
|
void cmovnl(const Reg64& dst, const Reg64& src) {cmovge(dst, src);}
|
|
void cmovnl(const Reg64& dst, const Mem64& src) {cmovge(dst, src);}
|
|
void cmovnle(const Reg64& dst, const Reg64& src) {cmovg(dst, src);}
|
|
void cmovnle(const Reg64& dst, const Mem64& src) {cmovg(dst, src);}
|
|
void cmovno(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F41, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovno(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F41, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovnp(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4B, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovnp(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4B, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovns(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F49, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovns(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F49, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovnz(const Reg64& dst, const Reg64& src) {cmovne(dst, src);}
|
|
void cmovnz(const Reg64& dst, const Mem64& src) {cmovne(dst, src);}
|
|
void cmovo(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F40, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovo(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F40, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovp(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F4A, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovp(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F4A, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovpe(const Reg64& dst, const Reg64& src) {cmovp(dst, src);}
|
|
void cmovpe(const Reg64& dst, const Mem64& src) {cmovp(dst, src);}
|
|
void cmovpo(const Reg64& dst, const Reg64& src) {cmovnp(dst, src);}
|
|
void cmovpo(const Reg64& dst, const Mem64& src) {cmovnp(dst, src);}
|
|
void cmovs(const Reg64& dst, const Reg64& src) {AppendInstr(I_CMOVCC, 0x0F48, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovs(const Reg64& dst, const Mem64& src) {AppendInstr(I_CMOVCC, 0x0F48, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cmovz(const Reg64& dst, const Reg64& src) {cmove(dst, src);}
|
|
void cmovz(const Reg64& dst, const Mem64& src) {cmove(dst, src);}
|
|
#endif
|
|
void cmp(const Reg8& lhs, const Imm8& imm) {AppendInstr(I_CMP, 0x80, E_SPECIAL, Imm8(7), R(lhs), imm);}
|
|
void cmp(const Mem8& lhs, const Imm8& imm) {AppendInstr(I_CMP, 0x80, 0, Imm8(7), R(lhs), imm);}
|
|
void cmp(const Reg16& lhs, const Imm16& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Mem16& lhs, const Imm16& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Reg32& lhs, const Imm32& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Mem32& lhs, const Imm32& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Reg8& lhs, const Reg8& rhs) {AppendInstr(I_CMP, 0x38, 0, R(rhs), R(lhs));}
|
|
void cmp(const Mem8& lhs, const Reg8& rhs) {AppendInstr(I_CMP, 0x38, 0, R(rhs), R(lhs));}
|
|
void cmp(const Reg8& lhs, const Mem8& rhs) {AppendInstr(I_CMP, 0x3A, 0, R(lhs), R(rhs));}
|
|
void cmp(const Reg16& lhs, const Reg16& rhs) {AppendInstr(I_CMP, 0x39, E_OPERAND_SIZE_PREFIX, R(rhs), R(lhs));}
|
|
void cmp(const Mem16& lhs, const Reg16& rhs) {AppendInstr(I_CMP, 0x39, E_OPERAND_SIZE_PREFIX, R(rhs), R(lhs));}
|
|
void cmp(const Reg16& lhs, const Mem16& rhs) {AppendInstr(I_CMP, 0x3B, E_OPERAND_SIZE_PREFIX, R(lhs), R(rhs));}
|
|
void cmp(const Reg32& lhs, const Reg32& rhs) {AppendInstr(I_CMP, 0x39, 0, R(rhs), R(lhs));}
|
|
void cmp(const Mem32& lhs, const Reg32& rhs) {AppendInstr(I_CMP, 0x39, 0, R(rhs), R(lhs));}
|
|
void cmp(const Reg32& lhs, const Mem32& rhs) {AppendInstr(I_CMP, 0x3B, 0, R(lhs), R(rhs));}
|
|
#ifdef JITASM64
|
|
void cmp(const Reg64& lhs, const Imm32& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Mem64& lhs, const Imm32& imm) {AppendInstr(I_CMP, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(7), R(lhs), detail::ImmXor8(imm));}
|
|
void cmp(const Reg64& lhs, const Reg64& rhs) {AppendInstr(I_CMP, 0x39, E_REXW_PREFIX, R(rhs), R(lhs));}
|
|
void cmp(const Mem64& lhs, const Reg64& rhs) {AppendInstr(I_CMP, 0x39, E_REXW_PREFIX, R(rhs), R(lhs));}
|
|
void cmp(const Reg64& lhs, const Mem64& rhs) {AppendInstr(I_CMP, 0x3B, E_REXW_PREFIX, R(lhs), R(rhs));}
|
|
#endif
|
|
void cmpsb() {AppendInstr(I_CMPS_B, 0xA6, 0, Dummy(RW(edi)), Dummy(RW(esi)));}
|
|
void cmpsw() {AppendInstr(I_CMPS_W, 0xA7, E_OPERAND_SIZE_PREFIX, Dummy(RW(edi)), Dummy(RW(esi)));}
|
|
void cmpsd() {AppendInstr(I_CMPS_D, 0xA7, 0, Dummy(RW(edi)), Dummy(RW(esi)));}
|
|
#ifdef JITASM64
|
|
void cmpsq() {AppendInstr(I_CMPS_Q, 0xA7, E_REXW_PREFIX, Dummy(RW(rdi)), Dummy(RW(rsi)));}
|
|
#endif
|
|
void cmpxchg(const Reg8& dst, const Reg8& src, const Reg8& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB0, 0, R(src), RW(dst), Dummy(RW(cmpx),al));}
|
|
void cmpxchg(const Mem8& dst, const Reg8& src, const Reg8& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB0, 0, R(src), RW(dst), Dummy(RW(cmpx),al));}
|
|
void cmpxchg(const Reg16& dst, const Reg16& src, const Reg16& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(RW(cmpx),ax));}
|
|
void cmpxchg(const Mem16& dst, const Reg16& src, const Reg16& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(RW(cmpx),ax));}
|
|
void cmpxchg(const Reg32& dst, const Reg32& src, const Reg32& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, 0, R(src), RW(dst), Dummy(RW(cmpx),eax));}
|
|
void cmpxchg(const Mem32& dst, const Reg32& src, const Reg32& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, 0, R(src), RW(dst), Dummy(RW(cmpx),eax));}
|
|
#ifdef JITASM64
|
|
void cmpxchg(const Reg64& dst, const Reg64& src, const Reg64& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, E_REXW_PREFIX, R(src), RW(dst), Dummy(RW(cmpx),rax));}
|
|
void cmpxchg(const Mem64& dst, const Reg64& src, const Reg64& cmpx) {AppendInstr(I_CMPXCHG, 0x0FB1, E_REXW_PREFIX, R(src), RW(dst), Dummy(RW(cmpx),rax));}
|
|
#endif
|
|
void cmpxchg8b(const Mem64& dst) {AppendInstr(I_CMPXCHG8B, 0x0FC7, 0, Imm8(1), RW(dst), Dummy(RW(edx)), Dummy(RW(eax)), Dummy(R(ecx)), Dummy(R(ebx)));}
|
|
#ifdef JITASM64
|
|
void cmpxchg16b(const Mem128& dst) {AppendInstr(I_CMPXCHG16B, 0x0FC7, E_REXW_PREFIX, Imm8(1), RW(dst), Dummy(RW(rdx)), Dummy(RW(rax)), Dummy(R(rcx)), Dummy(R(rbx)));}
|
|
#endif
|
|
void cpuid() {AppendInstr(I_CPUID, 0x0FA2, 0, Dummy(RW(eax)), Dummy(RW(ecx)), Dummy(W(ebx)), Dummy(W(edx)));}
|
|
void cwd() {AppendInstr(I_CWD, 0x99, E_OPERAND_SIZE_PREFIX, Dummy(W(dx)), Dummy(R(ax)));}
|
|
void cdq() {AppendInstr(I_CDQ, 0x99, 0, Dummy(W(edx)), Dummy(R(eax)));}
|
|
#ifdef JITASM64
|
|
void cqo() {AppendInstr(I_CQO, 0x99, E_REXW_PREFIX, Dummy(W(rdx)), Dummy(R(rax)));}
|
|
#endif
|
|
void dec(const Reg8& dst) {AppendInstr(I_DEC, 0xFE, 0, Imm8(1), RW(dst));}
|
|
void dec(const Mem8& dst) {AppendInstr(I_DEC, 0xFE, 0, Imm8(1), RW(dst));}
|
|
void dec(const Mem16& dst) {AppendInstr(I_DEC, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst));}
|
|
void dec(const Mem32& dst) {AppendInstr(I_DEC, 0xFF, 0, Imm8(1), RW(dst));}
|
|
#ifndef JITASM64
|
|
void dec(const Reg16& dst) {AppendInstr(I_DEC, 0x48, E_OPERAND_SIZE_PREFIX, RW(dst));}
|
|
void dec(const Reg32& dst) {AppendInstr(I_DEC, 0x48, 0, RW(dst));}
|
|
#else
|
|
void dec(const Reg16& dst) {AppendInstr(I_DEC, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst));}
|
|
void dec(const Reg32& dst) {AppendInstr(I_DEC, 0xFF, 0, Imm8(1), RW(dst));}
|
|
void dec(const Reg64& dst) {AppendInstr(I_DEC, 0xFF, E_REXW_PREFIX, Imm8(1), RW(dst));}
|
|
void dec(const Mem64& dst) {AppendInstr(I_DEC, 0xFF, E_REXW_PREFIX, Imm8(1), RW(dst));}
|
|
#endif
|
|
void div(const Reg8& src) {AppendInstr(I_DIV, 0xF6, 0, Imm8(6), R(src), Dummy(RW(ax)));}
|
|
void div(const Mem8& src) {AppendInstr(I_DIV, 0xF6, 0, Imm8(6), R(src), Dummy(RW(ax)));}
|
|
void div(const Reg16& src) {AppendInstr(I_DIV, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(6), R(src), Dummy(RW(ax)), Dummy(RW(dx)));}
|
|
void div(const Mem16& src) {AppendInstr(I_DIV, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(6), R(src), Dummy(RW(ax)), Dummy(RW(dx)));}
|
|
void div(const Reg32& src) {AppendInstr(I_DIV, 0xF7, 0, Imm8(6), R(src), Dummy(RW(eax)), Dummy(RW(edx)));}
|
|
void div(const Mem32& src) {AppendInstr(I_DIV, 0xF7, 0, Imm8(6), R(src), Dummy(RW(eax)), Dummy(RW(edx)));}
|
|
#ifdef JITASM64
|
|
void div(const Reg64& src) {AppendInstr(I_DIV, 0xF7, E_REXW_PREFIX, Imm8(6), R(src), Dummy(RW(rax)), Dummy(RW(rdx)));}
|
|
void div(const Mem64& src) {AppendInstr(I_DIV, 0xF7, E_REXW_PREFIX, Imm8(6), R(src), Dummy(RW(rax)), Dummy(RW(rdx)));}
|
|
#endif
|
|
void enter(const Imm16& imm16, const Imm8& imm8) {AppendInstr(I_ENTER, 0xC8, 0, imm16, imm8, Dummy(RW(esp)), Dummy(RW(ebp)));}
|
|
void hlt() {AppendInstr(I_HLT, 0xF4, 0);}
|
|
void idiv(const Reg8& src) {AppendInstr(I_IDIV, 0xF6, 0, Imm8(7), R(src), Dummy(RW(ax)));}
|
|
void idiv(const Mem8& src) {AppendInstr(I_IDIV, 0xF6, 0, Imm8(7), R(src), Dummy(RW(ax)));}
|
|
void idiv(const Reg16& src) {AppendInstr(I_IDIV, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(7), R(src), Dummy(RW(ax)), Dummy(RW(dx)));}
|
|
void idiv(const Mem16& src) {AppendInstr(I_IDIV, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(7), R(src), Dummy(RW(ax)), Dummy(RW(dx)));}
|
|
void idiv(const Reg32& src) {AppendInstr(I_IDIV, 0xF7, 0, Imm8(7), R(src), Dummy(RW(eax)), Dummy(RW(edx)));}
|
|
void idiv(const Mem32& src) {AppendInstr(I_IDIV, 0xF7, 0, Imm8(7), R(src), Dummy(RW(eax)), Dummy(RW(edx)));}
|
|
#ifdef JITASM64
|
|
void idiv(const Reg64& src) {AppendInstr(I_IDIV, 0xF7, E_REXW_PREFIX, Imm8(7), R(src), Dummy(RW(rax)), Dummy(RW(rdx)));}
|
|
void idiv(const Mem64& src) {AppendInstr(I_IDIV, 0xF7, E_REXW_PREFIX, Imm8(7), R(src), Dummy(RW(rax)), Dummy(RW(rdx)));}
|
|
#endif
|
|
void imul(const Reg8& src) {AppendInstr(I_IMUL, 0xF6, 0, Imm8(5), R(src), Dummy(RW(ax)));}
|
|
void imul(const Mem8& src) {AppendInstr(I_IMUL, 0xF6, 0, Imm8(5), R(src), Dummy(RW(ax)));}
|
|
void imul(const Reg16& src) {AppendInstr(I_IMUL, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(5), R(src), Dummy(RW(ax)), Dummy(W(dx)));}
|
|
void imul(const Mem16& src) {AppendInstr(I_IMUL, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(5), R(src), Dummy(RW(ax)), Dummy(W(dx)));}
|
|
void imul(const Reg32& src) {AppendInstr(I_IMUL, 0xF7, 0, Imm8(5), R(src), Dummy(RW(eax)), Dummy(W(edx)));}
|
|
void imul(const Mem32& src) {AppendInstr(I_IMUL, 0xF7, 0, Imm8(5), R(src), Dummy(RW(eax)), Dummy(W(edx)));}
|
|
void imul(const Reg16& dst, const Reg16& src) {AppendInstr(I_IMUL, 0x0FAF, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void imul(const Reg16& dst, const Mem16& src) {AppendInstr(I_IMUL, 0x0FAF, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void imul(const Reg32& dst, const Reg32& src) {AppendInstr(I_IMUL, 0x0FAF, 0, RW(dst), R(src));}
|
|
void imul(const Reg32& dst, const Mem32& src) {AppendInstr(I_IMUL, 0x0FAF, 0, RW(dst), R(src));}
|
|
void imul(const Reg16& dst, const Reg16& src, const Imm16& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, E_OPERAND_SIZE_PREFIX, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg16& dst, const Mem16& src, const Imm16& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, E_OPERAND_SIZE_PREFIX, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg32& dst, const Reg32& src, const Imm32& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, 0, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg32& dst, const Mem32& src, const Imm32& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, 0, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg16& dst, const Imm16& imm) {imul(dst, dst, imm);}
|
|
void imul(const Reg32& dst, const Imm32& imm) {imul(dst, dst, imm);}
|
|
#ifdef JITASM64
|
|
void imul(const Reg64& src) {AppendInstr(I_IMUL, 0xF7, E_REXW_PREFIX, Imm8(5), R(src), Dummy(RW(rax)), Dummy(W(rdx)));}
|
|
void imul(const Mem64& src) {AppendInstr(I_IMUL, 0xF7, E_REXW_PREFIX, Imm8(5), R(src), Dummy(RW(rax)), Dummy(W(rdx)));}
|
|
void imul(const Reg64& dst, const Reg64& src) {AppendInstr(I_IMUL, 0x0FAF, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void imul(const Reg64& dst, const Mem64& src) {AppendInstr(I_IMUL, 0x0FAF, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void imul(const Reg64& dst, const Reg64& src, const Imm32& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, E_REXW_PREFIX, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg64& dst, const Mem64& src, const Imm32& imm) {AppendInstr(I_IMUL, detail::IsInt8(imm.GetImm()) ? 0x6B : 0x69, E_REXW_PREFIX, W(dst), R(src), detail::ImmXor8(imm));}
|
|
void imul(const Reg64& dst, const Imm32& imm) {imul(dst, dst, imm);}
|
|
#endif
|
|
void in(const Reg8& dst, const Imm8& src) {AppendInstr(I_IN, 0xE4, 0, src, Dummy(W(dst),al));}
|
|
void in(const Reg16& dst, const Imm8& src) {AppendInstr(I_IN, 0xE5, E_OPERAND_SIZE_PREFIX, src, Dummy(W(dst),ax));}
|
|
void in(const Reg32& dst, const Imm8& src) {AppendInstr(I_IN, 0xE5, 0, src, Dummy(W(dst),eax));}
|
|
void in(const Reg8& dst, const Reg16& src) {AppendInstr(I_IN, 0xEC, 0, Dummy(R(src),dx), Dummy(W(dst),al));}
|
|
void in(const Reg16& dst, const Reg16& src) {AppendInstr(I_IN, 0xED, E_OPERAND_SIZE_PREFIX, Dummy(R(src),dx), Dummy(W(dst),ax));}
|
|
void in(const Reg32& dst, const Reg16& src) {AppendInstr(I_IN, 0xED, 0, Dummy(R(src),dx), Dummy(W(dst),eax));}
|
|
void inc(const Reg8& dst) {AppendInstr(I_INC, 0xFE, 0, Imm8(0), RW(dst));}
|
|
void inc(const Mem8& dst) {AppendInstr(I_INC, 0xFE, 0, Imm8(0), RW(dst));}
|
|
void inc(const Mem16& dst) {AppendInstr(I_INC, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst));}
|
|
void inc(const Mem32& dst) {AppendInstr(I_INC, 0xFF, 0, Imm8(0), RW(dst));}
|
|
#ifndef JITASM64
|
|
void inc(const Reg16& dst) {AppendInstr(I_INC, 0x40, E_OPERAND_SIZE_PREFIX, RW(dst));}
|
|
void inc(const Reg32& dst) {AppendInstr(I_INC, 0x40, 0, RW(dst));}
|
|
#else
|
|
void inc(const Reg16& dst) {AppendInstr(I_INC, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst));}
|
|
void inc(const Reg32& dst) {AppendInstr(I_INC, 0xFF, 0, Imm8(0), RW(dst));}
|
|
void inc(const Reg64& dst) {AppendInstr(I_INC, 0xFF, E_REXW_PREFIX, Imm8(0), RW(dst));}
|
|
void inc(const Mem64& dst) {AppendInstr(I_INC, 0xFF, E_REXW_PREFIX, Imm8(0), RW(dst));}
|
|
#endif
|
|
void insb(const Reg& dst, const Reg16& src) {AppendInstr(I_INS_B, 0x6C, 0, Dummy(R(src),dx), Dummy(RW(dst),edi));}
|
|
void insw(const Reg& dst, const Reg16& src) {AppendInstr(I_INS_W, 0x6D, E_OPERAND_SIZE_PREFIX, Dummy(R(src),dx), Dummy(RW(dst),edi));}
|
|
void insd(const Reg& dst, const Reg16& src) {AppendInstr(I_INS_D, 0x6D, 0, Dummy(R(src),dx), Dummy(RW(dst),edi));}
|
|
void rep_insb(const Reg& dst, const Reg16& src, const Reg& count) {AppendInstr(I_INS_B, 0x6C, E_REP_PREFIX, Dummy(R(src),dx), Dummy(RW(dst),edi), Dummy(RW(count),ecx));}
|
|
void rep_insw(const Reg& dst, const Reg16& src, const Reg& count) {AppendInstr(I_INS_W, 0x6D, E_REP_PREFIX | E_OPERAND_SIZE_PREFIX, Dummy(R(src),dx), Dummy(RW(dst),edi), Dummy(RW(count),ecx));}
|
|
void rep_insd(const Reg& dst, const Reg16& src, const Reg& count) {AppendInstr(I_INS_D, 0x6D, E_REP_PREFIX, Dummy(R(src),dx), Dummy(RW(dst),edi), Dummy(RW(count),ecx));}
|
|
void int3() {AppendInstr(I_INT3, 0xCC, 0);}
|
|
void intn(const Imm8& n) {AppendInstr(I_INTN, 0xCD, 0, n);}
|
|
#ifndef JITASM64
|
|
void into() {AppendInstr(I_INTO, 0xCE, 0);}
|
|
#endif
|
|
void invd() {AppendInstr(I_INVD, 0x0F08, 0);}
|
|
template<class Ty> void invlpg(const MemT<Ty>& dst) {AppendInstr(I_INVLPG, 0x0F01, 0, Imm8(7), R(dst));}
|
|
void iret() {AppendInstr(I_IRET, 0xCF, E_OPERAND_SIZE_PREFIX);}
|
|
void iretd() {AppendInstr(I_IRETD, 0xCF, 0);}
|
|
#ifdef JITASM64
|
|
void iretq() {AppendInstr(I_IRETQ, 0xCF, E_REXW_PREFIX);}
|
|
#endif
|
|
void jmp(const Mem32& dst) {AppendInstr(I_JMP, 0xFF, 0, Imm8(4), R(dst));}
|
|
void jmp(const std::string& label_name) {AppendJmp(GetLabelID(label_name));}
|
|
void ja(const std::string& label_name) {AppendJcc(JCC_A, GetLabelID(label_name));}
|
|
void jae(const std::string& label_name) {AppendJcc(JCC_AE, GetLabelID(label_name));}
|
|
void jb(const std::string& label_name) {AppendJcc(JCC_B, GetLabelID(label_name));}
|
|
void jbe(const std::string& label_name) {AppendJcc(JCC_BE, GetLabelID(label_name));}
|
|
void jc(const std::string& label_name) {jb(label_name);}
|
|
#ifdef JITASM64
|
|
void jecxz(const std::string& label_name) {AppendJcc(JCC_ECXZ, GetLabelID(label_name));} // short jump only
|
|
void jrcxz (const std::string& label_name) {AppendJcc(JCC_RCXZ, GetLabelID(label_name));} // short jump only
|
|
#else
|
|
void jcxz(const std::string& label_name) {AppendJcc(JCC_CXZ, GetLabelID(label_name));} // short jump only
|
|
void jecxz(const std::string& label_name) {AppendJcc(JCC_ECXZ, GetLabelID(label_name));} // short jump only
|
|
#endif
|
|
void je(const std::string& label_name) {AppendJcc(JCC_E, GetLabelID(label_name));}
|
|
void jg(const std::string& label_name) {AppendJcc(JCC_G, GetLabelID(label_name));}
|
|
void jge(const std::string& label_name) {AppendJcc(JCC_GE, GetLabelID(label_name));}
|
|
void jl(const std::string& label_name) {AppendJcc(JCC_L, GetLabelID(label_name));}
|
|
void jle(const std::string& label_name) {AppendJcc(JCC_LE, GetLabelID(label_name));}
|
|
void jna(const std::string& label_name) {jbe(label_name);}
|
|
void jnae(const std::string& label_name) {jb(label_name);}
|
|
void jnb(const std::string& label_name) {jae(label_name);}
|
|
void jnbe(const std::string& label_name) {ja(label_name);}
|
|
void jnc(const std::string& label_name) {jae(label_name);}
|
|
void jne(const std::string& label_name) {AppendJcc(JCC_NE, GetLabelID(label_name));}
|
|
void jng(const std::string& label_name) {jle(label_name);}
|
|
void jnge(const std::string& label_name) {jl(label_name);}
|
|
void jnl(const std::string& label_name) {jge(label_name);}
|
|
void jnle(const std::string& label_name) {jg(label_name);}
|
|
void jno(const std::string& label_name) {AppendJcc(JCC_NO, GetLabelID(label_name));}
|
|
void jnp(const std::string& label_name) {AppendJcc(JCC_NP, GetLabelID(label_name));}
|
|
void jns(const std::string& label_name) {AppendJcc(JCC_NS, GetLabelID(label_name));}
|
|
void jnz(const std::string& label_name) {jne(label_name);}
|
|
void jo(const std::string& label_name) {AppendJcc(JCC_O, GetLabelID(label_name));}
|
|
void jp(const std::string& label_name) {AppendJcc(JCC_P, GetLabelID(label_name));}
|
|
void jpe(const std::string& label_name) {jp(label_name);}
|
|
void jpo(const std::string& label_name) {jnp(label_name);}
|
|
void js(const std::string& label_name) {AppendJcc(JCC_S, GetLabelID(label_name));}
|
|
void jz(const std::string& label_name) {je(label_name);}
|
|
void lar(const Reg16& dst, const Reg16& src) {AppendInstr(I_LAR, 0x0F02, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void lar(const Reg16& dst, const Mem16& src) {AppendInstr(I_LAR, 0x0F02, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void lar(const Reg32& dst, const Reg32& src) {AppendInstr(I_LAR, 0x0F02, 0, W(dst), R(src));}
|
|
void lar(const Reg32& dst, const Mem16& src) {AppendInstr(I_LAR, 0x0F02, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void lar(const Reg64& dst, const Reg64& src) {AppendInstr(I_LAR, 0x0F02, E_REXW_PREFIX, W(dst), R(src));}
|
|
void lar(const Reg64& dst, const Mem16& src) {AppendInstr(I_LAR, 0x0F02, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
template<class Ty> void lea(const Reg16& dst, const MemT<Ty>& src) {AppendInstr(I_LEA, 0x8D, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
template<class Ty> void lea(const Reg32& dst, const MemT<Ty>& src) {AppendInstr(I_LEA, 0x8D, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
template<class Ty> void lea(const Reg64& dst, const MemT<Ty>& src) {AppendInstr(I_LEA, 0x8D, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void leave() {AppendInstr(I_LEAVE, 0xC9, 0, Dummy(W(esp)), Dummy(RW(ebp)));}
|
|
//lgdt
|
|
//lidt
|
|
void lldt(const Reg16& src) {AppendInstr(I_LLDT, 0x0F00, 0, Imm8(2), R(src));}
|
|
void lldt(const Mem16& src) {AppendInstr(I_LLDT, 0x0F00, 0, Imm8(2), R(src));}
|
|
void lmsw(const Reg16& src) {AppendInstr(I_LMSW, 0x0F01, 0, Imm8(6), R(src));}
|
|
void lmsw(const Mem16& src) {AppendInstr(I_LMSW, 0x0F01, 0, Imm8(6), R(src));}
|
|
void lodsb(const Reg8& dst, const Reg& src) {AppendInstr(I_LODS_B, 0xAC, 0, Dummy(W(dst),al), Dummy(RW(src),zsi));}
|
|
void lodsw(const Reg16& dst, const Reg& src) {AppendInstr(I_LODS_W, 0xAD, E_OPERAND_SIZE_PREFIX, Dummy(W(dst),ax), Dummy(RW(src),zsi));}
|
|
void lodsd(const Reg32& dst, const Reg& src) {AppendInstr(I_LODS_D, 0xAD, 0, Dummy(W(dst),eax), Dummy(RW(src),zsi));}
|
|
#ifdef JITASM64
|
|
void lodsq(const Reg64& dst, const Reg& src) {AppendInstr(I_LODS_Q, 0xAD, E_REXW_PREFIX, Dummy(W(dst),rax), Dummy(RW(src),rsi));}
|
|
#endif
|
|
void rep_lodsb(const Reg8& dst, const Reg& src, const Reg& count) {AppendInstr(I_LODS_B, 0xAC, E_REP_PREFIX, Dummy(RW(dst),al), Dummy(RW(src),zsi), Dummy(RW(count),zcx));} // dst is RW because of ecx == 0
|
|
void rep_lodsw(const Reg16& dst, const Reg& src, const Reg& count) {AppendInstr(I_LODS_W, 0xAD, E_REP_PREFIX | E_OPERAND_SIZE_PREFIX, Dummy(RW(dst),ax), Dummy(RW(src),zsi), Dummy(RW(count),zcx));} // dst is RW because of ecx == 0
|
|
void rep_lodsd(const Reg32& dst, const Reg& src, const Reg& count) {AppendInstr(I_LODS_D, 0xAD, E_REP_PREFIX, Dummy(RW(dst),eax), Dummy(RW(src),zsi), Dummy(RW(count),zcx));} // dst is RW because of ecx == 0
|
|
#ifdef JITASM64
|
|
void rep_lodsq(const Reg64& dst, const Reg& src, const Reg& count) {AppendInstr(I_LODS_Q, 0xAD, E_REP_PREFIX | E_REXW_PREFIX, Dummy(RW(dst),rax), Dummy(RW(src),rsi), Dummy(RW(count),rcx));} // dst is RW because of ecx == 0
|
|
#endif
|
|
void loop(const std::string& label_name) {AppendInstr(I_LOOP, 0xE2, E_SPECIAL, Imm64(GetLabelID(label_name)), Dummy(RW(zcx)));} // short jump only
|
|
void loope(const std::string& label_name) {AppendInstr(I_LOOP, 0xE1, E_SPECIAL, Imm64(GetLabelID(label_name)), Dummy(RW(zcx)));} // short jump only
|
|
void loopne(const std::string& label_name) {AppendInstr(I_LOOP, 0xE0, E_SPECIAL, Imm64(GetLabelID(label_name)), Dummy(RW(zcx)));} // short jump only
|
|
void lsl(const Reg16& dst, const Reg16& src) {AppendInstr(I_LSL, 0x0F03, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void lsl(const Reg16& dst, const Mem16& src) {AppendInstr(I_LSL, 0x0F03, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void lsl(const Reg32& dst, const Reg32& src) {AppendInstr(I_LSL, 0x0F03, 0, RW(dst), R(src));}
|
|
void lsl(const Reg32& dst, const Mem16& src) {AppendInstr(I_LSL, 0x0F03, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void lsl(const Reg64& dst, const Reg32& src) {AppendInstr(I_LSL, 0x0F03, E_REXW_PREFIX, RW(dst), R(src));}
|
|
void lsl(const Reg64& dst, const Mem16& src) {AppendInstr(I_LSL, 0x0F03, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void ltr(const Reg16& src) {AppendInstr(I_LTR, 0x0F00, 0, Imm8(3), R(src));}
|
|
void ltr(const Mem16& src) {AppendInstr(I_LTR, 0x0F00, 0, Imm8(3), R(src));}
|
|
void mov(const Reg8& dst, const Reg8& src) {AppendInstr(I_MOV, 0x8A, 0, W(dst), R(src));}
|
|
void mov(const Mem8& dst, const Reg8& src) {AppendInstr(I_MOV, 0x88, E_SPECIAL, R(src), W(dst));}
|
|
void mov(const Reg16& dst, const Reg16& src) {AppendInstr(I_MOV, 0x8B, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void mov(const Mem16& dst, const Reg16& src) {AppendInstr(I_MOV, 0x89, E_OPERAND_SIZE_PREFIX | E_SPECIAL, R(src), W(dst));}
|
|
void mov(const Reg32& dst, const Reg32& src) {AppendInstr(I_MOV, 0x8B, 0, W(dst), R(src));}
|
|
void mov(const Mem32& dst, const Reg32& src) {AppendInstr(I_MOV, 0x89, E_SPECIAL, R(src), W(dst));}
|
|
void mov(const Reg8& dst, const Mem8& src) {AppendInstr(I_MOV, 0x8A, E_SPECIAL, W(dst), R(src));}
|
|
void mov(const Reg16& dst, const Mem16& src) {AppendInstr(I_MOV, 0x8B, E_OPERAND_SIZE_PREFIX | E_SPECIAL, W(dst), R(src));}
|
|
void mov(const Reg32& dst, const Mem32& src) {AppendInstr(I_MOV, 0x8B, E_SPECIAL, W(dst), R(src));}
|
|
void mov(const Reg8& dst, const Imm8& imm) {AppendInstr(I_MOV, 0xB0, 0, W(dst), imm);}
|
|
void mov(const Reg16& dst, const Imm16& imm) {AppendInstr(I_MOV, 0xB8, E_OPERAND_SIZE_PREFIX, W(dst), imm);}
|
|
void mov(const Reg32& dst, const Imm32& imm) {AppendInstr(I_MOV, 0xB8, 0, W(dst), imm);}
|
|
void mov(const Mem8& dst, const Imm8& imm) {AppendInstr(I_MOV, 0xC6, 0, Imm8(0), W(dst), imm);}
|
|
void mov(const Mem16& dst, const Imm16& imm) {AppendInstr(I_MOV, 0xC7, E_OPERAND_SIZE_PREFIX, Imm8(0), W(dst), imm);}
|
|
void mov(const Mem32& dst, const Imm32& imm) {AppendInstr(I_MOV, 0xC7, 0, Imm8(0), W(dst), imm);}
|
|
#ifdef JITASM64
|
|
void mov(const Reg64& dst, const Reg64& src) {AppendInstr(I_MOV, 0x8B, E_REXW_PREFIX, W(dst), R(src));}
|
|
void mov(const Mem64& dst, const Reg64& src) {AppendInstr(I_MOV, 0x89, E_REXW_PREFIX, R(src), W(dst));}
|
|
void mov(const Reg64& dst, const Mem64& src) {AppendInstr(I_MOV, 0x8B, E_REXW_PREFIX, W(dst), R(src));}
|
|
void mov(const Reg64& dst, const Imm64& imm) {detail::IsInt32(imm.GetImm()) ? AppendInstr(I_MOV, 0xC7, E_REXW_PREFIX, Imm8(0), W(dst), Imm32((sint32) imm.GetImm())) : AppendInstr(I_MOV, 0xB8, E_REXW_PREFIX, W(dst), imm);}
|
|
void mov(const Mem64& dst, const Imm32& imm) {AppendInstr(I_MOV, 0xC7, E_REXW_PREFIX, Imm8(0), W(dst), imm);}
|
|
void mov(const Reg64_rax& dst, const MemOffset64& src) {AppendInstr(I_MOV, 0xA1, E_REXW_PREFIX, Imm64(src.GetOffset()), Dummy(W(dst)));}
|
|
void mov(const MemOffset64& dst, const Reg64_rax& src) {AppendInstr(I_MOV, 0xA3, E_REXW_PREFIX, Imm64(dst.GetOffset()), Dummy(R(src)));}
|
|
#endif
|
|
void movbe(const Reg16& dst, const Mem16& src) {AppendInstr(I_MOVBE, 0x0F38F0, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void movbe(const Reg32& dst, const Mem32& src) {AppendInstr(I_MOVBE, 0x0F38F0, 0, W(dst), R(src));}
|
|
void movbe(const Mem16& dst, const Reg16& src) {AppendInstr(I_MOVBE, 0x0F38F1, E_OPERAND_SIZE_PREFIX, R(src), W(dst));}
|
|
void movbe(const Mem32& dst, const Reg32& src) {AppendInstr(I_MOVBE, 0x0F38F1, 0, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movbe(const Reg64& dst, const Mem64& src) {AppendInstr(I_MOVBE, 0x0F38F0, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movbe(const Mem64& dst, const Reg64& src) {AppendInstr(I_MOVBE, 0x0F38F1, E_REXW_PREFIX, R(src), W(dst));}
|
|
#endif
|
|
void movsb(const Reg& dst, const Reg& src) {AppendInstr(I_MOVS_B, 0xA4, 0, Dummy(RW(dst), zdi), Dummy(RW(src), zsi));}
|
|
void movsw(const Reg& dst, const Reg& src) {AppendInstr(I_MOVS_W, 0xA5, E_OPERAND_SIZE_PREFIX, Dummy(RW(dst), zdi), Dummy(RW(src), zsi));}
|
|
void movsd(const Reg& dst, const Reg& src) {AppendInstr(I_MOVS_D, 0xA5, 0, Dummy(RW(dst), zdi), Dummy(RW(src), zsi));}
|
|
#ifdef JITASM64
|
|
void movsq(const Reg& dst, const Reg& src) {AppendInstr(I_MOVS_Q, 0xA5, E_REXW_PREFIX, Dummy(RW(dst), rdi), Dummy(RW(src), rsi));}
|
|
#endif
|
|
void rep_movsb() {rep_movsb(zdi, zsi, zcx);}
|
|
void rep_movsw() {rep_movsw(zdi, zsi, zcx);}
|
|
void rep_movsd() {rep_movsd(zdi, zsi, zcx);}
|
|
void rep_movsb(const Reg& dst, const Reg& src, const Reg& count) {AppendInstr(I_MOVS_B, 0xA4, E_REP_PREFIX, Dummy(RW(dst), zdi), Dummy(RW(src), zsi), Dummy(RW(count), ecx));}
|
|
void rep_movsw(const Reg& dst, const Reg& src, const Reg& count) {AppendInstr(I_MOVS_W, 0xA5, E_REP_PREFIX | E_OPERAND_SIZE_PREFIX, Dummy(RW(dst), zdi), Dummy(RW(src), zsi), Dummy(RW(count), ecx));}
|
|
void rep_movsd(const Reg& dst, const Reg& src, const Reg& count) {AppendInstr(I_MOVS_D, 0xA5, E_REP_PREFIX, Dummy(RW(dst), zdi), Dummy(RW(src), zsi), Dummy(RW(count), ecx));}
|
|
#ifdef JITASM64
|
|
void rep_movsq() {rep_movsq(rdi, rsi, rcx);}
|
|
void rep_movsq(const Reg64& dst, const Reg64& src, const Reg64& count) {AppendInstr(I_MOVS_Q, 0xA5, E_REP_PREFIX | E_REXW_PREFIX, Dummy(RW(dst), rdi), Dummy(RW(src), rsi), Dummy(RW(count), rcx));}
|
|
#endif
|
|
void movsx(const Reg16& dst, const Reg8& src) {AppendInstr(I_MOVSX, 0x0FBE, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void movsx(const Reg16& dst, const Mem8& src) {AppendInstr(I_MOVSX, 0x0FBE, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void movsx(const Reg32& dst, const Reg8& src) {AppendInstr(I_MOVSX, 0x0FBE, 0, W(dst), R(src));}
|
|
void movsx(const Reg32& dst, const Mem8& src) {AppendInstr(I_MOVSX, 0x0FBE, 0, W(dst), R(src));}
|
|
void movsx(const Reg32& dst, const Reg16& src) {AppendInstr(I_MOVSX, 0x0FBF, 0, W(dst), R(src));}
|
|
void movsx(const Reg32& dst, const Mem16& src) {AppendInstr(I_MOVSX, 0x0FBF, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void movsx(const Reg64& dst, const Reg8& src) {AppendInstr(I_MOVSX, 0x0FBE, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movsx(const Reg64& dst, const Mem8& src) {AppendInstr(I_MOVSX, 0x0FBE, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movsx(const Reg64& dst, const Reg16& src) {AppendInstr(I_MOVSX, 0x0FBF, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movsx(const Reg64& dst, const Mem16& src) {AppendInstr(I_MOVSX, 0x0FBF, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movsxd(const Reg64& dst, const Reg32& src) {AppendInstr(I_MOVSXD, 0x63, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movsxd(const Reg64& dst, const Mem32& src) {AppendInstr(I_MOVSXD, 0x63, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void movzx(const Reg16& dst, const Reg8& src) {AppendInstr(I_MOVZX, 0x0FB6, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void movzx(const Reg16& dst, const Mem8& src) {AppendInstr(I_MOVZX, 0x0FB6, E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void movzx(const Reg32& dst, const Reg8& src) {AppendInstr(I_MOVZX, 0x0FB6, 0, W(dst), R(src));}
|
|
void movzx(const Reg32& dst, const Mem8& src) {AppendInstr(I_MOVZX, 0x0FB6, 0, W(dst), R(src));}
|
|
void movzx(const Reg32& dst, const Reg16& src) {AppendInstr(I_MOVZX, 0x0FB7, 0, W(dst), R(src));}
|
|
void movzx(const Reg32& dst, const Mem16& src) {AppendInstr(I_MOVZX, 0x0FB7, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void movzx(const Reg64& dst, const Reg8& src) {AppendInstr(I_MOVZX, 0x0FB6, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movzx(const Reg64& dst, const Mem8& src) {AppendInstr(I_MOVZX, 0x0FB6, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movzx(const Reg64& dst, const Reg16& src) {AppendInstr(I_MOVZX, 0x0FB7, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movzx(const Reg64& dst, const Mem16& src) {AppendInstr(I_MOVZX, 0x0FB7, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void mul(const Reg8& src) {AppendInstr(I_MUL, 0xF6, 0, Imm8(4), R(src), Dummy(RW(ax)));}
|
|
void mul(const Mem8& src) {AppendInstr(I_MUL, 0xF6, 0, Imm8(4), R(src), Dummy(RW(ax)));}
|
|
void mul(const Reg16& src) {AppendInstr(I_MUL, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(4), R(src), Dummy(RW(ax)), Dummy(W(dx)));}
|
|
void mul(const Mem16& src) {AppendInstr(I_MUL, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(4), R(src), Dummy(RW(ax)), Dummy(W(dx)));}
|
|
void mul(const Reg32& src) {AppendInstr(I_MUL, 0xF7, 0, Imm8(4), R(src), Dummy(RW(eax)), Dummy(W(edx)));}
|
|
void mul(const Mem32& src) {AppendInstr(I_MUL, 0xF7, 0, Imm8(4), R(src), Dummy(RW(eax)), Dummy(W(edx)));}
|
|
#ifdef JITASM64
|
|
void mul(const Reg64& src) {AppendInstr(I_MUL, 0xF7, E_REXW_PREFIX, Imm8(4), R(src), Dummy(RW(rax)), Dummy(W(rdx)));}
|
|
void mul(const Mem64& src) {AppendInstr(I_MUL, 0xF7, E_REXW_PREFIX, Imm8(4), R(src), Dummy(RW(rax)), Dummy(W(rdx)));}
|
|
#endif
|
|
void neg(const Reg8& dst) {AppendInstr(I_NEG, 0xF6, 0, Imm8(3), RW(dst));}
|
|
void neg(const Mem8& dst) {AppendInstr(I_NEG, 0xF6, 0, Imm8(3), RW(dst));}
|
|
void neg(const Reg16& dst) {AppendInstr(I_NEG, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst));}
|
|
void neg(const Mem16& dst) {AppendInstr(I_NEG, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst));}
|
|
void neg(const Reg32& dst) {AppendInstr(I_NEG, 0xF7, 0, Imm8(3), RW(dst));}
|
|
void neg(const Mem32& dst) {AppendInstr(I_NEG, 0xF7, 0, Imm8(3), RW(dst));}
|
|
#ifdef JITASM64
|
|
void neg(const Reg64& dst) {AppendInstr(I_NEG, 0xF7, E_REXW_PREFIX, Imm8(3), RW(dst));}
|
|
void neg(const Mem64& dst) {AppendInstr(I_NEG, 0xF7, E_REXW_PREFIX, Imm8(3), RW(dst));}
|
|
#endif
|
|
void nop() {AppendInstr(I_NOP, 0x90, 0);}
|
|
void not_(const Reg8& dst) {AppendInstr(I_NOT, 0xF6, 0, Imm8(2), RW(dst));}
|
|
void not_(const Mem8& dst) {AppendInstr(I_NOT, 0xF6, 0, Imm8(2), RW(dst));}
|
|
void not_(const Reg16& dst) {AppendInstr(I_NOT, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst));}
|
|
void not_(const Mem16& dst) {AppendInstr(I_NOT, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst));}
|
|
void not_(const Reg32& dst) {AppendInstr(I_NOT, 0xF7, 0, Imm8(2), RW(dst));}
|
|
void not_(const Mem32& dst) {AppendInstr(I_NOT, 0xF7, 0, Imm8(2), RW(dst));}
|
|
#ifdef JITASM64
|
|
void not_(const Reg64& dst) {AppendInstr(I_NOT, 0xF7, E_REXW_PREFIX, Imm8(2), RW(dst));}
|
|
void not_(const Mem64& dst) {AppendInstr(I_NOT, 0xF7, E_REXW_PREFIX, Imm8(2), RW(dst));}
|
|
#endif
|
|
void or_(const Reg8& dst, const Imm8& imm) {AppendInstr(I_OR, 0x80, E_SPECIAL, Imm8(1), RW(dst), imm);}
|
|
void or_(const Mem8& dst, const Imm8& imm) {AppendInstr(I_OR, 0x80, 0, Imm8(1), RW(dst), imm);}
|
|
void or_(const Reg16& dst, const Imm16& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Mem16& dst, const Imm16& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Reg32& dst, const Imm32& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Mem32& dst, const Imm32& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Reg8& dst, const Reg8& src) {AppendInstr(I_OR, 0x08, 0, R(src), RW(dst));}
|
|
void or_(const Mem8& dst, const Reg8& src) {AppendInstr(I_OR, 0x08, 0, R(src), RW(dst));}
|
|
void or_(const Reg8& dst, const Mem8& src) {AppendInstr(I_OR, 0x0A, 0, RW(dst), R(src));}
|
|
void or_(const Reg16& dst, const Reg16& src) {AppendInstr(I_OR, 0x09, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void or_(const Mem16& dst, const Reg16& src) {AppendInstr(I_OR, 0x09, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void or_(const Reg16& dst, const Mem16& src) {AppendInstr(I_OR, 0x0B, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void or_(const Reg32& dst, const Reg32& src) {AppendInstr(I_OR, 0x09, 0, R(src), RW(dst));}
|
|
void or_(const Mem32& dst, const Reg32& src) {AppendInstr(I_OR, 0x09, 0, R(src), RW(dst));}
|
|
void or_(const Reg32& dst, const Mem32& src) {AppendInstr(I_OR, 0x0B, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void or_(const Reg64& dst, const Imm32& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Mem64& dst, const Imm32& imm) {AppendInstr(I_OR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(1), RW(dst), detail::ImmXor8(imm));}
|
|
void or_(const Reg64& dst, const Reg64& src) {AppendInstr(I_OR, 0x09, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void or_(const Mem64& dst, const Reg64& src) {AppendInstr(I_OR, 0x09, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void or_(const Reg64& dst, const Mem64& src) {AppendInstr(I_OR, 0x0B, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void out(const Imm8& dst, const Reg8& src) {AppendInstr(I_OUT, 0xE6, 0, dst, Dummy(R(src),al));}
|
|
void out(const Imm8& dst, const Reg16& src) {AppendInstr(I_OUT, 0xE7, E_OPERAND_SIZE_PREFIX, dst, Dummy(R(src),ax));}
|
|
void out(const Imm8& dst, const Reg32& src) {AppendInstr(I_OUT, 0xE7, 0, dst, Dummy(R(src),eax));}
|
|
void out(const Reg16& dst, const Reg8& src) {AppendInstr(I_OUT, 0xEE, 0, Dummy(R(dst),dx), Dummy(R(src),al));}
|
|
void out(const Reg16& dst, const Reg16& src) {AppendInstr(I_OUT, 0xEF, E_OPERAND_SIZE_PREFIX, Dummy(R(dst),dx), Dummy(R(src),ax));}
|
|
void out(const Reg16& dst, const Reg32& src) {AppendInstr(I_OUT, 0xEF, 0, Dummy(R(dst),dx), Dummy(R(src),eax));}
|
|
void outsb(const Reg16& dst, const Reg& src) {AppendInstr(I_OUTS_B, 0x6E, 0, Dummy(RW(src),esi), Dummy(R(dst),dx));}
|
|
void outsw(const Reg16& dst, const Reg& src) {AppendInstr(I_OUTS_W, 0x6F, E_OPERAND_SIZE_PREFIX, Dummy(RW(src),esi), Dummy(R(dst),dx));}
|
|
void outsd(const Reg16& dst, const Reg& src) {AppendInstr(I_OUTS_D, 0x6F, 0, Dummy(RW(src),esi), Dummy(R(dst),dx));}
|
|
void rep_outsb(const Reg16& dst, const Reg& src, const Reg& count) {AppendInstr(I_OUTS_B, 0x6E, E_REP_PREFIX, Dummy(RW(src),esi), Dummy(R(dst),dx), Dummy(RW(count),ecx));}
|
|
void rep_outsw(const Reg16& dst, const Reg& src, const Reg& count) {AppendInstr(I_OUTS_W, 0x6F, E_REP_PREFIX | E_OPERAND_SIZE_PREFIX, Dummy(RW(src),esi), Dummy(R(dst),dx), Dummy(RW(count),ecx));}
|
|
void rep_outsd(const Reg16& dst, const Reg& src, const Reg& count) {AppendInstr(I_OUTS_D, 0x6F, E_REP_PREFIX, Dummy(RW(src),esi), Dummy(R(dst),dx), Dummy(RW(count),ecx));}
|
|
void pop(const Reg16& dst) {AppendInstr(I_POP, 0x58, E_OPERAND_SIZE_PREFIX, W(dst));}
|
|
void pop(const Mem16& dst) {AppendInstr(I_POP, 0x8F, E_OPERAND_SIZE_PREFIX, Imm8(0), W(dst));}
|
|
#ifndef JITASM64
|
|
void pop(const Reg32& dst) {AppendInstr(I_POP, 0x58, 0, W(dst), Dummy(RW(esp)));}
|
|
void pop(const Mem32& dst) {AppendInstr(I_POP, 0x8F, 0, Imm8(0), W(dst), Dummy(RW(esp)));}
|
|
#else
|
|
void pop(const Reg64& dst) {AppendInstr(I_POP, 0x58, 0, W(dst), Dummy(RW(esp)));}
|
|
void pop(const Mem64& dst) {AppendInstr(I_POP, 0x8F, 0, Imm8(0), W(dst), Dummy(RW(esp)));}
|
|
#endif
|
|
#ifndef JITASM64
|
|
void popa() {popad();}
|
|
void popad() {AppendInstr(I_POPAD, 0x61, 0, Dummy(RW(esp)));}
|
|
#endif
|
|
#ifndef JITASM64
|
|
void popf() {AppendInstr(I_POPF, 0x9D, E_OPERAND_SIZE_PREFIX, Dummy(RW(esp)));}
|
|
void popfd() {AppendInstr(I_POPFD, 0x9D, 0, Dummy(RW(esp)));}
|
|
#else
|
|
void popf() {AppendInstr(I_POPF, 0x9D, E_OPERAND_SIZE_PREFIX, Dummy(RW(esp)));}
|
|
void popfq() {AppendInstr(I_POPFQ, 0x9D, 0, Dummy(RW(esp)));}
|
|
#endif
|
|
void push(const Reg16& src) {AppendInstr(I_PUSH, 0x50, E_OPERAND_SIZE_PREFIX, R(src), Dummy(RW(esp)));}
|
|
void push(const Mem16& src) {AppendInstr(I_PUSH, 0xFF, E_OPERAND_SIZE_PREFIX, Imm8(6), R(src), Dummy(RW(esp)));}
|
|
#ifndef JITASM64
|
|
void push(const Reg32& src) {AppendInstr(I_PUSH, 0x50, 0, R(src), Dummy(RW(esp)));}
|
|
void push(const Mem32& src) {AppendInstr(I_PUSH, 0xFF, 0, Imm8(6), R(src), Dummy(RW(esp)));}
|
|
#else
|
|
void push(const Reg64& src) {AppendInstr(I_PUSH, 0x50, 0, R(src), Dummy(RW(esp)));}
|
|
void push(const Mem64& src) {AppendInstr(I_PUSH, 0xFF, 0, Imm8(6), R(src), Dummy(RW(esp)));}
|
|
#endif
|
|
void push(const Imm32& imm) {AppendInstr(I_PUSH, detail::IsInt8(imm.GetImm()) ? 0x6A : 0x68, 0, detail::ImmXor8(imm), Dummy(RW(esp)));}
|
|
#ifndef JITASM64
|
|
void pusha() {pushad();}
|
|
void pushad() {AppendInstr(I_PUSHAD, 0x60, 0, Dummy(RW(esp)));}
|
|
#endif
|
|
void pushf() {AppendInstr(I_PUSHF, 0x9C, E_OPERAND_SIZE_PREFIX, Dummy(RW(esp)));}
|
|
#ifndef JITASM64
|
|
void pushfd() {AppendInstr(I_PUSHFD, 0x9C, 0, Dummy(RW(esp)));}
|
|
#else
|
|
void pushfq() {AppendInstr(I_PUSHFQ, 0x9C, 0, Dummy(RW(esp)));}
|
|
#endif
|
|
void rcl(const Reg8& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD2, 0, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Mem8& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD2, 0, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD0, 0, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC0, 0, Imm8(2), RW(dst), shift);}
|
|
void rcl(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD0, 0, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC0, 0, Imm8(2), RW(dst), shift);}
|
|
void rcr(const Reg8& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD2, 0, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Mem8& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD2, 0, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD0, 0, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC0, 0, Imm8(3), RW(dst), shift);}
|
|
void rcr(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD0, 0, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC0, 0, Imm8(3), RW(dst), shift);}
|
|
void rol(const Reg8& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD2, 0, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Mem8& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD2, 0, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD0, 0, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC0, 0, Imm8(0), RW(dst), shift);}
|
|
void rol(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD0, 0, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC0, 0, Imm8(0), RW(dst), shift);}
|
|
void ror(const Reg8& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD2, 0, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Mem8& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD2, 0, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD0, 0, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC0, 0, Imm8(1), RW(dst), shift);}
|
|
void ror(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD0, 0, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC0, 0, Imm8(1), RW(dst), shift);}
|
|
void rcl(const Reg16& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Mem16& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst), shift);}
|
|
void rcl(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(2), RW(dst), shift);}
|
|
void rcr(const Reg16& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Mem16& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst), shift);}
|
|
void rcr(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst), shift);}
|
|
void rol(const Reg16& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Mem16& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst), shift);}
|
|
void rol(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(0), RW(dst), shift);}
|
|
void ror(const Reg16& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Mem16& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst), shift);}
|
|
void ror(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(1), RW(dst), shift);}
|
|
void rcl(const Reg32& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, 0, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Mem32& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, 0, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, 0, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, 0, Imm8(2), RW(dst), shift);}
|
|
void rcl(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, 0, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, 0, Imm8(2), RW(dst), shift);}
|
|
void rcr(const Reg32& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, 0, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Mem32& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, 0, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, 0, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, 0, Imm8(3), RW(dst), shift);}
|
|
void rcr(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, 0, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, 0, Imm8(3), RW(dst), shift);}
|
|
void rol(const Reg32& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, 0, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Mem32& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, 0, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, 0, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, 0, Imm8(0), RW(dst), shift);}
|
|
void rol(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, 0, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, 0, Imm8(0), RW(dst), shift);}
|
|
void ror(const Reg32& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, 0, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Mem32& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, 0, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, 0, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, 0, Imm8(1), RW(dst), shift);}
|
|
void ror(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, 0, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, 0, Imm8(1), RW(dst), shift);}
|
|
#ifdef JITASM64
|
|
void rcl(const Reg64& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, E_REXW_PREFIX, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Mem64& dst, const Reg8& shift) {AppendInstr(I_RCL, 0xD3, E_REXW_PREFIX, Imm8(2), RW(dst), Dummy(R(shift),cl));}
|
|
void rcl(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, E_REXW_PREFIX, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, E_REXW_PREFIX, Imm8(2), RW(dst), shift);}
|
|
void rcl(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCL, 0xD1, E_REXW_PREFIX, Imm8(2), RW(dst)) : AppendInstr(I_RCL, 0xC1, E_REXW_PREFIX, Imm8(2), RW(dst), shift);}
|
|
void rcr(const Reg64& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, E_REXW_PREFIX, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Mem64& dst, const Reg8& shift) {AppendInstr(I_RCR, 0xD3, E_REXW_PREFIX, Imm8(3), RW(dst), Dummy(R(shift),cl));}
|
|
void rcr(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, E_REXW_PREFIX, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, E_REXW_PREFIX, Imm8(3), RW(dst), shift);}
|
|
void rcr(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_RCR, 0xD1, E_REXW_PREFIX, Imm8(3), RW(dst)) : AppendInstr(I_RCR, 0xC1, E_REXW_PREFIX, Imm8(3), RW(dst), shift);}
|
|
void rol(const Reg64& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, E_REXW_PREFIX, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Mem64& dst, const Reg8& shift) {AppendInstr(I_ROL, 0xD3, E_REXW_PREFIX, Imm8(0), RW(dst), Dummy(R(shift),cl));}
|
|
void rol(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, E_REXW_PREFIX, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, E_REXW_PREFIX, Imm8(0), RW(dst), shift);}
|
|
void rol(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROL, 0xD1, E_REXW_PREFIX, Imm8(0), RW(dst)) : AppendInstr(I_ROL, 0xC1, E_REXW_PREFIX, Imm8(0), RW(dst), shift);}
|
|
void ror(const Reg64& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, E_REXW_PREFIX, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Mem64& dst, const Reg8& shift) {AppendInstr(I_ROR, 0xD3, E_REXW_PREFIX, Imm8(1), RW(dst), Dummy(R(shift),cl));}
|
|
void ror(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, E_REXW_PREFIX, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, E_REXW_PREFIX, Imm8(1), RW(dst), shift);}
|
|
void ror(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_ROR, 0xD1, E_REXW_PREFIX, Imm8(1), RW(dst)) : AppendInstr(I_ROR, 0xC1, E_REXW_PREFIX, Imm8(1), RW(dst), shift);}
|
|
#endif
|
|
void rdmsr() {AppendInstr(I_RDMSR, 0x0F32, 0, Dummy(R(ecx)), Dummy(W(edx)), Dummy(W(eax)));}
|
|
void rdpmc() {AppendInstr(I_RDMSR, 0x0F33, 0, Dummy(R(ecx)), Dummy(W(edx)), Dummy(W(eax)));}
|
|
void rdtsc() {AppendInstr(I_RDPMC, 0x0F31, 0, Dummy(W(edx)), Dummy(W(eax)), Dummy(W(ecx)));}
|
|
void ret() {AppendInstr(I_RET, 0xC3, 0, Dummy(RW(esp)));}
|
|
void ret(const Imm16& imm) {AppendInstr(I_RET, 0xC2, 0, imm, Dummy(RW(esp)));}
|
|
void rsm() {AppendInstr(I_RSM, 0x0FAA, 0);}
|
|
void sal(const Reg8& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Mem8& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Reg8& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sal(const Mem8& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sar(const Reg8& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD2, 0, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Mem8& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD2, 0, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD0, 0, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC0, 0, Imm8(7), RW(dst), shift);}
|
|
void sar(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD0, 0, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC0, 0, Imm8(7), RW(dst), shift);}
|
|
void shl(const Reg8& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD2, 0, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Mem8& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD2, 0, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD0, 0, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC0, 0, Imm8(4), RW(dst), shift);}
|
|
void shl(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD0, 0, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC0, 0, Imm8(4), RW(dst), shift);}
|
|
void shr(const Reg8& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD2, 0, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Mem8& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD2, 0, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Reg8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD0, 0, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC0, 0, Imm8(5), RW(dst), shift);}
|
|
void shr(const Mem8& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD0, 0, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC0, 0, Imm8(5), RW(dst), shift);}
|
|
void sal(const Reg16& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Mem16& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Reg16& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sal(const Mem16& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sar(const Reg16& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Mem16& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst), shift);}
|
|
void sar(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(7), RW(dst), shift);}
|
|
void shl(const Reg16& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Mem16& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst), shift);}
|
|
void shl(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(4), RW(dst), shift);}
|
|
void shr(const Reg16& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Mem16& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Reg16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst), shift);}
|
|
void shr(const Mem16& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst), shift);}
|
|
void sal(const Reg32& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Mem32& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Reg32& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sal(const Mem32& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sar(const Reg32& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, 0, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Mem32& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, 0, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, 0, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, 0, Imm8(7), RW(dst), shift);}
|
|
void sar(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, 0, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, 0, Imm8(7), RW(dst), shift);}
|
|
void shl(const Reg32& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, 0, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Mem32& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, 0, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, 0, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, 0, Imm8(4), RW(dst), shift);}
|
|
void shl(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, 0, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, 0, Imm8(4), RW(dst), shift);}
|
|
void shr(const Reg32& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, 0, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Mem32& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, 0, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Reg32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, 0, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, 0, Imm8(5), RW(dst), shift);}
|
|
void shr(const Mem32& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, 0, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, 0, Imm8(5), RW(dst), shift);}
|
|
#ifdef JITASM64
|
|
void sal(const Reg64& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Mem64& dst, const Reg8& shift) {shl(dst, shift);}
|
|
void sal(const Reg64& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sal(const Mem64& dst, const Imm8& shift) {shl(dst, shift);}
|
|
void sar(const Reg64& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, E_REXW_PREFIX, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Mem64& dst, const Reg8& shift) {AppendInstr(I_SAR, 0xD3, E_REXW_PREFIX, Imm8(7), RW(dst), Dummy(R(shift),cl));}
|
|
void sar(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, E_REXW_PREFIX, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, E_REXW_PREFIX, Imm8(7), RW(dst), shift);}
|
|
void sar(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SAR, 0xD1, E_REXW_PREFIX, Imm8(7), RW(dst)) : AppendInstr(I_SAR, 0xC1, E_REXW_PREFIX, Imm8(7), RW(dst), shift);}
|
|
void shl(const Reg64& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, E_REXW_PREFIX, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Mem64& dst, const Reg8& shift) {AppendInstr(I_SHL, 0xD3, E_REXW_PREFIX, Imm8(4), RW(dst), Dummy(R(shift),cl));}
|
|
void shl(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, E_REXW_PREFIX, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, E_REXW_PREFIX, Imm8(4), RW(dst), shift);}
|
|
void shl(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHL, 0xD1, E_REXW_PREFIX, Imm8(4), RW(dst)) : AppendInstr(I_SHL, 0xC1, E_REXW_PREFIX, Imm8(4), RW(dst), shift);}
|
|
void shr(const Reg64& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, E_REXW_PREFIX, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Mem64& dst, const Reg8& shift) {AppendInstr(I_SHR, 0xD3, E_REXW_PREFIX, Imm8(5), RW(dst), Dummy(R(shift),cl));}
|
|
void shr(const Reg64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, E_REXW_PREFIX, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, E_REXW_PREFIX, Imm8(5), RW(dst), shift);}
|
|
void shr(const Mem64& dst, const Imm8& shift) {shift.GetImm() == 1 ? AppendInstr(I_SHR, 0xD1, E_REXW_PREFIX, Imm8(5), RW(dst)) : AppendInstr(I_SHR, 0xC1, E_REXW_PREFIX, Imm8(5), RW(dst), shift);}
|
|
#endif
|
|
void sbb(const Reg8& dst, const Imm8& imm) {AppendInstr(I_SBB, 0x80, E_SPECIAL, Imm8(3), RW(dst), imm);}
|
|
void sbb(const Mem8& dst, const Imm8& imm) {AppendInstr(I_SBB, 0x80, 0, Imm8(3), RW(dst), imm);}
|
|
void sbb(const Reg16& dst, const Imm16& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Mem16& dst, const Imm16& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Reg32& dst, const Imm32& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Mem32& dst, const Imm32& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Reg8& dst, const Reg8& src) {AppendInstr(I_SBB, 0x18, 0, R(src), RW(dst));}
|
|
void sbb(const Mem8& dst, const Reg8& src) {AppendInstr(I_SBB, 0x18, 0, R(src), RW(dst));}
|
|
void sbb(const Reg8& dst, const Mem8& src) {AppendInstr(I_SBB, 0x1A, 0, RW(dst), R(src));}
|
|
void sbb(const Reg16& dst, const Reg16& src) {AppendInstr(I_SBB, 0x19, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void sbb(const Mem16& dst, const Reg16& src) {AppendInstr(I_SBB, 0x19, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void sbb(const Reg16& dst, const Mem16& src) {AppendInstr(I_SBB, 0x1B, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void sbb(const Reg32& dst, const Reg32& src) {AppendInstr(I_SBB, 0x19, 0, R(src), RW(dst));}
|
|
void sbb(const Mem32& dst, const Reg32& src) {AppendInstr(I_SBB, 0x19, 0, R(src), RW(dst));}
|
|
void sbb(const Reg32& dst, const Mem32& src) {AppendInstr(I_SBB, 0x1B, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void sbb(const Reg64& dst, const Imm32& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Mem64& dst, const Imm32& imm) {AppendInstr(I_SBB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(3), RW(dst), detail::ImmXor8(imm));}
|
|
void sbb(const Reg64& dst, const Reg64& src) {AppendInstr(I_SBB, 0x19, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void sbb(const Mem64& dst, const Reg64& src) {AppendInstr(I_SBB, 0x19, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void sbb(const Reg64& dst, const Mem64& src) {AppendInstr(I_SBB, 0x1B, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void scasb() {AppendInstr(I_SCAS_B, 0xAE, 0, Dummy(R(al)), Dummy(RW(edi)));}
|
|
void scasw() {AppendInstr(I_SCAS_W, 0xAF, E_OPERAND_SIZE_PREFIX, Dummy(R(ax)), Dummy(RW(edi)));}
|
|
void scasd() {AppendInstr(I_SCAS_D, 0xAF, 0, Dummy(R(eax)), Dummy(RW(edi)));}
|
|
#ifdef JITASM64
|
|
void scasq() {AppendInstr(I_SCAS_Q, 0xAF, E_REXW_PREFIX, Dummy(R(rax)), Dummy(RW(rdi)));}
|
|
#endif
|
|
void seta(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F97, 0, Imm8(0), W(dst));}
|
|
void seta(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F97, 0, Imm8(0), W(dst));}
|
|
void setae(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F93, 0, Imm8(0), W(dst));}
|
|
void setae(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F93, 0, Imm8(0), W(dst));}
|
|
void setb(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F92, 0, Imm8(0), W(dst));}
|
|
void setb(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F92, 0, Imm8(0), W(dst));}
|
|
void setbe(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F96, 0, Imm8(0), W(dst));}
|
|
void setbe(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F96, 0, Imm8(0), W(dst));}
|
|
void setc(const Reg8& dst) {setb(dst);}
|
|
void setc(const Mem8& dst) {setb(dst);}
|
|
void sete(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F94, 0, Imm8(0), W(dst));}
|
|
void sete(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F94, 0, Imm8(0), W(dst));}
|
|
void setg(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9F, 0, Imm8(0), W(dst));}
|
|
void setg(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9F, 0, Imm8(0), W(dst));}
|
|
void setge(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9D, 0, Imm8(0), W(dst));}
|
|
void setge(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9D, 0, Imm8(0), W(dst));}
|
|
void setl(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9C, 0, Imm8(0), W(dst));}
|
|
void setl(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9C, 0, Imm8(0), W(dst));}
|
|
void setle(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9E, 0, Imm8(0), W(dst));}
|
|
void setle(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9E, 0, Imm8(0), W(dst));}
|
|
void setna(const Reg8& dst) {setbe(dst);}
|
|
void setna(const Mem8& dst) {setbe(dst);}
|
|
void setnae(const Reg8& dst) {setb(dst);}
|
|
void setnae(const Mem8& dst) {setb(dst);}
|
|
void setnb(const Reg8& dst) {setae(dst);}
|
|
void setnb(const Mem8& dst) {setae(dst);}
|
|
void setnbe(const Reg8& dst) {seta(dst);}
|
|
void setnbe(const Mem8& dst) {seta(dst);}
|
|
void setnc(const Reg8& dst) {setae(dst);}
|
|
void setnc(const Mem8& dst) {setae(dst);}
|
|
void setne(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F95, 0, Imm8(0), W(dst));}
|
|
void setne(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F95, 0, Imm8(0), W(dst));}
|
|
void setng(const Reg8& dst) {setle(dst);}
|
|
void setng(const Mem8& dst) {setle(dst);}
|
|
void setnge(const Reg8& dst) {setl(dst);}
|
|
void setnge(const Mem8& dst) {setl(dst);}
|
|
void setnl(const Reg8& dst) {setge(dst);}
|
|
void setnl(const Mem8& dst) {setge(dst);}
|
|
void setnle(const Reg8& dst) {setg(dst);}
|
|
void setnle(const Mem8& dst) {setg(dst);}
|
|
void setno(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F91, 0, Imm8(0), W(dst));}
|
|
void setno(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F91, 0, Imm8(0), W(dst));}
|
|
void setnp(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9B, 0, Imm8(0), W(dst));}
|
|
void setnp(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9B, 0, Imm8(0), W(dst));}
|
|
void setns(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F99, 0, Imm8(0), W(dst));}
|
|
void setns(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F99, 0, Imm8(0), W(dst));}
|
|
void setnz(const Reg8& dst) {setne(dst);}
|
|
void setnz(const Mem8& dst) {setne(dst);}
|
|
void seto(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F90, 0, Imm8(0), W(dst));}
|
|
void seto(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F90, 0, Imm8(0), W(dst));}
|
|
void setp(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F9A, 0, Imm8(0), W(dst));}
|
|
void setp(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F9A, 0, Imm8(0), W(dst));}
|
|
void setpe(const Reg8& dst) {setp(dst);}
|
|
void setpe(const Mem8& dst) {setp(dst);}
|
|
void setpo(const Reg8& dst) {setnp(dst);}
|
|
void setpo(const Mem8& dst) {setnp(dst);}
|
|
void sets(const Reg8& dst) {AppendInstr(I_SETCC, 0x0F98, 0, Imm8(0), W(dst));}
|
|
void sets(const Mem8& dst) {AppendInstr(I_SETCC, 0x0F98, 0, Imm8(0), W(dst));}
|
|
void setz(const Reg8& dst) {sete(dst);}
|
|
void setz(const Mem8& dst) {sete(dst);}
|
|
void shld(const Reg16& dst, const Reg16& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), place);}
|
|
void shld(const Mem16& dst, const Reg16& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), place);}
|
|
void shld(const Reg16& dst, const Reg16& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shld(const Mem16& dst, const Reg16& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shld(const Reg32& dst, const Reg32& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, 0, R(src), RW(dst), place);}
|
|
void shld(const Mem32& dst, const Reg32& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, 0, R(src), RW(dst), place);}
|
|
void shld(const Reg32& dst, const Reg32& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, 0, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shld(const Mem32& dst, const Reg32& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, 0, R(src), RW(dst), Dummy(R(place),cl));}
|
|
#ifdef JITASM64
|
|
void shld(const Reg64& dst, const Reg64& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, E_REXW_PREFIX, R(src), RW(dst), place);}
|
|
void shld(const Mem64& dst, const Reg64& src, const Imm8& place) {AppendInstr(I_SHLD, 0x0FA4, E_REXW_PREFIX, R(src), RW(dst), place);}
|
|
void shld(const Reg64& dst, const Reg64& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, E_REXW_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shld(const Mem64& dst, const Reg64& src, const Reg8& place) {AppendInstr(I_SHLD, 0x0FA5, E_REXW_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
#endif
|
|
void shrd(const Reg16& dst, const Reg16& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), place);}
|
|
void shrd(const Mem16& dst, const Reg16& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), place);}
|
|
void shrd(const Reg16& dst, const Reg16& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shrd(const Mem16& dst, const Reg16& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, E_OPERAND_SIZE_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shrd(const Reg32& dst, const Reg32& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, 0, R(src), RW(dst), place);}
|
|
void shrd(const Mem32& dst, const Reg32& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, 0, R(src), RW(dst), place);}
|
|
void shrd(const Reg32& dst, const Reg32& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, 0, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shrd(const Mem32& dst, const Reg32& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, 0, R(src), RW(dst), Dummy(R(place),cl));}
|
|
#ifdef JITASM64
|
|
void shrd(const Reg64& dst, const Reg64& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, E_REXW_PREFIX, R(src), RW(dst), place);}
|
|
void shrd(const Mem64& dst, const Reg64& src, const Imm8& place) {AppendInstr(I_SHRD, 0x0FAC, E_REXW_PREFIX, R(src), RW(dst), place);}
|
|
void shrd(const Reg64& dst, const Reg64& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, E_REXW_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
void shrd(const Mem64& dst, const Reg64& src, const Reg8& place) {AppendInstr(I_SHRD, 0x0FAD, E_REXW_PREFIX, R(src), RW(dst), Dummy(R(place),cl));}
|
|
#endif
|
|
template<class Ty> void sgdt(const MemT<Ty>& dst) {AppendInstr(I_SGDT, 0x0F01, 0, Imm8(0), W(dst));}
|
|
template<class Ty> void sidt(const MemT<Ty>& dst) {AppendInstr(I_SIDT, 0x0F01, 0, Imm8(1), W(dst));}
|
|
void sldt(const Reg16& dst) {AppendInstr(I_SLDT, 0x0F00, E_OPERAND_SIZE_PREFIX, Imm8(0), W(dst));}
|
|
void sldt(const Mem16& dst) {AppendInstr(I_SLDT, 0x0F00, 0, Imm8(0), W(dst));}
|
|
#ifdef JITASM64
|
|
void sldt(const Reg64& dst) {AppendInstr(I_SLDT, 0x0F00, E_REXW_PREFIX, Imm8(0), W(dst));}
|
|
#endif
|
|
void smsw(const Reg16& dst) {AppendInstr(I_SMSW, 0x0F01, E_OPERAND_SIZE_PREFIX, Imm8(4), W(dst));}
|
|
void smsw(const Mem16& dst) {AppendInstr(I_SMSW, 0x0F01, 0, Imm8(4), W(dst));}
|
|
#ifdef JITASM64
|
|
void smsw(const Reg64& dst) {AppendInstr(I_SMSW, 0x0F01, E_REXW_PREFIX, Imm8(4), W(dst));}
|
|
#endif
|
|
void stc() {AppendInstr(I_STC, 0xF9, 0);}
|
|
void std() {AppendInstr(I_STD, 0xFD, 0);}
|
|
void sti() {AppendInstr(I_STI, 0xFB, 0);}
|
|
void stosb(const Reg& dst, const Reg8& src) {AppendInstr(I_STOS_B, 0xAA, 0, Dummy(R(src),al), Dummy(RW(dst),zdi));}
|
|
void stosw(const Reg& dst, const Reg16& src) {AppendInstr(I_STOS_W, 0xAB, E_OPERAND_SIZE_PREFIX, Dummy(R(src),ax), Dummy(RW(dst),zdi));}
|
|
void stosd(const Reg& dst, const Reg32& src) {AppendInstr(I_STOS_D, 0xAB, 0, Dummy(R(src),eax), Dummy(RW(dst),zdi));}
|
|
#ifdef JITASM64
|
|
void stosq(const Reg& dst, const Reg64& src) {AppendInstr(I_STOS_Q, 0xAB, E_REXW_PREFIX, Dummy(R(src),rax), Dummy(RW(dst),zdi));}
|
|
#endif
|
|
void rep_stosb(const Reg& dst, const Reg8& src, const Reg& count) {AppendInstr(I_STOS_B, 0xAA, E_REP_PREFIX, Dummy(R(src),al), Dummy(RW(dst),zdi), Dummy(RW(count),zcx));}
|
|
void rep_stosw(const Reg& dst, const Reg16& src, const Reg& count) {AppendInstr(I_STOS_W, 0xAB, E_REP_PREFIX | E_OPERAND_SIZE_PREFIX, Dummy(R(src),ax), Dummy(RW(dst),zdi), Dummy(RW(count),zcx));}
|
|
void rep_stosd(const Reg& dst, const Reg32& src, const Reg& count) {AppendInstr(I_STOS_D, 0xAB, E_REP_PREFIX, Dummy(R(src),eax), Dummy(RW(dst),zdi), Dummy(RW(count),zcx));}
|
|
#ifdef JITASM64
|
|
void rep_stosq(const Reg& dst, const Reg64& src, const Reg& count) {AppendInstr(I_STOS_Q, 0xAB, E_REP_PREFIX | E_REXW_PREFIX, Dummy(R(src),rax), Dummy(RW(dst),zdi), Dummy(RW(count),zcx));}
|
|
#endif
|
|
void sub(const Reg8& dst, const Imm8& imm) {AppendInstr(I_SUB, 0x80, E_SPECIAL, Imm8(5), RW(dst), imm);}
|
|
void sub(const Mem8& dst, const Imm8& imm) {AppendInstr(I_SUB, 0x80, 0, Imm8(5), RW(dst), imm);}
|
|
void sub(const Reg16& dst, const Imm16& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Mem16& dst, const Imm16& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Reg32& dst, const Imm32& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Mem32& dst, const Imm32& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Reg8& dst, const Reg8& src) {AppendInstr(I_SUB, 0x28, 0, R(src), RW(dst));}
|
|
void sub(const Mem8& dst, const Reg8& src) {AppendInstr(I_SUB, 0x28, 0, R(src), RW(dst));}
|
|
void sub(const Reg8& dst, const Mem8& src) {AppendInstr(I_SUB, 0x2A, 0, RW(dst), R(src));}
|
|
void sub(const Reg16& dst, const Reg16& src) {AppendInstr(I_SUB, 0x29, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void sub(const Mem16& dst, const Reg16& src) {AppendInstr(I_SUB, 0x29, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void sub(const Reg16& dst, const Mem16& src) {AppendInstr(I_SUB, 0x2B, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void sub(const Reg32& dst, const Reg32& src) {AppendInstr(I_SUB, 0x29, 0, R(src), RW(dst));}
|
|
void sub(const Mem32& dst, const Reg32& src) {AppendInstr(I_SUB, 0x29, 0, R(src), RW(dst));}
|
|
void sub(const Reg32& dst, const Mem32& src) {AppendInstr(I_SUB, 0x2B, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void sub(const Reg64& dst, const Imm32& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Mem64& dst, const Imm32& imm) {AppendInstr(I_SUB, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(5), RW(dst), detail::ImmXor8(imm));}
|
|
void sub(const Reg64& dst, const Reg64& src) {AppendInstr(I_SUB, 0x29, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void sub(const Mem64& dst, const Reg64& src) {AppendInstr(I_SUB, 0x29, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void sub(const Reg64& dst, const Mem64& src) {AppendInstr(I_SUB, 0x2B, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
#ifndef JITASM64
|
|
void sysenter() {AppendInstr(I_SYSENTER, 0x0F34, 0);}
|
|
void sysexit() {AppendInstr(I_SYSEXIT, 0x0F35, 0);}
|
|
#else
|
|
void swapgs() {AppendInstr(I_SWAPGS, 0x0F01F8, 0);} // 0F 01 /7
|
|
void syscall() {AppendInstr(I_SYSCALL, 0x0F05, 0);}
|
|
void sysret() {AppendInstr(I_SYSRET, 0x0F07, 0);}
|
|
#endif
|
|
void test(const Reg8& src1, const Imm8& src2) {AppendInstr(I_TEST, 0xF6, E_SPECIAL, Imm8(0), R(src1), R(src2));}
|
|
void test(const Mem8& src1, const Imm8& src2) {AppendInstr(I_TEST, 0xF6, 0, Imm8(0), R(src1), R(src2));}
|
|
void test(const Reg16& src1, const Imm16& src2) {AppendInstr(I_TEST, 0xF7, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(0), R(src1), R(src2));}
|
|
void test(const Mem16& src1, const Imm16& src2) {AppendInstr(I_TEST, 0xF7, E_OPERAND_SIZE_PREFIX, Imm8(0), R(src1), R(src2));}
|
|
void test(const Reg32& src1, const Imm32& src2) {AppendInstr(I_TEST, 0xF7, E_SPECIAL, Imm8(0), R(src1), R(src2));}
|
|
void test(const Mem32& src1, const Imm32& src2) {AppendInstr(I_TEST, 0xF7, 0, Imm8(0), R(src1), R(src2));}
|
|
void test(const Reg8& src1, const Reg8& src2) {AppendInstr(I_TEST, 0x84, 0, R(src1), R(src2));}
|
|
void test(const Mem8& src1, const Reg8& src2) {AppendInstr(I_TEST, 0x84, 0, R(src2), R(src1));}
|
|
void test(const Reg16& src1, const Reg16& src2) {AppendInstr(I_TEST, 0x85, E_OPERAND_SIZE_PREFIX, R(src1), R(src2));}
|
|
void test(const Mem16& src1, const Reg16& src2) {AppendInstr(I_TEST, 0x85, E_OPERAND_SIZE_PREFIX, R(src2), R(src1));}
|
|
void test(const Reg32& src1, const Reg32& src2) {AppendInstr(I_TEST, 0x85, 0, R(src1), R(src2));}
|
|
void test(const Mem32& src1, const Reg32& src2) {AppendInstr(I_TEST, 0x85, 0, R(src2), R(src1));}
|
|
#ifdef JITASM64
|
|
void test(const Reg64& src1, const Imm32& src2) {AppendInstr(I_TEST, 0xF7, E_REXW_PREFIX | E_SPECIAL, Imm8(0), R(src1), R(src2));}
|
|
void test(const Mem64& src1, const Imm32& src2) {AppendInstr(I_TEST, 0xF7, E_REXW_PREFIX, Imm8(0), R(src1), R(src2));}
|
|
void test(const Reg64& src1, const Reg64& src2) {AppendInstr(I_TEST, 0x85, E_REXW_PREFIX, R(src1), R(src2));}
|
|
void test(const Mem64& src1, const Reg64& src2) {AppendInstr(I_TEST, 0x85, E_REXW_PREFIX, R(src2), R(src1));}
|
|
#endif
|
|
void ud2() {AppendInstr(I_UD2, 0x0F0B, 0);}
|
|
void verr(const Reg16& src) {AppendInstr(I_VERR, 0x0F00, 0, Imm8(4), R(src));}
|
|
void verr(const Mem16& src) {AppendInstr(I_VERR, 0x0F00, 0, Imm8(4), R(src));}
|
|
void verw(const Reg16& src) {AppendInstr(I_VERW, 0x0F00, 0, Imm8(5), R(src));}
|
|
void verw(const Mem16& src) {AppendInstr(I_VERW, 0x0F00, 0, Imm8(5), R(src));}
|
|
void wait() {AppendInstr(I_WAIT, 0x9B, 0);}
|
|
void wbinvd() {AppendInstr(I_WBINVD, 0x0F09, 0);}
|
|
void wrmsr() {AppendInstr(I_WRMSR, 0x0F30, 0, Dummy(R(ecx)), Dummy(R(edx)), Dummy(R(eax)));}
|
|
void xadd(const Reg8& dst, const Reg8& src) {AppendInstr(I_XADD, 0x0FC0, 0, RW(src), RW(dst));}
|
|
void xadd(const Mem8& dst, const Reg8& src) {AppendInstr(I_XADD, 0x0FC0, 0, RW(src), RW(dst));}
|
|
void xadd(const Reg16& dst, const Reg16& src) {AppendInstr(I_XADD, 0x0FC1, E_OPERAND_SIZE_PREFIX, RW(src), RW(dst));}
|
|
void xadd(const Mem16& dst, const Reg16& src) {AppendInstr(I_XADD, 0x0FC1, E_OPERAND_SIZE_PREFIX, RW(src), RW(dst));}
|
|
void xadd(const Reg32& dst, const Reg32& src) {AppendInstr(I_XADD, 0x0FC1, 0, RW(src), RW(dst));}
|
|
void xadd(const Mem32& dst, const Reg32& src) {AppendInstr(I_XADD, 0x0FC1, 0, RW(src), RW(dst));}
|
|
#ifdef JITASM64
|
|
void xadd(const Reg64& dst, const Reg64& src) {AppendInstr(I_XADD, 0x0FC1, E_REXW_PREFIX, RW(src), RW(dst));}
|
|
void xadd(const Mem64& dst, const Reg64& src) {AppendInstr(I_XADD, 0x0FC1, E_REXW_PREFIX, RW(src), RW(dst));}
|
|
#endif
|
|
void xchg(const Reg8& dst, const Reg8& src) {AppendInstr(I_XCHG, 0x86, 0, RW(dst), RW(src));}
|
|
void xchg(const Mem8& dst, const Reg8& src) {AppendInstr(I_XCHG, 0x86, 0, RW(src), RW(dst));}
|
|
void xchg(const Reg8& dst, const Mem8& src) {AppendInstr(I_XCHG, 0x86, 0, RW(dst), RW(src));}
|
|
void xchg(const Reg16& dst, const Reg16& src) {AppendInstr(I_XCHG, 0x87, E_OPERAND_SIZE_PREFIX | E_SPECIAL, RW(dst), RW(src));}
|
|
void xchg(const Mem16& dst, const Reg16& src) {AppendInstr(I_XCHG, 0x87, E_OPERAND_SIZE_PREFIX, RW(src), RW(dst));}
|
|
void xchg(const Reg16& dst, const Mem16& src) {AppendInstr(I_XCHG, 0x87, E_OPERAND_SIZE_PREFIX, RW(dst), RW(src));}
|
|
void xchg(const Reg32& dst, const Reg32& src) {AppendInstr(I_XCHG, 0x87, E_SPECIAL, RW(dst), RW(src));}
|
|
void xchg(const Mem32& dst, const Reg32& src) {AppendInstr(I_XCHG, 0x87, 0, RW(src), RW(dst));}
|
|
void xchg(const Reg32& dst, const Mem32& src) {AppendInstr(I_XCHG, 0x87, 0, RW(dst), RW(src));}
|
|
#ifdef JITASM64
|
|
void xchg(const Reg64& dst, const Reg64& src) {AppendInstr(I_XCHG, 0x87, E_REXW_PREFIX | E_SPECIAL, RW(dst), RW(src));}
|
|
void xchg(const Mem64& dst, const Reg64& src) {AppendInstr(I_XCHG, 0x87, E_REXW_PREFIX, RW(src), RW(dst));}
|
|
void xchg(const Reg64& dst, const Mem64& src) {AppendInstr(I_XCHG, 0x87, E_REXW_PREFIX, RW(dst), RW(src));}
|
|
#endif
|
|
void xgetbv() {AppendInstr(I_XGETBV, 0x0F01D0, 0, Dummy(R(ecx)), Dummy(W(edx)), Dummy(W(eax)));}
|
|
void xlatb() {AppendInstr(I_XLATB, 0xD7, 0, Dummy(RW(al)), Dummy(R(ebx)));}
|
|
void xor_(const Reg8& dst, const Imm8& imm) {AppendInstr(I_XOR, 0x80, E_SPECIAL, Imm8(6), RW(dst), imm);}
|
|
void xor_(const Mem8& dst, const Imm8& imm) {AppendInstr(I_XOR, 0x80, 0, Imm8(6), RW(dst), imm);}
|
|
void xor_(const Reg16& dst, const Imm16& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX | E_SPECIAL, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Mem16& dst, const Imm16& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_OPERAND_SIZE_PREFIX, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Reg32& dst, const Imm32& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_SPECIAL, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Mem32& dst, const Imm32& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, 0, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Reg8& dst, const Reg8& src) {AppendInstr(I_XOR, 0x30, 0, R(src), RW(dst));}
|
|
void xor_(const Mem8& dst, const Reg8& src) {AppendInstr(I_XOR, 0x30, 0, R(src), RW(dst));}
|
|
void xor_(const Reg8& dst, const Mem8& src) {AppendInstr(I_XOR, 0x32, 0, RW(dst), R(src));}
|
|
void xor_(const Reg16& dst, const Reg16& src) {AppendInstr(I_XOR, 0x31, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void xor_(const Mem16& dst, const Reg16& src) {AppendInstr(I_XOR, 0x31, E_OPERAND_SIZE_PREFIX, R(src), RW(dst));}
|
|
void xor_(const Reg16& dst, const Mem16& src) {AppendInstr(I_XOR, 0x33, E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void xor_(const Reg32& dst, const Reg32& src) {AppendInstr(I_XOR, 0x31, 0, R(src), RW(dst));}
|
|
void xor_(const Mem32& dst, const Reg32& src) {AppendInstr(I_XOR, 0x31, 0, R(src), RW(dst));}
|
|
void xor_(const Reg32& dst, const Mem32& src) {AppendInstr(I_XOR, 0x33, 0, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void xor_(const Reg64& dst, const Imm32& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX | E_SPECIAL, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Mem64& dst, const Imm32& imm) {AppendInstr(I_XOR, detail::IsInt8(imm.GetImm()) ? 0x83 : 0x81, E_REXW_PREFIX, Imm8(6), RW(dst), detail::ImmXor8(imm));}
|
|
void xor_(const Reg64& dst, const Reg64& src) {AppendInstr(I_XOR, 0x31, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void xor_(const Mem64& dst, const Reg64& src) {AppendInstr(I_XOR, 0x31, E_REXW_PREFIX, R(src), RW(dst));}
|
|
void xor_(const Reg64& dst, const Mem64& src) {AppendInstr(I_XOR, 0x33, E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
|
|
// x87 Floating-Point Instructions
|
|
void f2xm1() {AppendInstr(I_F2XM1, 0xD9F0, 0);}
|
|
void fabs() {AppendInstr(I_FABS, 0xD9E1, 0);}
|
|
void fadd(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FADD, 0xD8C0, 0, src); avoid_unused_warn(dst);}
|
|
void fadd(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FADD, 0xDCC0, 0, dst); avoid_unused_warn(src);}
|
|
void fadd(const Mem32& dst) {AppendInstr(I_FADD, 0xD8, 0, Imm8(0), dst);}
|
|
void fadd(const Mem64& dst) {AppendInstr(I_FADD, 0xDC, 0, Imm8(0), dst);}
|
|
void faddp() {AppendInstr(I_FADDP, 0xDEC1, 0);}
|
|
void faddp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FADDP, 0xDEC0, 0, dst); avoid_unused_warn(src);}
|
|
void fiadd(const Mem16& dst) {AppendInstr(I_FIADD, 0xDE, 0, Imm8(0), dst);}
|
|
void fiadd(const Mem32& dst) {AppendInstr(I_FIADD, 0xDA, 0, Imm8(0), dst);}
|
|
void fbld(const Mem80& dst) {AppendInstr(I_FBLD, 0xDF, 0, Imm8(4), dst);}
|
|
void fbstp(const Mem80& dst) {AppendInstr(I_FBSTP, 0xDF, 0, Imm8(6), dst);}
|
|
void fchs() {AppendInstr(I_FCHS, 0xD9E0, 0);}
|
|
void fclex() {AppendInstr(I_FCLEX, 0x9BDBE2, 0);}
|
|
void fnclex() {AppendInstr(I_FNCLEX, 0xDBE2, 0);}
|
|
void fcmovb(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDAC0, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovbe(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDAD0, 0, src); avoid_unused_warn(dst);}
|
|
void fcmove(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDAC8, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovnb(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDBC0, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovnbe(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDBD0, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovne(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDBC8, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovnu(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDBD8, 0, src); avoid_unused_warn(dst);}
|
|
void fcmovu(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCMOVCC, 0xDAD8, 0, src); avoid_unused_warn(dst);}
|
|
void fcom() {AppendInstr(I_FCOM, 0xD8D1, 0);}
|
|
void fcom(const FpuReg& dst) {AppendInstr(I_FCOM, 0xD8D0, 0, dst);}
|
|
void fcom(const Mem32& dst) {AppendInstr(I_FCOM, 0xD8, 0, Imm8(2), dst);}
|
|
void fcom(const Mem64& dst) {AppendInstr(I_FCOM, 0xDC, 0, Imm8(2), dst);}
|
|
void fcomp() {AppendInstr(I_FCOMP, 0xD8D9, 0);}
|
|
void fcomp(const FpuReg& dst) {AppendInstr(I_FCOMP, 0xD8D8, 0, dst);}
|
|
void fcomp(const Mem32& dst) {AppendInstr(I_FCOMP, 0xD8, 0, Imm8(3), dst);}
|
|
void fcomp(const Mem64& dst) {AppendInstr(I_FCOMP, 0xDC, 0, Imm8(3), dst);}
|
|
void fcompp() {AppendInstr(I_FCOMPP, 0xDED9, 0);}
|
|
void fcomi(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCOMI, 0xDBF0, 0, src); avoid_unused_warn(dst);}
|
|
void fcomip(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FCOMIP, 0xDFF0, 0, src); avoid_unused_warn(dst);}
|
|
void fcos() {AppendInstr(I_FCOS, 0xD9FF, 0);}
|
|
void fdecstp() {AppendInstr(I_FDECSTP, 0xD9F6, 0);}
|
|
void fdiv(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FDIV, 0xD8F0, 0, src); avoid_unused_warn(dst);}
|
|
void fdiv(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FDIV, 0xDCF8, 0, dst); avoid_unused_warn(src);}
|
|
void fdiv(const Mem32& dst) {AppendInstr(I_FDIV, 0xD8, 0, Imm8(6), dst);}
|
|
void fdiv(const Mem64& dst) {AppendInstr(I_FDIV, 0xDC, 0, Imm8(6), dst);}
|
|
void fdivp() {AppendInstr(I_FDIVP, 0xDEF9, 0);}
|
|
void fdivp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FDIVP, 0xDEF8, 0, dst); avoid_unused_warn(src);}
|
|
void fidiv(const Mem16& dst) {AppendInstr(I_FIDIV, 0xDE, 0, Imm8(6), dst);}
|
|
void fidiv(const Mem32& dst) {AppendInstr(I_FIDIV, 0xDA, 0, Imm8(6), dst);}
|
|
void fdivr(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FDIVR, 0xD8F8, 0, src); avoid_unused_warn(dst);}
|
|
void fdivr(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FDIVR, 0xDCF0, 0, dst); avoid_unused_warn(src);}
|
|
void fdivr(const Mem32& dst) {AppendInstr(I_FDIVR, 0xD8, 0, Imm8(7), dst);}
|
|
void fdivr(const Mem64& dst) {AppendInstr(I_FDIVR, 0xDC, 0, Imm8(7), dst);}
|
|
void fdivrp() {AppendInstr(I_FDIVRP, 0xDEF1, 0);}
|
|
void fdivrp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FDIVRP, 0xDEF0, 0, dst); avoid_unused_warn(src);}
|
|
void fidivr(const Mem16& dst) {AppendInstr(I_FIDIVR, 0xDE, 0, Imm8(7), dst);}
|
|
void fidivr(const Mem32& dst) {AppendInstr(I_FIDIVR, 0xDA, 0, Imm8(7), dst);}
|
|
void ffree(const FpuReg& dst) {AppendInstr(I_FFREE, 0xDDC0, 0, dst);}
|
|
void ficom(const Mem16& dst) {AppendInstr(I_FICOM, 0xDE, 0, Imm8(2), dst);}
|
|
void ficom(const Mem32& dst) {AppendInstr(I_FICOM, 0xDA, 0, Imm8(2), dst);}
|
|
void ficomp(const Mem16& dst) {AppendInstr(I_FICOMP, 0xDE, 0, Imm8(3), dst);}
|
|
void ficomp(const Mem32& dst) {AppendInstr(I_FICOMP, 0xDA, 0, Imm8(3), dst);}
|
|
void fild(const Mem16& dst) {AppendInstr(I_FILD, 0xDF, 0, Imm8(0), dst);}
|
|
void fild(const Mem32& dst) {AppendInstr(I_FILD, 0xDB, 0, Imm8(0), dst);}
|
|
void fild(const Mem64& dst) {AppendInstr(I_FILD, 0xDF, 0, Imm8(5), dst);}
|
|
void fincstp() {AppendInstr(I_FINCSTP, 0xD9F7, 0);}
|
|
void finit() {AppendInstr(I_FINIT, 0x9BDBE3, 0);}
|
|
void fninit() {AppendInstr(I_FNINIT, 0xDBE3, 0);}
|
|
void fist(const Mem16& dst) {AppendInstr(I_FIST, 0xDF, 0, Imm8(2), dst);}
|
|
void fist(const Mem32& dst) {AppendInstr(I_FIST, 0xDB, 0, Imm8(2), dst);}
|
|
void fistp(const Mem16& dst) {AppendInstr(I_FISTP, 0xDF, 0, Imm8(3), dst);}
|
|
void fistp(const Mem32& dst) {AppendInstr(I_FISTP, 0xDB, 0, Imm8(3), dst);}
|
|
void fistp(const Mem64& dst) {AppendInstr(I_FISTP, 0xDF, 0, Imm8(7), dst);}
|
|
void fisttp(const Mem16& dst) {AppendInstr(I_FISTP, 0xDF, 0, Imm8(1), dst);}
|
|
void fisttp(const Mem32& dst) {AppendInstr(I_FISTP, 0xDB, 0, Imm8(1), dst);}
|
|
void fisttp(const Mem64& dst) {AppendInstr(I_FISTP, 0xDD, 0, Imm8(1), dst);}
|
|
void fld(const Mem32& src) {AppendInstr(I_FLD, 0xD9, 0, Imm8(0), src);}
|
|
void fld(const Mem64& src) {AppendInstr(I_FLD, 0xDD, 0, Imm8(0), src);}
|
|
void fld(const Mem80& src) {AppendInstr(I_FLD, 0xDB, 0, Imm8(5), src);}
|
|
void fld(const FpuReg& src) {AppendInstr(I_FLD, 0xD9C0, 0, src);}
|
|
void fld1() {AppendInstr(I_FLD1, 0xD9E8, 0);}
|
|
void fldcw(const Mem16& src) {AppendInstr(I_FLDCW, 0xD9, 0, Imm8(5), src);}
|
|
void fldenv(const Mem224& src) {AppendInstr(I_FLDENV, 0xD9, 0, Imm8(4), src);}
|
|
void fldl2e() {AppendInstr(I_FLDL2E, 0xD9EA, 0);}
|
|
void fldl2t() {AppendInstr(I_FLDL2T, 0xD9E9, 0);}
|
|
void fldlg2() {AppendInstr(I_FLDLG2, 0xD9EC, 0);}
|
|
void fldln2() {AppendInstr(I_FLDLN2, 0xD9ED, 0);}
|
|
void fldpi() {AppendInstr(I_FLDPI, 0xD9EB, 0);}
|
|
void fldz() {AppendInstr(I_FLDZ, 0xD9EE, 0);}
|
|
void fmul(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FMUL, 0xD8C8, 0, src); avoid_unused_warn(dst);}
|
|
void fmul(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FMUL, 0xDCC8, 0, dst); avoid_unused_warn(src);}
|
|
void fmul(const Mem32& dst) {AppendInstr(I_FMUL, 0xD8, 0, Imm8(1), dst);}
|
|
void fmul(const Mem64& dst) {AppendInstr(I_FMUL, 0xDC, 0, Imm8(1), dst);}
|
|
void fmulp() {AppendInstr(I_FMULP, 0xDEC9, 0);}
|
|
void fmulp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FMULP, 0xDEC8, 0, dst); avoid_unused_warn(src);}
|
|
void fimul(const Mem16& dst) {AppendInstr(I_FIMUL, 0xDE, 0, Imm8(1), dst);}
|
|
void fimul(const Mem32& dst) {AppendInstr(I_FIMUL, 0xDA, 0, Imm8(1), dst);}
|
|
void fnop() {AppendInstr(I_FNOP, 0xD9D0, 0);}
|
|
void fpatan() {AppendInstr(I_FPATAN, 0xD9F3, 0);}
|
|
void fprem() {AppendInstr(I_FPREM, 0xD9F8, 0);}
|
|
void fprem1() {AppendInstr(I_FPREM1, 0xD9F5, 0);}
|
|
void fptan() {AppendInstr(I_FPTAN, 0xD9F2, 0);}
|
|
void frndint() {AppendInstr(I_FRNDINT, 0xD9FC, 0);}
|
|
void frstor(const Mem864& src) {AppendInstr(I_FRSTOR, 0xDD, 0, Imm8(4), src);}
|
|
void fsave(const Mem864& dst) {AppendInstr(I_FSAVE, 0x9BDD, 0, Imm8(6), dst);}
|
|
void fnsave(const Mem864& dst) {AppendInstr(I_FNSAVE, 0xDD, 0, Imm8(6), dst);}
|
|
void fscale() {AppendInstr(I_FSCALE, 0xD9FD, 0);}
|
|
void fsin() {AppendInstr(I_FSIN, 0xD9FE, 0);}
|
|
void fsincos() {AppendInstr(I_FSINCOS, 0xD9FB, 0);}
|
|
void fsqrt() {AppendInstr(I_FSQRT, 0xD9FA, 0);}
|
|
void fst(const Mem32& dst) {AppendInstr(I_FST, 0xD9, 0, Imm8(2), dst);}
|
|
void fst(const Mem64& dst) {AppendInstr(I_FST, 0xDD, 0, Imm8(2), dst);}
|
|
void fst(const FpuReg& dst) {AppendInstr(I_FST, 0xDDD0, 0, dst);}
|
|
void fstp(const FpuReg& dst) {AppendInstr(I_FSTP, 0xDDD8, 0, dst);}
|
|
void fstp(const Mem32& dst) {AppendInstr(I_FSTP, 0xD9, 0, Imm8(3), dst);}
|
|
void fstp(const Mem64& dst) {AppendInstr(I_FSTP, 0xDD, 0, Imm8(3), dst);}
|
|
void fstp(const Mem80& dst) {AppendInstr(I_FSTP, 0xDB, 0, Imm8(7), dst);}
|
|
void fstcw(const Mem16& dst) {AppendInstr(I_FSTCW, 0x9BD9, 0, Imm8(7), dst);}
|
|
void fnstcw(const Mem16& dst) {AppendInstr(I_FNSTCW, 0xD9, 0, Imm8(7), dst);}
|
|
void fstenv(const Mem224& dst) {AppendInstr(I_FSTENV, 0x9BD9, 0, Imm8(6), dst);}
|
|
void fnstenv(const Mem224& dst) {AppendInstr(I_FNSTENV, 0xD9, 0, Imm8(6), dst);}
|
|
void fstsw(const Mem16& dst) {AppendInstr(I_FSTSW, 0x9BDD, 0, Imm8(7), dst);}
|
|
void fstsw(const Reg16_ax& dst) {AppendInstr(I_FSTSW, 0x9BDFE0, 0); avoid_unused_warn(dst);}
|
|
void fnstsw(const Mem16& dst) {AppendInstr(I_FNSTSW, 0xDD, 0, Imm8(7), dst);}
|
|
void fnstsw(const Reg16_ax& dst) {AppendInstr(I_FNSTSW, 0xDFE0, 0); avoid_unused_warn(dst);}
|
|
void fsub(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FSUB, 0xD8E0, 0, src); avoid_unused_warn(dst);}
|
|
void fsub(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FSUB, 0xDCE8, 0, dst); avoid_unused_warn(src);}
|
|
void fsub(const Mem32& dst) {AppendInstr(I_FSUB, 0xD8, 0, Imm8(4), dst);}
|
|
void fsub(const Mem64& dst) {AppendInstr(I_FSUB, 0xDC, 0, Imm8(4), dst);}
|
|
void fsubp() {AppendInstr(I_FSUBP, 0xDEE9, 0);}
|
|
void fsubp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FSUBP, 0xDEE8, 0, dst); avoid_unused_warn(src);}
|
|
void fisub(const Mem16& dst) {AppendInstr(I_FISUB, 0xDE, 0, Imm8(4), dst);}
|
|
void fisub(const Mem32& dst) {AppendInstr(I_FISUB, 0xDA, 0, Imm8(4), dst);}
|
|
void fsubr(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FSUBR, 0xD8E8, 0, src); avoid_unused_warn(dst);}
|
|
void fsubr(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FSUBR, 0xDCE0, 0, dst); avoid_unused_warn(src);}
|
|
void fsubr(const Mem32& dst) {AppendInstr(I_FSUBR, 0xD8, 0, Imm8(5), dst);}
|
|
void fsubr(const Mem64& dst) {AppendInstr(I_FSUBR, 0xDC, 0, Imm8(5), dst);}
|
|
void fsubrp() {AppendInstr(I_FSUBRP, 0xDEE1, 0);}
|
|
void fsubrp(const FpuReg& dst, const FpuReg_st0& src) {AppendInstr(I_FSUBRP, 0xDEE0, 0, dst); avoid_unused_warn(src);}
|
|
void fisubr(const Mem16& dst) {AppendInstr(I_FISUBR, 0xDE, 0, Imm8(5), dst);}
|
|
void fisubr(const Mem32& dst) {AppendInstr(I_FISUBR, 0xDA, 0, Imm8(5), dst);}
|
|
void ftst() {AppendInstr(I_FTST, 0xD9E4, 0);}
|
|
void fucom() {AppendInstr(I_FUCOM, 0xDDE1, 0);}
|
|
void fucom(const FpuReg& dst) {AppendInstr(I_FUCOM, 0xDDE0, 0, dst);}
|
|
void fucomp() {AppendInstr(I_FUCOMP, 0xDDE9, 0);}
|
|
void fucomp(const FpuReg& dst) {AppendInstr(I_FUCOMP, 0xDDE8, 0, dst);}
|
|
void fucompp() {AppendInstr(I_FUCOMPP, 0xDAE9, 0);}
|
|
void fucomi(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FUCOMI, 0xDBE8, 0, src); avoid_unused_warn(dst);}
|
|
void fucomip(const FpuReg_st0& dst, const FpuReg& src) {AppendInstr(I_FUCOMIP, 0xDFE8, 0, src); avoid_unused_warn(dst);}
|
|
void fwait() {wait();}
|
|
void fxam() {AppendInstr(I_FXAM, 0xD9E5, 0);}
|
|
void fxch() {AppendInstr(I_FXCH, 0xD9C9, 0);}
|
|
void fxch(const FpuReg& dst) {AppendInstr(I_FXCH, 0xD9C8, 0, dst);}
|
|
void fxrstor(const Mem4096& src) {AppendInstr(I_FXRSTOR, 0x0FAE, 0, Imm8(1), src);}
|
|
void fxsave(const Mem4096& dst) {AppendInstr(I_FXSAVE, 0x0FAE, 0, Imm8(0), dst);}
|
|
void fxtract() {AppendInstr(I_FXTRACT, 0xD9F4, 0);}
|
|
void fyl2x() {AppendInstr(I_FYL2X, 0xD9F1, 0);}
|
|
void fyl2xp1() {AppendInstr(I_FYL2XP1, 0xD9F9, 0);}
|
|
|
|
// MMX
|
|
void emms() {AppendInstr(I_EMMS, 0x0F77, 0);}
|
|
void movd(const MmxReg& dst, const Reg32& src) {AppendInstr(I_MOVD, 0x0F6E, 0, W(dst), R(src));}
|
|
void movd(const MmxReg& dst, const Mem32& src) {AppendInstr(I_MOVD, 0x0F6E, 0, W(dst), R(src));}
|
|
void movd(const Reg32& dst, const MmxReg& src) {AppendInstr(I_MOVD, 0x0F7E, 0, R(src), W(dst));}
|
|
void movd(const Mem32& dst, const MmxReg& src) {AppendInstr(I_MOVD, 0x0F7E, 0, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movd(const MmxReg& dst, const Reg64& src) {AppendInstr(I_MOVD, 0x0F6E, E_REXW_PREFIX, W(dst), R(src));}
|
|
void movd(const Reg64& dst, const MmxReg& src) {AppendInstr(I_MOVD, 0x0F7E, E_REXW_PREFIX, R(src), W(dst));}
|
|
#endif
|
|
void movq(const MmxReg& dst, const Mem64& src) {AppendInstr(I_MOVQ, 0x0F6F, 0, W(dst), R(src));}
|
|
void movq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_MOVQ, 0x0F7F, 0, R(src), W(dst));}
|
|
void movq(const Mem64& dst, const MmxReg& src) {AppendInstr(I_MOVQ, 0x0F7F, 0, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movq(const MmxReg& dst, const Reg64& src) {movd(dst, src);}
|
|
void movq(const Reg64& dst, const MmxReg& src) {movd(dst, src);}
|
|
#endif
|
|
void packsswb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PACKSSWB, 0x0F63, 0, RW(dst), R(src));}
|
|
void packsswb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PACKSSWB, 0x0F63, 0, RW(dst), R(src));}
|
|
void packssdw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PACKSSDW, 0x0F6B, 0, RW(dst), R(src));}
|
|
void packssdw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PACKSSDW, 0x0F6B, 0, RW(dst), R(src));}
|
|
void packuswb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PACKUSWB, 0x0F67, 0, RW(dst), R(src));}
|
|
void packuswb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PACKUSWB, 0x0F67, 0, RW(dst), R(src));}
|
|
void paddb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDB, 0x0FFC, 0, RW(dst), R(src));}
|
|
void paddb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDB, 0x0FFC, 0, RW(dst), R(src));}
|
|
void paddw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDW, 0x0FFD, 0, RW(dst), R(src));}
|
|
void paddw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDW, 0x0FFD, 0, RW(dst), R(src));}
|
|
void paddd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDD, 0x0FFE, 0, RW(dst), R(src));}
|
|
void paddd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDD, 0x0FFE, 0, RW(dst), R(src));}
|
|
void paddsb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDSB, 0x0FEC, 0, RW(dst), R(src));}
|
|
void paddsb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDSB, 0x0FEC, 0, RW(dst), R(src));}
|
|
void paddsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDSW, 0x0FED, 0, RW(dst), R(src));}
|
|
void paddsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDSW, 0x0FED, 0, RW(dst), R(src));}
|
|
void paddusb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDUSB, 0x0FDC, 0, RW(dst), R(src));}
|
|
void paddusb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDUSB, 0x0FDC, 0, RW(dst), R(src));}
|
|
void paddusw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDUSW, 0x0FDD, 0, RW(dst), R(src));}
|
|
void paddusw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDUSW, 0x0FDD, 0, RW(dst), R(src));}
|
|
void pand(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PAND, 0x0FDB, 0, RW(dst), R(src));}
|
|
void pand(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PAND, 0x0FDB, 0, RW(dst), R(src));}
|
|
void pandn(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PANDN, 0x0FDF, 0, RW(dst), R(src));}
|
|
void pandn(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PANDN, 0x0FDF, 0, RW(dst), R(src));}
|
|
void pcmpeqb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPEQB, 0x0F74, 0, RW(dst), R(src));}
|
|
void pcmpeqb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPEQB, 0x0F74, 0, RW(dst), R(src));}
|
|
void pcmpeqw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPEQW, 0x0F75, 0, RW(dst), R(src));}
|
|
void pcmpeqw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPEQW, 0x0F75, 0, RW(dst), R(src));}
|
|
void pcmpeqd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPEQD, 0x0F76, 0, RW(dst), R(src));}
|
|
void pcmpeqd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPEQD, 0x0F76, 0, RW(dst), R(src));}
|
|
void pcmpgtb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPGTB, 0x0F64, 0, RW(dst), R(src));}
|
|
void pcmpgtb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPGTB, 0x0F64, 0, RW(dst), R(src));}
|
|
void pcmpgtw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPGTW, 0x0F65, 0, RW(dst), R(src));}
|
|
void pcmpgtw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPGTW, 0x0F65, 0, RW(dst), R(src));}
|
|
void pcmpgtd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PCMPGTD, 0x0F66, 0, RW(dst), R(src));}
|
|
void pcmpgtd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PCMPGTD, 0x0F66, 0, RW(dst), R(src));}
|
|
void pmaddwd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMADDWD, 0x0FF5, 0, RW(dst), R(src));}
|
|
void pmaddwd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMADDWD, 0x0FF5, 0, RW(dst), R(src));}
|
|
void pmulhw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMULHW, 0x0FE5, 0, RW(dst), R(src));}
|
|
void pmulhw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMULHW, 0x0FE5, 0, RW(dst), R(src));}
|
|
void pmullw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMULLW, 0x0FD5, 0, RW(dst), R(src));}
|
|
void pmullw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMULLW, 0x0FD5, 0, RW(dst), R(src));}
|
|
void por(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_POR, 0x0FEB, 0, RW(dst), R(src));}
|
|
void por(const MmxReg& dst, const Mem64& src) {AppendInstr(I_POR, 0x0FEB, 0, RW(dst), R(src));}
|
|
void psllw(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSLLW, 0x0FF1, 0, RW(dst), R(count));}
|
|
void psllw(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSLLW, 0x0FF1, 0, RW(dst), R(count));}
|
|
void psllw(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSLLW, 0x0F71, 0, Imm8(6), RW(dst), count);}
|
|
void pslld(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSLLD, 0x0FF2, 0, RW(dst), R(count));}
|
|
void pslld(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSLLD, 0x0FF2, 0, RW(dst), R(count));}
|
|
void pslld(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSLLD, 0x0F72, 0, Imm8(6), RW(dst), count);}
|
|
void psllq(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSLLQ, 0x0FF3, 0, RW(dst), R(count));}
|
|
void psllq(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSLLQ, 0x0FF3, 0, RW(dst), R(count));}
|
|
void psllq(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSLLQ, 0x0F73, 0, Imm8(6), RW(dst), count);}
|
|
void psraw(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSRAW, 0x0FE1, 0, RW(dst), R(count));}
|
|
void psraw(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSRAW, 0x0FE1, 0, RW(dst), R(count));}
|
|
void psraw(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSRAW, 0x0F71, 0, Imm8(4), RW(dst), count);}
|
|
void psrad(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSRAD, 0x0FE2, 0, RW(dst), R(count));}
|
|
void psrad(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSRAD, 0x0FE2, 0, RW(dst), R(count));}
|
|
void psrad(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSRAD, 0x0F72, 0, Imm8(4), RW(dst), count);}
|
|
void psrlw(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSRLW, 0x0FD1, 0, RW(dst), R(count));}
|
|
void psrlw(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSRLW, 0x0FD1, 0, RW(dst), R(count));}
|
|
void psrlw(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSRLW, 0x0F71, 0, Imm8(2), RW(dst), count);}
|
|
void psrld(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSRLD, 0x0FD2, 0, RW(dst), R(count));}
|
|
void psrld(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSRLD, 0x0FD2, 0, RW(dst), R(count));}
|
|
void psrld(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSRLD, 0x0F72, 0, Imm8(2), RW(dst), count);}
|
|
void psrlq(const MmxReg& dst, const MmxReg& count) {AppendInstr(I_PSRLQ, 0x0FD3, 0, RW(dst), R(count));}
|
|
void psrlq(const MmxReg& dst, const Mem64& count) {AppendInstr(I_PSRLQ, 0x0FD3, 0, RW(dst), R(count));}
|
|
void psrlq(const MmxReg& dst, const Imm8& count) {AppendInstr(I_PSRLQ, 0x0F73, 0, Imm8(2), RW(dst), count);}
|
|
void psubb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBB, 0x0FF8, 0, RW(dst), R(src));}
|
|
void psubb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBB, 0x0FF8, 0, RW(dst), R(src));}
|
|
void psubw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBW, 0x0FF9, 0, RW(dst), R(src));}
|
|
void psubw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBW, 0x0FF9, 0, RW(dst), R(src));}
|
|
void psubd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBD, 0x0FFA, 0, RW(dst), R(src));}
|
|
void psubd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBD, 0x0FFA, 0, RW(dst), R(src));}
|
|
void psubsb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBSB, 0x0FE8, 0, RW(dst), R(src));}
|
|
void psubsb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBSB, 0x0FE8, 0, RW(dst), R(src));}
|
|
void psubsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBSW, 0x0FE9, 0, RW(dst), R(src));}
|
|
void psubsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBSW, 0x0FE9, 0, RW(dst), R(src));}
|
|
void psubusb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBUSB, 0x0FD8, 0, RW(dst), R(src));}
|
|
void psubusb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBUSB, 0x0FD8, 0, RW(dst), R(src));}
|
|
void psubusw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBUSW, 0x0FD9, 0, RW(dst), R(src));}
|
|
void psubusw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBUSW, 0x0FD9, 0, RW(dst), R(src));}
|
|
void punpckhbw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKHBW, 0x0F68, 0, RW(dst), R(src));}
|
|
void punpckhbw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PUNPCKHBW, 0x0F68, 0, RW(dst), R(src));}
|
|
void punpckhwd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKHWD, 0x0F69, 0, RW(dst), R(src));}
|
|
void punpckhwd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PUNPCKHWD, 0x0F69, 0, RW(dst), R(src));}
|
|
void punpckhdq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKHDQ, 0x0F6A, 0, RW(dst), R(src));}
|
|
void punpckhdq(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PUNPCKHDQ, 0x0F6A, 0, RW(dst), R(src));}
|
|
void punpcklbw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKLBW, 0x0F60, 0, RW(dst), R(src));}
|
|
void punpcklbw(const MmxReg& dst, const Mem32& src) {AppendInstr(I_PUNPCKLBW, 0x0F60, 0, RW(dst), R(src));}
|
|
void punpcklwd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKLWD, 0x0F61, 0, RW(dst), R(src));}
|
|
void punpcklwd(const MmxReg& dst, const Mem32& src) {AppendInstr(I_PUNPCKLWD, 0x0F61, 0, RW(dst), R(src));}
|
|
void punpckldq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PUNPCKLDQ, 0x0F62, 0, RW(dst), R(src));}
|
|
void punpckldq(const MmxReg& dst, const Mem32& src) {AppendInstr(I_PUNPCKLDQ, 0x0F62, 0, RW(dst), R(src));}
|
|
void pxor(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PXOR, 0x0FEF, 0, RW(dst), R(src));}
|
|
void pxor(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PXOR, 0x0FEF, 0, RW(dst), R(src));}
|
|
|
|
// MMX2
|
|
void pavgb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PAVGB, 0x0FE0, 0, RW(dst), R(src));}
|
|
void pavgb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PAVGB, 0x0FE0, 0, RW(dst), R(src));}
|
|
void pavgw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PAVGW, 0x0FE3, 0, RW(dst), R(src));}
|
|
void pavgw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PAVGW, 0x0FE3, 0, RW(dst), R(src));}
|
|
void pextrw(const Reg32& dst, const MmxReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x0FC5, 0, W(dst), R(src), i);}
|
|
#ifdef JITASM64
|
|
void pextrw(const Reg64& dst, const MmxReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x0FC5, E_REXW_PREFIX, W(dst), R(src), i);}
|
|
#endif
|
|
void pinsrw(const MmxReg& dst, const Reg32& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, 0, RW(dst), R(src), i);}
|
|
void pinsrw(const MmxReg& dst, const Mem16& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, 0, RW(dst), R(src), i);}
|
|
#ifdef JITASM64
|
|
void pinsrw(const MmxReg& dst, const Reg64& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, E_REXW_PREFIX, RW(dst), R(src), i);}
|
|
#endif
|
|
void pmaxsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMAXSW, 0x0FEE, 0, RW(dst), R(src));}
|
|
void pmaxsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMAXSW, 0x0FEE, 0, RW(dst), R(src));}
|
|
void pmaxub(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMAXUB, 0x0FDE, 0, RW(dst), R(src));}
|
|
void pmaxub(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMAXUB, 0x0FDE, 0, RW(dst), R(src));}
|
|
void pminsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMINSW, 0x0FEA, 0, RW(dst), R(src));}
|
|
void pminsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMINSW, 0x0FEA, 0, RW(dst), R(src));}
|
|
void pminub(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMINUB, 0x0FDA, 0, RW(dst), R(src));}
|
|
void pminub(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMINUB, 0x0FDA, 0, RW(dst), R(src));}
|
|
void pmovmskb(const Reg32& dst, const MmxReg& src) {AppendInstr(I_PMOVMSKB, 0x0FD7, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void pmovmskb(const Reg64& dst, const MmxReg& src) {AppendInstr(I_PMOVMSKB, 0x0FD7, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void pmulhuw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMULHUW, 0x0FE4, 0, RW(dst), R(src));}
|
|
void pmulhuw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMULHUW, 0x0FE4, 0, RW(dst), R(src));}
|
|
void psadbw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSADBW, 0x0FF6, 0, RW(dst), R(src));}
|
|
void psadbw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSADBW, 0x0FF6, 0, RW(dst), R(src));}
|
|
void pshufw(const MmxReg& dst, const MmxReg& src, const Imm8& order) {AppendInstr(I_PSHUFW, 0x0F70, 0, RW(dst), R(src), order);}
|
|
void pshufw(const MmxReg& dst, const Mem64& src, const Imm8& order) {AppendInstr(I_PSHUFW, 0x0F70, 0, RW(dst), R(src), order);}
|
|
|
|
// SSE
|
|
void addps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDPS, 0x0F58, 0, RW(dst), R(src));}
|
|
void addps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ADDPS, 0x0F58, 0, RW(dst), R(src));}
|
|
void addss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDSS, 0x0F58, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void addss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_ADDSS, 0x0F58, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void andps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ANDPS, 0x0F54, 0, RW(dst), R(src));}
|
|
void andps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ANDPS, 0x0F54, 0, RW(dst), R(src));}
|
|
void andnps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ANDNPS, 0x0F55, 0, RW(dst), R(src));}
|
|
void andnps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ANDNPS, 0x0F55, 0, RW(dst), R(src));}
|
|
void cmpps(const XmmReg& dst, const XmmReg& src, const Imm8& opd3) {AppendInstr(I_CMPPS, 0x0FC2, 0, RW(dst), R(src), opd3);}
|
|
void cmpps(const XmmReg& dst, const Mem128& src, const Imm8& opd3) {AppendInstr(I_CMPPS, 0x0FC2, 0, RW(dst), R(src), opd3);}
|
|
void cmpeqps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 0);}
|
|
void cmpeqps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 0);}
|
|
void cmpltps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 1);}
|
|
void cmpltps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 1);}
|
|
void cmpleps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 2);}
|
|
void cmpleps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 2);}
|
|
void cmpunordps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 3);}
|
|
void cmpunordps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 3);}
|
|
void cmpneqps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 4);}
|
|
void cmpneqps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 4);}
|
|
void cmpnltps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 5);}
|
|
void cmpnltps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 5);}
|
|
void cmpnleps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 6);}
|
|
void cmpnleps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 6);}
|
|
void cmpordps(const XmmReg& dst, const XmmReg& src) {cmpps(dst, src, 7);}
|
|
void cmpordps(const XmmReg& dst, const Mem128& src) {cmpps(dst, src, 7);}
|
|
void cmpss(const XmmReg& dst, const XmmReg& src, const Imm8& opd3) {AppendInstr(I_CMPSS, 0x0FC2, E_MANDATORY_PREFIX_F3, RW(dst), R(src), opd3);}
|
|
void cmpss(const XmmReg& dst, const Mem32& src, const Imm8& opd3) {AppendInstr(I_CMPSS, 0x0FC2, E_MANDATORY_PREFIX_F3, RW(dst), R(src), opd3);}
|
|
void cmpeqss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 0);}
|
|
void cmpeqss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 0);}
|
|
void cmpltss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 1);}
|
|
void cmpltss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 1);}
|
|
void cmpless(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 2);}
|
|
void cmpless(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 2);}
|
|
void cmpunordss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 3);}
|
|
void cmpunordss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 3);}
|
|
void cmpneqss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 4);}
|
|
void cmpneqss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 4);}
|
|
void cmpnltss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 5);}
|
|
void cmpnltss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 5);}
|
|
void cmpnless(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 6);}
|
|
void cmpnless(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 6);}
|
|
void cmpordss(const XmmReg& dst, const XmmReg& src) {cmpss(dst, src, 7);}
|
|
void cmpordss(const XmmReg& dst, const Mem32& src) {cmpss(dst, src, 7);}
|
|
void comiss(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_COMISS, 0x0F2F, 0, R(src1), R(src2));}
|
|
void comiss(const XmmReg& src1, const Mem32& src2) {AppendInstr(I_COMISS, 0x0F2F, 0, R(src1), R(src2));}
|
|
void cvtpi2ps(const XmmReg& dst, const MmxReg& src) {AppendInstr(I_CVTPI2PS, 0x0F2A, 0, RW(dst), R(src));}
|
|
void cvtpi2ps(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTPI2PS, 0x0F2A, 0, RW(dst), R(src));}
|
|
void cvtps2pi(const MmxReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2PI, 0x0F2D, 0, W(dst), R(src));}
|
|
void cvtps2pi(const MmxReg& dst, const Mem64& src) {AppendInstr(I_CVTPS2PI, 0x0F2D, 0, W(dst), R(src));}
|
|
void cvtsi2ss(const XmmReg& dst, const Reg32& src) {AppendInstr(I_CVTSI2SS, 0x0F2A, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void cvtsi2ss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_CVTSI2SS, 0x0F2A, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void cvtss2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x0F2D, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvtss2si(const Reg32& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x0F2D, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvttps2pi(const MmxReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPS2PI, 0x0F2C, 0, W(dst), R(src));}
|
|
void cvttps2pi(const MmxReg& dst, const Mem64& src) {AppendInstr(I_CVTTPS2PI, 0x0F2C, 0, W(dst), R(src));}
|
|
void cvttss2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTTSS2SI, 0x0F2C, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvttss2si(const Reg32& dst, const Mem32& src) {AppendInstr(I_CVTTSS2SI, 0x0F2C, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void cvtsi2ss(const XmmReg& dst, const Reg64& src) {AppendInstr(I_CVTSI2SS, 0x0F2A, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cvtsi2ss(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTSI2SS, 0x0F2A, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cvtss2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x0F2D, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void cvtss2si(const Reg64& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x0F2D, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void cvttss2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTTSS2SI, 0x0F2C, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void cvttss2si(const Reg64& dst, const Mem32& src) {AppendInstr(I_CVTTSS2SI, 0x0F2C, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void divps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_DIVPS, 0x0F5E, 0, RW(dst), R(src));}
|
|
void divps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_DIVPS, 0x0F5E, 0, RW(dst), R(src));}
|
|
void divss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_DIVSS, 0x0F5E, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void divss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_DIVSS, 0x0F5E, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void ldmxcsr(const Mem32& src) {AppendInstr(I_LDMXCSR, 0x0FAE, 0, Imm8(2), R(src));}
|
|
void maskmovq(const MmxReg& src, const MmxReg& mask, const Reg& dstptr) {AppendInstr(I_MASKMOVQ, 0x0FF7, 0, R(src), R(mask), Dummy(R(dstptr),zdi));}
|
|
void maxps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MAXPS, 0x0F5F, 0, RW(dst), R(src));}
|
|
void maxps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MAXPS, 0x0F5F, 0, RW(dst), R(src));}
|
|
void maxss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MAXSS, 0x0F5F, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void maxss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MAXSS, 0x0F5F, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void minps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MINPS, 0x0F5D, 0, RW(dst), R(src));}
|
|
void minps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MINPS, 0x0F5D, 0, RW(dst), R(src));}
|
|
void minss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MINSS, 0x0F5D, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void minss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MINSS, 0x0F5D, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void movaps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVAPS, 0x0F28, 0, W(dst), R(src));}
|
|
void movaps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVAPS, 0x0F28, 0, W(dst), R(src));}
|
|
void movaps(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVAPS, 0x0F29, 0, R(src), W(dst));}
|
|
void movhlps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVHLPS, 0x0F12, 0, RW(dst), R(src));}
|
|
void movhps(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVHPS, 0x0F16, 0, RW(dst), R(src));}
|
|
void movhps(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVHPS, 0x0F17, 0, R(src), W(dst));}
|
|
void movlhps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVLHPS, 0x0F16, 0, RW(dst), R(src));}
|
|
void movlps(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVLPS, 0x0F12, 0, RW(dst), R(src));}
|
|
void movlps(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVLPS, 0x0F13, 0, R(src), W(dst));}
|
|
void movmskps(const Reg32& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPS, 0x0F50, 0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void movmskps(const Reg64& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPS, 0x0F50, E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void movntps(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTPS, 0x0F2B, 0, R(src), W(dst));}
|
|
void movntq(const Mem64& dst, const MmxReg& src) {AppendInstr(I_MOVNTQ, 0x0FE7, 0, R(src), W(dst));}
|
|
void movss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSS, 0x0F10, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void movss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MOVSS, 0x0F10, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movss(const Mem32& dst, const XmmReg& src) {AppendInstr(I_MOVSS, 0x0F11, E_MANDATORY_PREFIX_F3, R(src), W(dst));}
|
|
void movups(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVUPS, 0x0F10, 0, W(dst), R(src));}
|
|
void movups(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVUPS, 0x0F10, 0, W(dst), R(src));}
|
|
void movups(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVUPS, 0x0F11, 0, R(src), W(dst));}
|
|
void mulps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MULPS, 0x0F59, 0, RW(dst), R(src));}
|
|
void mulps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MULPS, 0x0F59, 0, RW(dst), R(src));}
|
|
void mulss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MULSS, 0x0F59, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void mulss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MULSS, 0x0F59, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void orps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ORPS, 0x0F56, 0, RW(dst), R(src));}
|
|
void orps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ORPS, 0x0F56, 0, RW(dst), R(src));}
|
|
void prefetcht0(const Mem8& mem) {AppendInstr(I_PREFETCH, 0x0F18, 0, Imm8(1), R(mem));}
|
|
void prefetcht1(const Mem8& mem) {AppendInstr(I_PREFETCH, 0x0F18, 0, Imm8(2), R(mem));}
|
|
void prefetcht2(const Mem8& mem) {AppendInstr(I_PREFETCH, 0x0F18, 0, Imm8(3), R(mem));}
|
|
void prefetchnta(const Mem8& mem) {AppendInstr(I_PREFETCH, 0x0F18, 0, Imm8(0), R(mem));}
|
|
void rcpps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RCPPS, 0x0F53, 0, W(dst), R(src));}
|
|
void rcpps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_RCPPS, 0x0F53, 0, W(dst), R(src));}
|
|
void rcpss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RCPSS, 0x0F53, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void rcpss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_RCPSS, 0x0F53, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void rsqrtps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RSQRTPS, 0x0F52, 0, W(dst), R(src));}
|
|
void rsqrtps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_RSQRTPS, 0x0F52, 0, W(dst), R(src));}
|
|
void rsqrtss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RSQRTSS, 0x0F52, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void rsqrtss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_RSQRTSS, 0x0F52, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void sfence() {AppendInstr(I_SFENCE, 0x0FAEF8, 0);}
|
|
void shufps(const XmmReg& dst, const XmmReg& src, const Imm8& sel) {AppendInstr(I_SHUFPS, 0x0FC6, 0, RW(dst), R(src), sel);}
|
|
void shufps(const XmmReg& dst, const Mem128& src, const Imm8& sel) {AppendInstr(I_SHUFPS, 0x0FC6, 0, RW(dst), R(src), sel);}
|
|
void sqrtps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTPS, 0x0F51, 0, W(dst), R(src));}
|
|
void sqrtps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SQRTPS, 0x0F51, 0, W(dst), R(src));}
|
|
void sqrtss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTSS, 0x0F51, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void sqrtss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_SQRTSS, 0x0F51, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void stmxcsr(const Mem32& dst) {AppendInstr(I_STMXCSR, 0x0FAE, 0, Imm8(3), W(dst));}
|
|
void subps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SUBPS, 0x0F5C, 0, RW(dst), R(src));}
|
|
void subps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SUBPS, 0x0F5C, 0, RW(dst), R(src));}
|
|
void subss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SUBSS, 0x0F5C, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void subss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_SUBSS, 0x0F5C, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void ucomiss(const XmmReg& src1, const XmmReg& src2){AppendInstr(I_UCOMISS, 0x0F2E, 0, R(src1), R(src2));}
|
|
void ucomiss(const XmmReg& src1, const Mem32& src2) {AppendInstr(I_UCOMISS, 0x0F2E, 0, R(src1), R(src2));}
|
|
void unpckhps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_UNPCKHPS, 0x0F15, 0, RW(dst), R(src));}
|
|
void unpckhps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_UNPCKHPS, 0x0F15, 0, RW(dst), R(src));}
|
|
void unpcklps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_UNPCKLPS, 0x0F14, 0, RW(dst), R(src));}
|
|
void unpcklps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_UNPCKLPS, 0x0F14, 0, RW(dst), R(src));}
|
|
void xorps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_XORPS, 0x0F57, 0, RW(dst), R(src));}
|
|
void xorps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_XORPS, 0x0F57, 0, RW(dst), R(src));}
|
|
|
|
// SSE2
|
|
void addpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDPD, 0x0F58, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void addpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ADDPD, 0x0F58, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void addsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDSD, 0x0F58, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void addsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_ADDSD, 0x0F58, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void andpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ANDPD, 0x0F54, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void andpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ANDPD, 0x0F54, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void andnpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ANDNPD, 0x0F55, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void andnpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ANDNPD, 0x0F55, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void clflush(const Mem8& src) {AppendInstr(I_CLFLUSH, 0x0FAE, 0, Imm8(7), R(src));}
|
|
void cmppd(const XmmReg& dst, const XmmReg& src, const Imm8& opd3) {AppendInstr(I_CMPPD, 0x0FC2, E_MANDATORY_PREFIX_66, RW(dst), R(src), opd3);}
|
|
void cmppd(const XmmReg& dst, const Mem128& src, const Imm8& opd3) {AppendInstr(I_CMPPD, 0x0FC2, E_MANDATORY_PREFIX_66, RW(dst), R(src), opd3);}
|
|
void cmpeqpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 0);}
|
|
void cmpeqpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 0);}
|
|
void cmpltpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 1);}
|
|
void cmpltpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 1);}
|
|
void cmplepd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 2);}
|
|
void cmplepd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 2);}
|
|
void cmpunordpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 3);}
|
|
void cmpunordpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 3);}
|
|
void cmpneqpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 4);}
|
|
void cmpneqpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 4);}
|
|
void cmpnltpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 5);}
|
|
void cmpnltpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 5);}
|
|
void cmpnlepd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 6);}
|
|
void cmpnlepd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 6);}
|
|
void cmpordpd(const XmmReg& dst, const XmmReg& src) {cmppd(dst, src, 7);}
|
|
void cmpordpd(const XmmReg& dst, const Mem128& src) {cmppd(dst, src, 7);}
|
|
void cmpsd(const XmmReg& dst, const XmmReg& src, const Imm8& opd3) {AppendInstr(I_CMPSD, 0x0FC2, E_MANDATORY_PREFIX_F2, RW(dst), R(src), opd3);}
|
|
void cmpsd(const XmmReg& dst, const Mem64& src, const Imm8& opd3) {AppendInstr(I_CMPSD, 0x0FC2, E_MANDATORY_PREFIX_F2, RW(dst), R(src), opd3);}
|
|
void cmpeqsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 0);}
|
|
void cmpeqsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 0);}
|
|
void cmpltsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 1);}
|
|
void cmpltsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 1);}
|
|
void cmplesd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 2);}
|
|
void cmplesd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 2);}
|
|
void cmpunordsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 3);}
|
|
void cmpunordsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 3);}
|
|
void cmpneqsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 4);}
|
|
void cmpneqsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 4);}
|
|
void cmpnltsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 5);}
|
|
void cmpnltsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 5);}
|
|
void cmpnlesd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 6);}
|
|
void cmpnlesd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 6);}
|
|
void cmpordsd(const XmmReg& dst, const XmmReg& src) {cmpsd(dst, src, 7);}
|
|
void cmpordsd(const XmmReg& dst, const Mem64& src) {cmpsd(dst, src, 7);}
|
|
void comisd(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_COMISD, 0x0F2F, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void comisd(const XmmReg& src1, const Mem64& src2) {AppendInstr(I_COMISD, 0x0F2F, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void cvtdq2pd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTDQ2PD, 0x0FE6, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvtdq2pd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTDQ2PD, 0x0FE6, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvtpd2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPD2DQ, 0x0FE6, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void cvtpd2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPD2DQ, 0x0FE6, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void cvtpd2pi(const MmxReg& dst, const XmmReg& src) {AppendInstr(I_CVTPD2PI, 0x0F2D, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtpd2pi(const MmxReg& dst, const Mem128& src) {AppendInstr(I_CVTPD2PI, 0x0F2D, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtpd2ps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPD2PS, 0x0F5A, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtpd2ps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPD2PS, 0x0F5A, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtpi2pd(const XmmReg& dst, const MmxReg& src) {AppendInstr(I_CVTPI2PD, 0x0F2A, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtpi2pd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTPI2PD, 0x0F2A, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtps2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2DQ, 0x0F5B, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtps2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPS2DQ, 0x0F5B, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvtdq2ps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTDQ2PS, 0x0F5B, 0, W(dst), R(src));}
|
|
void cvtdq2ps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTDQ2PS, 0x0F5B, 0, W(dst), R(src));}
|
|
void cvtps2pd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2PD, 0x0F5A, 0, W(dst), R(src));}
|
|
void cvtps2pd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTPS2PD, 0x0F5A, 0, W(dst), R(src));}
|
|
void cvtsd2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x0F2D, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void cvtsd2si(const Reg32& dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x0F2D, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void cvtsd2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x0F2D, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void cvtsd2si(const Reg64& dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x0F2D, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void cvtsd2ss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SS, 0x0F5A, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void cvtsd2ss(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTSD2SS, 0x0F5A, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void cvtsi2sd(const XmmReg& dst, const Reg32& src) {AppendInstr(I_CVTSI2SD, 0x0F2A, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void cvtsi2sd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_CVTSI2SD, 0x0F2A, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void cvtsi2sd(const XmmReg& dst, const Reg64& src) {AppendInstr(I_CVTSI2SD, 0x0F2A, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void cvtsi2sd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTSI2SD, 0x0F2A, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void cvtss2sd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SD, 0x0F5A, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void cvtss2sd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_CVTSS2SD, 0x0F5A, E_MANDATORY_PREFIX_F3, RW(dst), R(src));}
|
|
void cvttpd2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPD2DQ, 0x0FE6, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvttpd2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTTPD2DQ, 0x0FE6, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvttpd2pi(const MmxReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPD2PI, 0x0F2C, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvttpd2pi(const MmxReg& dst, const Mem128& src) {AppendInstr(I_CVTTPD2PI, 0x0F2C, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void cvttps2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPS2DQ, 0x0F5B, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvttps2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTTPS2DQ, 0x0F5B, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void cvttsd2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTTSD2SI, 0x0F2C, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void cvttsd2si(const Reg32& dst, const Mem64& src) {AppendInstr(I_CVTTSD2SI, 0x0F2C, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void cvttsd2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTTSD2SI, 0x0F2C, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void cvttsd2si(const Reg64& dst, const Mem64& src) {AppendInstr(I_CVTTSD2SI, 0x0F2C, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void divpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_DIVPD, 0x0F5E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void divpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_DIVPD, 0x0F5E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void divsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_DIVSD, 0x0F5E, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void divsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_DIVSD, 0x0F5E, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void lfence() {AppendInstr(I_LFENCE, 0x0FAEE8, 0);}
|
|
void maskmovdqu(const XmmReg& src, const XmmReg& mask, const Reg& dstptr) {AppendInstr(I_MASKMOVDQU, 0x0FF7, E_MANDATORY_PREFIX_66, R(src), R(mask), Dummy(R(dstptr), zdi));}
|
|
void maxpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MAXPD, 0x0F5F, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void maxpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MAXPD, 0x0F5F, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void maxsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MAXSD, 0x0F5F, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void maxsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MAXSD, 0x0F5F, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void mfence() {AppendInstr(I_MFENCE, 0x0FAEF0, 0);}
|
|
void minpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MINPD, 0x0F5D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void minpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MINPD, 0x0F5D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void minsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MINSD, 0x0F5D, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void minsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MINSD, 0x0F5D, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void movapd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVAPD, 0x0F28, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movapd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVAPD, 0x0F28, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movapd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVAPD, 0x0F29, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movd(const XmmReg& dst, const Reg32& src) {AppendInstr(I_MOVD, 0x0F6E, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MOVD, 0x0F6E, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movd(const Reg32& dst, const XmmReg& src) {AppendInstr(I_MOVD, 0x0F7E, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movd(const Mem32& dst, const XmmReg& src) {AppendInstr(I_MOVD, 0x0F7E, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movd(const XmmReg& dst, const Reg64& src) {AppendInstr(I_MOVD, 0x0F6E, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void movd(const Reg64& dst, const XmmReg& src) {AppendInstr(I_MOVD, 0x0F7E, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, R(src), W(dst));}
|
|
#endif
|
|
void movdqa(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDQA, 0x0F6F, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movdqa(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVDQA, 0x0F6F, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movdqa(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVDQA, 0x0F7F, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movdqu(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDQU, 0x0F6F, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movdqu(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVDQU, 0x0F6F, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movdqu(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVDQU, 0x0F7F, E_MANDATORY_PREFIX_F3, R(src), W(dst));}
|
|
void movdq2q(const MmxReg& dst, const XmmReg& src) {AppendInstr(I_MOVDQ2Q, 0x0FD6, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void movhpd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVHPD, 0x0F16, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void movhpd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVHPD, 0x0F17, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movlpd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVLPD, 0x0F12, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void movlpd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVLPD, 0x0F13, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movmskpd(const Reg32& dst, XmmReg& src) {AppendInstr(I_MOVMSKPD, 0x0F50, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void movmskpd(const Reg64& dst, XmmReg& src) {AppendInstr(I_MOVMSKPD, 0x0F50, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void movntdq(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTDQ, 0x0FE7, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movnti(const Mem32& dst, const Reg32& src) {AppendInstr(I_MOVNTI, 0x0FC3, 0, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movnti(const Mem64& dst, const Reg64& src) {AppendInstr(I_MOVNTI, 0x0FC3, E_REXW_PREFIX, R(src), W(dst));}
|
|
#endif
|
|
void movntpd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTPD, 0x0F2B, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void movq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVQ, 0x0F7E, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVQ, 0x0F7E, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movq(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVQ, 0x0FD6, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void movq(const XmmReg& dst, const Reg64& src) {movd(dst, src);}
|
|
void movq(const Reg64& dst, const XmmReg& src) {movd(dst, src);}
|
|
#endif
|
|
void movq2dq(const XmmReg& dst, const MmxReg& src) {AppendInstr(I_MOVQ2DQ, 0x0FD6, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSD, 0x0F10, E_MANDATORY_PREFIX_F2, RW(dst), R(src));} // 64~127bits are unchanged
|
|
void movsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVSD, 0x0F10, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void movsd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVSD, 0x0F11, E_MANDATORY_PREFIX_F2, R(src), W(dst));}
|
|
void movupd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVUPD, 0x0F10, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movupd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVUPD, 0x0F10, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void movupd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVUPD, 0x0F11, E_MANDATORY_PREFIX_66, R(src), W(dst));}
|
|
void mulpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MULPD, 0x0F59, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void mulpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MULPD, 0x0F59, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void mulsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MULSD, 0x0F59, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void mulsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MULSD, 0x0F59, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void orpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ORPD, 0x0F56, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void orpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ORPD, 0x0F56, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packsswb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PACKSSWB, 0x0F63, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packsswb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PACKSSWB, 0x0F63, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packssdw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PACKSSDW, 0x0F6B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packssdw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PACKSSDW, 0x0F6B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packuswb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PACKUSWB, 0x0F67, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packuswb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PACKUSWB, 0x0F67, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDB, 0x0FFC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDB, 0x0FFC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDW, 0x0FFD, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDW, 0x0FFD, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDD, 0x0FFE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDD, 0x0FFE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PADDQ, 0x0FD4, 0, RW(dst), R(src));}
|
|
void paddq(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PADDQ, 0x0FD4, 0, RW(dst), R(src));}
|
|
void paddq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDQ, 0x0FD4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDQ, 0x0FD4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDSB, 0x0FEC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDSB, 0x0FEC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDSW, 0x0FED, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDSW, 0x0FED, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddusb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDUSB, 0x0FDC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddusb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDUSB, 0x0FDC, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddusw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PADDUSW, 0x0FDD, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void paddusw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PADDUSW, 0x0FDD, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pand(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PAND, 0x0FDB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pand(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PAND, 0x0FDB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pandn(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PANDN, 0x0FDF, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pandn(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PANDN, 0x0FDF, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pause() {AppendInstr(I_PAUSE, 0xF390, 0);}
|
|
void pavgb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PAVGB, 0x0FE0, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pavgb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PAVGB, 0x0FE0, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pavgw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PAVGW, 0x0FE3, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pavgw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PAVGW, 0x0FE3, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPEQB, 0x0F74, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPEQB, 0x0F74, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPEQW, 0x0F75, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPEQW, 0x0F75, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPEQD, 0x0F76, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPEQD, 0x0F76, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPGTB, 0x0F64, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPGTB, 0x0F64, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPGTW, 0x0F65, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPGTW, 0x0F65, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPGTD, 0x0F66, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPGTD, 0x0F66, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pextrw(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x0FC5, E_MANDATORY_PREFIX_66, W(dst), R(src), i);}
|
|
#ifdef JITASM64
|
|
void pextrw(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x0FC5, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, W(dst), R(src), i);}
|
|
#endif
|
|
void pinsrw(const XmmReg& dst, const Reg32& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrw(const XmmReg& dst, const Mem16& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
#ifdef JITASM64
|
|
void pinsrw(const XmmReg& dst, const Reg64& src, const Imm8& i) {AppendInstr(I_PINSRW, 0x0FC4, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, RW(dst), R(src), i);}
|
|
#endif
|
|
void pmaddwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMADDWD, 0x0FF5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaddwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMADDWD, 0x0FF5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXSW, 0x0FEE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXSW, 0x0FEE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxub(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXUB, 0x0FDE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxub(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXUB, 0x0FDE, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINSW, 0x0FEA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINSW, 0x0FEA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminub(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINUB, 0x0FDA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminub(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINUB, 0x0FDA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmovmskb(const Reg32& dst, const XmmReg& src) {AppendInstr(I_PMOVMSKB, 0x0FD7, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void pmovmskb(const Reg64& dst, const XmmReg& src) {AppendInstr(I_PMOVMSKB, 0x0FD7, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
void pmulhuw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULHUW, 0x0FE4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulhuw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULHUW, 0x0FE4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulhw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULHW, 0x0FE5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulhw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULHW, 0x0FE5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmullw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULLW, 0x0FD5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmullw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULLW, 0x0FD5, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmuludq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMULUDQ, 0x0FF4, 0, RW(dst), R(src));}
|
|
void pmuludq(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMULUDQ, 0x0FF4, 0, RW(dst), R(src));}
|
|
void pmuludq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULUDQ, 0x0FF4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmuludq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULUDQ, 0x0FF4, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void por(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_POR, 0x0FEB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void por(const XmmReg& dst, const Mem128& src) {AppendInstr(I_POR, 0x0FEB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psadbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSADBW, 0x0FF6, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psadbw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSADBW, 0x0FF6, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pshufd(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFD, 0x0F70, E_MANDATORY_PREFIX_66, W(dst), R(src), order);}
|
|
void pshufd(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFD, 0x0F70, E_MANDATORY_PREFIX_66, W(dst), R(src), order);}
|
|
void pshufhw(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFHW, 0x0F70, E_MANDATORY_PREFIX_F3, W(dst), R(src), order);}
|
|
void pshufhw(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFHW, 0x0F70, E_MANDATORY_PREFIX_F3, W(dst), R(src), order);}
|
|
void pshuflw(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFLW, 0x0F70, E_MANDATORY_PREFIX_F2, W(dst), R(src), order);}
|
|
void pshuflw(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFLW, 0x0F70, E_MANDATORY_PREFIX_F2, W(dst), R(src), order);}
|
|
void psllw(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSLLW, 0x0FF1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psllw(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSLLW, 0x0FF1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psllw(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSLLW, 0x0F71, E_MANDATORY_PREFIX_66, Imm8(6), RW(dst), count);}
|
|
void pslld(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSLLD, 0x0FF2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void pslld(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSLLD, 0x0FF2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void pslld(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSLLD, 0x0F72, E_MANDATORY_PREFIX_66, Imm8(6), RW(dst), count);}
|
|
void psllq(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSLLQ, 0x0FF3, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psllq(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSLLQ, 0x0FF3, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psllq(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSLLQ, 0x0F73, E_MANDATORY_PREFIX_66, Imm8(6), RW(dst), count);}
|
|
void pslldq(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSLLDQ, 0x0F73, E_MANDATORY_PREFIX_66, Imm8(7), RW(dst), count);}
|
|
void psraw(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSRAW, 0x0FE1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psraw(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSRAW, 0x0FE1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psraw(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRAW, 0x0F71, E_MANDATORY_PREFIX_66, Imm8(4), RW(dst), count);}
|
|
void psrad(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSRAD, 0x0FE2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrad(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSRAD, 0x0FE2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrad(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRAD, 0x0F72, E_MANDATORY_PREFIX_66, Imm8(4), RW(dst), count);}
|
|
void psrlw(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSRLW, 0x0FD1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrlw(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSRLW, 0x0FD1, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrlw(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRLW, 0x0F71, E_MANDATORY_PREFIX_66, Imm8(2), RW(dst), count);}
|
|
void psrld(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSRLD, 0x0FD2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrld(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSRLD, 0x0FD2, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrld(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRLD, 0x0F72, E_MANDATORY_PREFIX_66, Imm8(2), RW(dst), count);}
|
|
void psrlq(const XmmReg& dst, const XmmReg& count) {AppendInstr(I_PSRLQ, 0x0FD3, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrlq(const XmmReg& dst, const Mem128& count) {AppendInstr(I_PSRLQ, 0x0FD3, E_MANDATORY_PREFIX_66, RW(dst), R(count));}
|
|
void psrlq(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRLQ, 0x0F73, E_MANDATORY_PREFIX_66, Imm8(2), RW(dst), count);}
|
|
void psrldq(const XmmReg& dst, const Imm8& count) {AppendInstr(I_PSRLDQ, 0x0F73, E_MANDATORY_PREFIX_66, Imm8(3), RW(dst), count);}
|
|
void psubb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBB, 0x0FF8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBB, 0x0FF8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBW, 0x0FF9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBW, 0x0FF9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBD, 0x0FFA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBD, 0x0FFA, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubq(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSUBQ, 0x0FFB, 0, RW(dst), R(src));}
|
|
void psubq(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSUBQ, 0x0FFB, 0, RW(dst), R(src));}
|
|
void psubq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBQ, 0x0FFB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBQ, 0x0FFB, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBSB, 0x0FE8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBSB, 0x0FE8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBSW, 0x0FE9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBSW, 0x0FE9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubusb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBUSB, 0x0FD8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubusb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBUSB, 0x0FD8, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubusw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSUBUSW, 0x0FD9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psubusw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSUBUSW, 0x0FD9, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKHBW, 0x0F68, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhbw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKHBW, 0x0F68, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKHWD, 0x0F69, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKHWD, 0x0F69, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKHDQ, 0x0F6A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhdq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKHDQ, 0x0F6A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhqdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKHQDQ, 0x0F6D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckhqdq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKHQDQ, 0x0F6D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKLBW, 0x0F60, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklbw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKLBW, 0x0F60, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKLWD, 0x0F61, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKLWD, 0x0F61, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckldq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKLDQ, 0x0F62, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpckldq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKLDQ, 0x0F62, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklqdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PUNPCKLQDQ, 0x0F6C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void punpcklqdq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PUNPCKLQDQ, 0x0F6C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pxor(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PXOR, 0x0FEF, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pxor(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PXOR, 0x0FEF, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void shufpd(const XmmReg& dst, const XmmReg& src, const Imm8& sel) {AppendInstr(I_SHUFPD, 0x0FC6, E_MANDATORY_PREFIX_66, RW(dst), R(src), sel);}
|
|
void shufpd(const XmmReg& dst, const Mem128& src, const Imm8& sel) {AppendInstr(I_SHUFPD, 0x0FC6, E_MANDATORY_PREFIX_66, RW(dst), R(src), sel);}
|
|
void sqrtpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTPD, 0x0F51, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void sqrtpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SQRTPD, 0x0F51, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void sqrtsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTSD, 0x0F51, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void sqrtsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_SQRTSD, 0x0F51, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void subpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SUBPD, 0x0F5C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void subpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SUBPD, 0x0F5C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void subsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SUBSD, 0x0F5C, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void subsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_SUBSD, 0x0F5C, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void ucomisd(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UCOMISD, 0x0F2E, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void ucomisd(const XmmReg& src1, const Mem64& src2) {AppendInstr(I_UCOMISD, 0x0F2E, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void unpckhpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_UNPCKHPD, 0x0F15, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void unpckhpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_UNPCKHPD, 0x0F15, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void unpcklpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_UNPCKLPD, 0x0F14, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void unpcklpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_UNPCKLPD, 0x0F14, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void xorpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_XORPD, 0x0F57, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void xorpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_XORPD, 0x0F57, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
|
|
// SSE3
|
|
void addsubps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDSUBPS, 0x0FD0, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void addsubps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ADDSUBPS, 0x0FD0, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void addsubpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_ADDSUBPD, 0x0FD0, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void addsubpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_ADDSUBPD, 0x0FD0, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void haddps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_HADDPS, 0x0F7C, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void haddps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_HADDPS, 0x0F7C, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void haddpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_HADDPD, 0x0F7C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void haddpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_HADDPD, 0x0F7C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void hsubps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_HSUBPS, 0x0F7D, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void hsubps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_HSUBPS, 0x0F7D, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void hsubpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_HSUBPD, 0x0F7D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void hsubpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_HSUBPD, 0x0F7D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void lddqu(const XmmReg& dst, const Mem128& src) {AppendInstr(I_LDDQU, 0x0FF0, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void monitor() {AppendInstr(I_MONITOR, 0x0F01C8, 0);}
|
|
void movddup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDDUP, 0x0F12, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void movddup(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVDDUP, 0x0F12, E_MANDATORY_PREFIX_F2, W(dst), R(src));}
|
|
void movshdup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSHDUP, 0x0F16, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movshdup(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVSHDUP, 0x0F16, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movsldup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSLDUP, 0x0F12, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void movsldup(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVSLDUP, 0x0F12, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void mwait() {AppendInstr(I_MWAIT, 0x0F01C9, 0);}
|
|
|
|
// SSSE3
|
|
void pabsb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PABSB, 0x0F381C, 0, RW(dst), R(src));}
|
|
void pabsb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PABSB, 0x0F381C, 0, RW(dst), R(src));}
|
|
void pabsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSB, 0x0F381C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pabsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSB, 0x0F381C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pabsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PABSW, 0x0F381D, 0, RW(dst), R(src));}
|
|
void pabsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PABSW, 0x0F381D, 0, RW(dst), R(src));}
|
|
void pabsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSW, 0x0F381D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pabsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSW, 0x0F381D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pabsd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PABSD, 0x0F381E, 0, RW(dst), R(src));}
|
|
void pabsd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PABSD, 0x0F381E, 0, RW(dst), R(src));}
|
|
void pabsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSD, 0x0F381E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pabsd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSD, 0x0F381E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void palignr(const MmxReg& dst, const MmxReg& src, const Imm8& n) {AppendInstr(I_PALIGNR, 0x0F3A0F, 0, RW(dst), R(src), n);}
|
|
void palignr(const MmxReg& dst, const Mem64& src, const Imm8& n) {AppendInstr(I_PALIGNR, 0x0F3A0F, 0, RW(dst), R(src), n);}
|
|
void palignr(const XmmReg& dst, const XmmReg& src, const Imm8& n) {AppendInstr(I_PALIGNR, 0x0F3A0F, E_MANDATORY_PREFIX_66, RW(dst), R(src), n);}
|
|
void palignr(const XmmReg& dst, const Mem128& src, const Imm8& n) {AppendInstr(I_PALIGNR, 0x0F3A0F, E_MANDATORY_PREFIX_66, RW(dst), R(src), n);}
|
|
void phaddw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHADDW, 0x0F3801, 0, RW(dst), R(src));}
|
|
void phaddw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHADDW, 0x0F3801, 0, RW(dst), R(src));}
|
|
void phaddw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHADDW, 0x0F3801, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phaddw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHADDW, 0x0F3801, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phaddd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHADDD, 0x0F3802, 0, RW(dst), R(src));}
|
|
void phaddd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHADDD, 0x0F3802, 0, RW(dst), R(src));}
|
|
void phaddd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHADDD, 0x0F3802, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phaddd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHADDD, 0x0F3802, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phaddsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHADDSW, 0x0F3803, 0, RW(dst), R(src));}
|
|
void phaddsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHADDSW, 0x0F3803, 0, RW(dst), R(src));}
|
|
void phaddsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHADDSW, 0x0F3803, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phaddsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHADDSW, 0x0F3803, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHSUBW, 0x0F3805, 0, RW(dst), R(src));}
|
|
void phsubw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHSUBW, 0x0F3805, 0, RW(dst), R(src));}
|
|
void phsubw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHSUBW, 0x0F3805, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHSUBW, 0x0F3805, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHSUBD, 0x0F3806, 0, RW(dst), R(src));}
|
|
void phsubd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHSUBD, 0x0F3806, 0, RW(dst), R(src));}
|
|
void phsubd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHSUBD, 0x0F3806, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHSUBD, 0x0F3806, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PHSUBSW, 0x0F3807, 0, RW(dst), R(src));}
|
|
void phsubsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PHSUBSW, 0x0F3807, 0, RW(dst), R(src));}
|
|
void phsubsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHSUBSW, 0x0F3807, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phsubsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHSUBSW, 0x0F3807, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaddubsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMADDUBSW,0x0F3804, 0, RW(dst), R(src));}
|
|
void pmaddubsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMADDUBSW,0x0F3804, 0, RW(dst), R(src));}
|
|
void pmaddubsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMADDUBSW,0x0F3804, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaddubsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMADDUBSW,0x0F3804, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulhrsw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PMULHRSW, 0x0F380B, 0, RW(dst), R(src));}
|
|
void pmulhrsw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PMULHRSW, 0x0F380B, 0, RW(dst), R(src));}
|
|
void pmulhrsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULHRSW, 0x0F380B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulhrsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULHRSW, 0x0F380B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pshufb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSHUFB, 0x0F3800, 0, RW(dst), R(src));}
|
|
void pshufb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSHUFB, 0x0F3800, 0, RW(dst), R(src));}
|
|
void pshufb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSHUFB, 0x0F3800, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pshufb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSHUFB, 0x0F3800, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignb(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSIGNB, 0x0F3808, 0, RW(dst), R(src));}
|
|
void psignb(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSIGNB, 0x0F3808, 0, RW(dst), R(src));}
|
|
void psignb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSIGNB, 0x0F3808, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSIGNB, 0x0F3808, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignw(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSIGNW, 0x0F3809, 0, RW(dst), R(src));}
|
|
void psignw(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSIGNW, 0x0F3809, 0, RW(dst), R(src));}
|
|
void psignw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSIGNW, 0x0F3809, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSIGNW, 0x0F3809, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignd(const MmxReg& dst, const MmxReg& src) {AppendInstr(I_PSIGND, 0x0F380A, 0, RW(dst), R(src));}
|
|
void psignd(const MmxReg& dst, const Mem64& src) {AppendInstr(I_PSIGND, 0x0F380A, 0, RW(dst), R(src));}
|
|
void psignd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PSIGND, 0x0F380A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void psignd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PSIGND, 0x0F380A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
|
|
// SSE4.1
|
|
void blendps(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0F3A0C, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void blendps(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0F3A0C, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void blendpd(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0F3A0D, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void blendpd(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0F3A0D, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void blendvps(const XmmReg& dst, const XmmReg& src, const XmmReg& mask) {AppendInstr(I_BLENDVPS, 0x0F3814, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void blendvps(const XmmReg& dst, const Mem128& src, const XmmReg& mask) {AppendInstr(I_BLENDVPS, 0x0F3814, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void blendvpd(const XmmReg& dst, const XmmReg& src, const XmmReg& mask) {AppendInstr(I_BLENDVPD, 0x0F3815, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void blendvpd(const XmmReg& dst, const Mem128& src, const XmmReg& mask) {AppendInstr(I_BLENDVPD, 0x0F3815, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void dpps(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_DPPS, 0x0F3A40, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void dpps(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_DPPS, 0x0F3A40, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void dppd(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_DPPD, 0x0F3A41, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void dppd(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_DPPD, 0x0F3A41, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void extractps(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_EXTRACTPS,0x0F3A17, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void extractps(const Mem32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_EXTRACTPS,0x0F3A17, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
#ifdef JITASM64
|
|
void extractps(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_EXTRACTPS,0x0F3A17, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, R(src), W(dst), i);}
|
|
#endif
|
|
void insertps(const XmmReg& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_INSERTPS, 0x0F3A21, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void insertps(const XmmReg& dst, const Mem32& src, const Imm8& i) {AppendInstr(I_INSERTPS, 0x0F3A21, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void movntdqa(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVNTDQA, 0x0F382A, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void mpsadbw(const XmmReg& dst, const XmmReg& src, const Imm8& offsets) {AppendInstr(I_MPSADBW, 0x0F3A42, E_MANDATORY_PREFIX_66, RW(dst), R(src), offsets);}
|
|
void mpsadbw(const XmmReg& dst, const Mem128& src, const Imm8& offsets) {AppendInstr(I_MPSADBW, 0x0F3A42, E_MANDATORY_PREFIX_66, RW(dst), R(src), offsets);}
|
|
void packusdw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PACKUSDW, 0x0F382B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void packusdw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PACKUSDW, 0x0F382B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pblendvb(const XmmReg& dst, const XmmReg& src, const XmmReg& mask) {AppendInstr(I_PBLENDVB, 0x0F3810, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void pblendvb(const XmmReg& dst, const Mem128& src, const XmmReg& mask) {AppendInstr(I_PBLENDVB, 0x0F3810, E_MANDATORY_PREFIX_66, RW(dst), R(src), Dummy(R(mask), xmm0));}
|
|
void pblendw(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_PBLENDW, 0x0F3A0E, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void pblendw(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_PBLENDW, 0x0F3A0E, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void pcmpeqq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPEQQ, 0x0F3829, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpeqq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPEQQ, 0x0F3829, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pextrb(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x0F3A14, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void pextrb(const Mem8& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x0F3A14, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void pextrw(const Mem16& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x0F3A15, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void pextrd(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x0F3A16, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void pextrd(const Mem32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x0F3A16, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
#ifdef JITASM64
|
|
void pextrb(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x0F3A14, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, R(src), W(dst), i);}
|
|
void pextrd(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x0F3A16, E_MANDATORY_PREFIX_66, R(src), W(dst), i);}
|
|
void pextrq(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRQ, 0x0F3A16, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, R(src), W(dst), i);}
|
|
void pextrq(const Mem64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRQ, 0x0F3A16, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, R(src), W(dst), i);}
|
|
#endif
|
|
void phminposuw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHMINPOSUW, 0x0F3841, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void phminposuw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHMINPOSUW, 0x0F3841, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pinsrb(const XmmReg& dst, const Reg32& src, const Imm8& i) {AppendInstr(I_PINSRB, 0x0F3A20, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrb(const XmmReg& dst, const Mem8& src, const Imm8& i) {AppendInstr(I_PINSRB, 0x0F3A20, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrd(const XmmReg& dst, const Reg32& src, const Imm8& i) {AppendInstr(I_PINSRD, 0x0F3A22, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrd(const XmmReg& dst, const Mem32& src, const Imm8& i) {AppendInstr(I_PINSRD, 0x0F3A22, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
#ifdef JITASM64
|
|
void pinsrb(const XmmReg& dst, const Reg64& src, const Imm8& i) {AppendInstr(I_PINSRB, 0x0F3A20, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrd(const XmmReg& dst, const Reg64& src, const Imm8& i) {AppendInstr(I_PINSRD, 0x0F3A22, E_MANDATORY_PREFIX_66, RW(dst), R(src), i);}
|
|
void pinsrq(const XmmReg& dst, const Reg64& src, const Imm8& i) {AppendInstr(I_PINSRQ, 0x0F3A22, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, RW(dst), R(src), i);}
|
|
void pinsrq(const XmmReg& dst, const Mem64& src, const Imm8& i) {AppendInstr(I_PINSRQ, 0x0F3A22, E_MANDATORY_PREFIX_66 | E_REXW_PREFIX, RW(dst), R(src), i);}
|
|
#endif
|
|
void pmaxsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXSB, 0x0F383C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXSB, 0x0F383C, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXSD, 0x0F383D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxsd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXSD, 0x0F383D, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxuw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXUW, 0x0F383E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxuw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXUW, 0x0F383E, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxud(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMAXUD, 0x0F383F, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmaxud(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMAXUD, 0x0F383F, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINSB, 0x0F3838, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINSB, 0x0F3838, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINSD, 0x0F3839, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminsd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINSD, 0x0F3839, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminuw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINUW, 0x0F383A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminuw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINUW, 0x0F383A, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminud(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMINUD, 0x0F383B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pminud(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMINUD, 0x0F383B, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmovsxbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBW, 0x0F3820, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxbw(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXBW, 0x0F3820, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxbd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBD, 0x0F3821, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxbd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVSXBD, 0x0F3821, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxbq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBQ, 0x0F3822, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxbq(const XmmReg& dst, const Mem16& src) {AppendInstr(I_PMOVSXBQ, 0x0F3822, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXWD, 0x0F3823, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxwd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXWD, 0x0F3823, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXWQ, 0x0F3824, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxwq(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVSXWQ, 0x0F3824, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXDQ, 0x0F3825, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovsxdq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXDQ, 0x0F3825, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBW, 0x0F3830, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbw(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXBW, 0x0F3830, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBD, 0x0F3831, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVZXBD, 0x0F3831, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBQ, 0x0F3832, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxbq(const XmmReg& dst, const Mem16& src) {AppendInstr(I_PMOVZXBQ, 0x0F3832, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXWD, 0x0F3833, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxwd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXWD, 0x0F3833, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXWQ, 0x0F3834, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxwq(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVZXWQ, 0x0F3834, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXDQ, 0x0F3835, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmovzxdq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXDQ, 0x0F3835, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void pmuldq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULDQ, 0x0F3828, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmuldq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULDQ, 0x0F3828, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulld(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMULLD, 0x0F3840, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pmulld(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PMULLD, 0x0F3840, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void ptest(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PTEST, 0x0F3817, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void ptest(const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PTEST, 0x0F3817, E_MANDATORY_PREFIX_66, R(src1), R(src2));}
|
|
void roundps(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x0F3A08, E_MANDATORY_PREFIX_66, W(dst), R(src), mode);}
|
|
void roundps(const XmmReg& dst, const Mem128& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x0F3A08, E_MANDATORY_PREFIX_66, W(dst), R(src), mode);}
|
|
void roundpd(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x0F3A09, E_MANDATORY_PREFIX_66, W(dst), R(src), mode);}
|
|
void roundpd(const XmmReg& dst, const Mem128& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x0F3A09, E_MANDATORY_PREFIX_66, W(dst), R(src), mode);}
|
|
void roundss(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDSS, 0x0F3A0A, E_MANDATORY_PREFIX_66, RW(dst), R(src), mode);}
|
|
void roundss(const XmmReg& dst, const Mem32& src, const Imm8& mode) {AppendInstr(I_ROUNDSS, 0x0F3A0A, E_MANDATORY_PREFIX_66, RW(dst), R(src), mode);}
|
|
void roundsd(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDSD, 0x0F3A0B, E_MANDATORY_PREFIX_66, RW(dst), R(src), mode);}
|
|
void roundsd(const XmmReg& dst, const Mem64& src, const Imm8& mode) {AppendInstr(I_ROUNDSD, 0x0F3A0B, E_MANDATORY_PREFIX_66, RW(dst), R(src), mode);}
|
|
|
|
// SSE4.2
|
|
void crc32(const Reg32& dst, const Reg8& src) {AppendInstr(I_CRC32, 0x0F38F0, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void crc32(const Reg32& dst, const Mem8& src) {AppendInstr(I_CRC32, 0x0F38F0, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void crc32(const Reg32& dst, const Reg16& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2 | E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void crc32(const Reg32& dst, const Mem16& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2 | E_OPERAND_SIZE_PREFIX, RW(dst), R(src));}
|
|
void crc32(const Reg32& dst, const Reg32& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
void crc32(const Reg32& dst, const Mem32& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2, RW(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void crc32(const Reg64& dst, const Reg8& src) {AppendInstr(I_CRC32, 0x0F38F0, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void crc32(const Reg64& dst, const Mem8& src) {AppendInstr(I_CRC32, 0x0F38F0, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void crc32(const Reg64& dst, const Reg64& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
void crc32(const Reg64& dst, const Mem64& src) {AppendInstr(I_CRC32, 0x0F38F1, E_MANDATORY_PREFIX_F2 | E_REXW_PREFIX, RW(dst), R(src));}
|
|
#endif
|
|
void pcmpestri(const Reg& result, const XmmReg& src1, const Reg& len1, const XmmReg& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRI, 0x0F3A61, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), ecx), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void pcmpestri(const Reg& result, const XmmReg& src1, const Reg& len1, const Mem128& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRI, 0x0F3A61, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), ecx), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void pcmpestrm(const XmmReg& result, const XmmReg& src1, const Reg& len1, const XmmReg& src2, const Reg& len2, const Imm8& mode){AppendInstr(I_PCMPESTRM, 0x0F3A60, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), xmm0), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void pcmpestrm(const XmmReg& result, const XmmReg& src1, const Reg& len1, const Mem128& src2, const Reg& len2, const Imm8& mode){AppendInstr(I_PCMPESTRM, 0x0F3A60, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), xmm0), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void pcmpistri(const Reg& result, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRI, 0x0F3A63, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), ecx));}
|
|
void pcmpistri(const Reg& result, const XmmReg& src1, const Mem128& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRI, 0x0F3A63, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), ecx));}
|
|
void pcmpistrm(const XmmReg& result, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRM, 0x0F3A62, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), xmm0));}
|
|
void pcmpistrm(const XmmReg& result, const XmmReg& src1, const Mem128& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRM, 0x0F3A62, E_MANDATORY_PREFIX_66, R(src1), R(src2), mode, Dummy(W(result), xmm0));}
|
|
void pcmpgtq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PCMPGTQ, 0x0F3837, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void pcmpgtq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PCMPGTQ, 0x0F3837, E_MANDATORY_PREFIX_66, RW(dst), R(src));}
|
|
void popcnt(const Reg16& dst, const Reg16& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3 | E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void popcnt(const Reg16& dst, const Mem16& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3 | E_OPERAND_SIZE_PREFIX, W(dst), R(src));}
|
|
void popcnt(const Reg32& dst, const Reg32& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
void popcnt(const Reg32& dst, const Mem32& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void popcnt(const Reg64& dst, const Reg64& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
void popcnt(const Reg64& dst, const Mem64& src) {AppendInstr(I_POPCNT, 0x0FB8, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, W(dst), R(src));}
|
|
#endif
|
|
|
|
// AVX
|
|
void vaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDPD, 0x58, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ADDPD, 0x58, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ADDPD, 0x58, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ADDPD, 0x58, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDPS, 0x58, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ADDPS, 0x58, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ADDPS, 0x58, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ADDPS, 0x58, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDSD, 0x58, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vaddsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_ADDSD, 0x58, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vaddss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDSS, 0x58, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vaddss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_ADDSS, 0x58, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vaddsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDSUBPD, 0xD0, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ADDSUBPD, 0xD0, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ADDSUBPD, 0xD0, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ADDSUBPD, 0xD0, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vaddsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ADDSUBPS, 0xD0, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vaddsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ADDSUBPS, 0xD0, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vaddsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ADDSUBPS, 0xD0, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vaddsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ADDSUBPS, 0xD0, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void aesenc(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESENC, 0x0F38DC, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void aesenc(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESENC, 0x0F38DC, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void vaesenc(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_AESENC, 0xDC, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vaesenc(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_AESENC, 0xDC, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void aesenclast(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESENCLAST, 0x0F38DD, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void aesenclast(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESENCLAST, 0x0F38DD, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void vaesenclast(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_AESENCLAST, 0xDD, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vaesenclast(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_AESENCLAST, 0xDD, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void aesdec(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESDEC, 0x0F38DE, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void aesdec(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESDEC, 0x0F38DE, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void vaesdec(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_AESDEC, 0xDE, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vaesdec(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_AESDEC, 0xDE, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void aesdeclast(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESDECLAST, 0x0F38DF, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void aesdeclast(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESDECLAST, 0x0F38DF, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void vaesdeclast(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_AESDECLAST, 0xDF, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vaesdeclast(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_AESDECLAST, 0xDF, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void aesimc(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESIMC, 0x0F38DB, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void aesimc(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESIMC, 0x0F38DB, E_MANDATORY_PREFIX_66, W(dst), R(src));}
|
|
void vaesimc(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_AESIMC, 0xDB, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vaesimc(const XmmReg& dst, const Mem128& src) {AppendInstr(I_AESIMC, 0xDB, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void aeskeygenassist(const XmmReg& dst, const XmmReg& src, const Imm8& imm) {AppendInstr(I_AESKEYGENASSIST, 0x0F3ADF, E_MANDATORY_PREFIX_66, W(dst), R(src), imm);}
|
|
void aeskeygenassist(const XmmReg& dst, const Mem128& src, const Imm8& imm) {AppendInstr(I_AESKEYGENASSIST, 0x0F3ADF, E_MANDATORY_PREFIX_66, W(dst), R(src), imm);}
|
|
void vaeskeygenassist(const XmmReg& dst, const XmmReg& src, const Imm8& imm) {AppendInstr(I_AESKEYGENASSIST, 0xDF, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src), imm);}
|
|
void vaeskeygenassist(const XmmReg& dst, const Mem128& src, const Imm8& imm) {AppendInstr(I_AESKEYGENASSIST, 0xDF, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src), imm);}
|
|
void vandpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ANDPD, 0x54, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ANDPD, 0x54, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ANDPD, 0x54, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ANDPD, 0x54, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ANDPS, 0x54, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ANDPS, 0x54, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ANDPS, 0x54, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ANDPS, 0x54, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ANDNPD, 0x55, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ANDNPD, 0x55, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ANDNPD, 0x55, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ANDNPD, 0x55, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ANDNPS, 0x55, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ANDNPS, 0x55, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ANDNPS, 0x55, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vandnps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ANDNPS, 0x55, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vblendpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0D, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0D, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0D, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& mask) {AppendInstr(I_BLENDPD, 0x0D, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0C, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0C, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0C, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& mask) {AppendInstr(I_BLENDPS, 0x0C, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vblendvpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& mask) {AppendInstr(I_BLENDVPD, 0x4B, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& mask) {AppendInstr(I_BLENDVPD, 0x4B, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& mask) {AppendInstr(I_BLENDVPD, 0x4B, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& mask) {AppendInstr(I_BLENDVPD, 0x4B, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& mask) {AppendInstr(I_BLENDVPS, 0x4A, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& mask) {AppendInstr(I_BLENDVPS, 0x4A, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& mask) {AppendInstr(I_BLENDVPS, 0x4A, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vblendvps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& mask) {AppendInstr(I_BLENDVPS, 0x4A, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), R(mask));}
|
|
void vbroadcastss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_VBROADCASTSS, 0x18, E_VEX_128_66_0F38_W0, W(dst), R(src));}
|
|
void vbroadcastss(const YmmReg& dst, const Mem32& src) {AppendInstr(I_VBROADCASTSS, 0x18, E_VEX_256_66_0F38_W0, W(dst), R(src));}
|
|
void vbroadcastsd(const YmmReg& dst, const Mem64& src) {AppendInstr(I_VBROADCASTSD, 0x19, E_VEX_256_66_0F38_W0, W(dst), R(src));}
|
|
void vbroadcastf128(const YmmReg& dst, const Mem128& src) {AppendInstr(I_VBROADCASTF128, 0x1A, E_VEX_256_66_0F38_W0, W(dst), R(src));}
|
|
void vcmppd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPPD, 0xC2, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmppd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& imm) {AppendInstr(I_CMPPD, 0xC2, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmppd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPPD, 0xC2, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmppd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& imm) {AppendInstr(I_CMPPD, 0xC2, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPPS, 0xC2, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& imm) {AppendInstr(I_CMPPS, 0xC2, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPPS, 0xC2, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& imm) {AppendInstr(I_CMPPS, 0xC2, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPSD, 0xC2, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const Imm8& imm) {AppendInstr(I_CMPSD, 0xC2, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& imm) {AppendInstr(I_CMPSS, 0xC2, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1), imm);}
|
|
void vcmpss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const Imm8& imm) {AppendInstr(I_CMPSS, 0xC2, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1), imm);}
|
|
void vcomisd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_COMISD, 0x2F, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcomisd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_COMISD, 0x2F, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcomiss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_COMISS, 0x2F, E_VEX_LIG | E_VEX_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vcomiss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_COMISS, 0x2F, E_VEX_LIG | E_VEX_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vcvtdq2pd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTDQ2PD, 0xE6, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvtdq2pd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTDQ2PD, 0xE6, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvtdq2pd(const YmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTDQ2PD, 0xE6, E_VEX_256 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvtdq2pd(const YmmReg& dst, const Mem128& src) {AppendInstr(I_CVTDQ2PD, 0xE6, E_VEX_256 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvtdq2ps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTDQ2PS, 0x5B, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vcvtdq2ps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTDQ2PS, 0x5B, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vcvtdq2ps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTDQ2PS, 0x5B, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vcvtdq2ps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_CVTDQ2PS, 0x5B, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vcvtpd2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPD2DQ, 0xE6, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vcvtpd2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPD2DQ, 0xE6, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vcvtpd2dq(const XmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTPD2DQ, 0xE6, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vcvtpd2dq(const XmmReg& dst, const Mem256& src) {AppendInstr(I_CVTPD2DQ, 0xE6, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vcvtpd2ps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPD2PS, 0x5A, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtpd2ps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPD2PS, 0x5A, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtpd2ps(const XmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTPD2PS, 0x5A, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtpd2ps(const XmmReg& dst, const Mem256& src) {AppendInstr(I_CVTPD2PS, 0x5A, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2DQ, 0x5B, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPS2DQ, 0x5B, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2dq(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTPS2DQ, 0x5B, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2dq(const YmmReg& dst, const Mem256& src) {AppendInstr(I_CVTPS2DQ, 0x5B, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2pd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2PD, 0x5A, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2pd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_CVTPS2PD, 0x5A, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2pd(const YmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTPS2PD, 0x5A, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vcvtps2pd(const YmmReg& dst, const Mem128& src) {AppendInstr(I_CVTPS2PD, 0x5A, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vcvtsd2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x2D, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vcvtsd2si(const Reg32& dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x2D, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vcvtsd2si(const Reg64 dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x2D, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src));}
|
|
void vcvtsd2si(const Reg64 dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x2D, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src));}
|
|
#endif
|
|
void vcvtsd2ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_CVTSD2SS, 0x5A, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vcvtsd2ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_CVTSD2SS, 0x5A, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vcvtsi2sd(const XmmReg& dst, const XmmReg& src1, const Reg32& src2) {AppendInstr(I_CVTSI2SD, 0x2A, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src2), R(src1));}
|
|
void vcvtsi2sd(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_CVTSI2SD, 0x2A, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src2), R(src1));}
|
|
#ifdef JITASM64
|
|
void vcvtsi2sd(const XmmReg& dst, const XmmReg& src1, const Reg64& src2) {AppendInstr(I_CVTSI2SD, 0x2A, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src2), R(src1));}
|
|
void vcvtsi2sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_CVTSI2SD, 0x2A, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src2), R(src1));}
|
|
#endif
|
|
void vcvtsi2ss(const XmmReg& dst, const XmmReg& src1, const Reg32& src2) {AppendInstr(I_CVTSI2SS, 0x2A, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src2), R(src1));}
|
|
void vcvtsi2ss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_CVTSI2SS, 0x2A, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src2), R(src1));}
|
|
#ifdef JITASM64
|
|
void vcvtsi2ss(const XmmReg& dst, const XmmReg& src1, const Reg64& src2) {AppendInstr(I_CVTSI2SS, 0x2A, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src2), R(src1));}
|
|
void vcvtsi2ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_CVTSI2SS, 0x2A, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src2), R(src1));}
|
|
#endif
|
|
void vcvtss2sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_CVTSS2SD, 0x5A, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vcvtss2sd(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_CVTSS2SD, 0x5A, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vcvtss2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x2D, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vcvtss2si(const Reg32& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x2D, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vcvtss2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x2D, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src));}
|
|
void vcvtss2si(const Reg64& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x2D, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src));}
|
|
#endif
|
|
void vcvttpd2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPD2DQ, 0xE6, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvttpd2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTTPD2DQ, 0xE6, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vcvttpd2dq(const XmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTTPD2DQ, 0xE6, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvttpd2dq(const XmmReg& dst, const Mem256& src) {AppendInstr(I_CVTTPD2DQ, 0xE6, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vcvttps2dq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_CVTTPS2DQ, 0x5B, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvttps2dq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_CVTTPS2DQ, 0x5B, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvttps2dq(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_CVTTPS2DQ, 0x5B, E_VEX_256 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvttps2dq(const YmmReg& dst, const Mem256& src) {AppendInstr(I_CVTTPS2DQ, 0x5B, E_VEX_256 | E_VEX_F3_0F, W(dst), R(src));}
|
|
void vcvttsd2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x2C, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vcvttsd2si(const Reg32& dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x2C, E_VEX_128 | E_VEX_F2_0F | E_VEX_W0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vcvttsd2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTSD2SI, 0x2C, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src));}
|
|
void vcvttsd2si(const Reg64& dst, const Mem64& src) {AppendInstr(I_CVTSD2SI, 0x2C, E_VEX_128 | E_VEX_F2_0F | E_VEX_W1, W(dst), R(src));}
|
|
#endif
|
|
void vcvttss2si(const Reg32& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x2C, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vcvttss2si(const Reg32& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x2C, E_VEX_128 | E_VEX_F3_0F | E_VEX_W0, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vcvttss2si(const Reg64& dst, const XmmReg& src) {AppendInstr(I_CVTSS2SI, 0x2C, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src));}
|
|
void vcvttss2si(const Reg64& dst, const Mem32& src) {AppendInstr(I_CVTSS2SI, 0x2C, E_VEX_128 | E_VEX_F3_0F | E_VEX_W1, W(dst), R(src));}
|
|
#endif
|
|
void vdivpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_DIVPD, 0x5E, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_DIVPD, 0x5E, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_DIVPD, 0x5E, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_DIVPD, 0x5E, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_DIVPS, 0x5E, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_DIVPS, 0x5E, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_DIVPS, 0x5E, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_DIVPS, 0x5E, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vdivsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_DIVSD, 0x5E, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vdivsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_DIVSD, 0x5E, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vdivss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_DIVSS, 0x5E, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vdivss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_DIVSS, 0x5E, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vdppd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_DPPD, 0x41, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vdppd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_DPPD, 0x41, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vdpps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_DPPS, 0x40, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vdpps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_DPPS, 0x40, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vdpps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& mask) {AppendInstr(I_DPPS, 0x40, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vdpps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& mask) {AppendInstr(I_DPPS, 0x40, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vextractf128(const XmmReg& dst, const YmmReg& src, const Imm8& i) {AppendInstr(I_VEXTRACTF128, 0x19, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), i);}
|
|
void vextractf128(const Mem128& dst, const YmmReg& src, const Imm8& i) {AppendInstr(I_VEXTRACTF128, 0x19, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vextractps(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_EXTRACTPS, 0x17, E_VEX_128 | E_VEX_66_0F3A, R(src), W(dst), i);}
|
|
void vextractps(const Mem32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_EXTRACTPS, 0x17, E_VEX_128 | E_VEX_66_0F3A, R(src), W(dst), i);}
|
|
void vhaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhaddpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhaddpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhaddps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhaddps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_HADDPD, 0x7C, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vhsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vhsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_HSUBPD, 0x7D, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vinsertf128(const YmmReg& dst, const YmmReg& src1, const XmmReg& src2, const Imm8& i) {AppendInstr(I_VINSERTF128, 0x18, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vinsertf128(const YmmReg& dst, const YmmReg& src1, const Mem128& src2, const Imm8& i) {AppendInstr(I_VINSERTF128, 0x18, E_VEX_256 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vinsertps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& i) {AppendInstr(I_INSERTPS, 0x21, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vinsertps(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const Imm8& i) {AppendInstr(I_INSERTPS, 0x21, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vlddqu(const XmmReg& dst, const Mem128& src) {AppendInstr(I_LDDQU, 0xF0, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vlddqu(const YmmReg& dst, const Mem256& src) {AppendInstr(I_LDDQU, 0xF0, E_VEX_256 | E_VEX_F2_0F, W(dst), R(src));}
|
|
void vldmxcsr(const Mem32& src) {AppendInstr(I_LDMXCSR, 0xAE, E_VEX_LZ | E_VEX_0F | E_VEX_WIG, Imm8(2), R(src));}
|
|
void vmaskmovdqu(const XmmReg& src, const XmmReg& mask, const Reg& dstptr) {AppendInstr(I_MASKMOVDQU, 0xF7, E_VEX_128_66_0F_WIG, R(src), R(mask), Dummy(R(dstptr), zdi));}
|
|
void vmaskmovps(const XmmReg& dst, const XmmReg& mask, const Mem128& src) {AppendInstr(I_VMASKMOVPS, 0x2C, E_VEX_128_66_0F38_W0, W(dst), R(src), R(mask));}
|
|
void vmaskmovps(const YmmReg& dst, const YmmReg& mask, const Mem256& src) {AppendInstr(I_VMASKMOVPS, 0x2C, E_VEX_256_66_0F38_W0, W(dst), R(src), R(mask));}
|
|
void vmaskmovpd(const XmmReg& dst, const XmmReg& mask, const Mem128& src) {AppendInstr(I_VMASKMOVPD, 0x2D, E_VEX_128_66_0F38_W0, W(dst), R(src), R(mask));}
|
|
void vmaskmovpd(const YmmReg& dst, const YmmReg& mask, const Mem256& src) {AppendInstr(I_VMASKMOVPD, 0x2D, E_VEX_256_66_0F38_W0, W(dst), R(src), R(mask));}
|
|
void vmaskmovps(const Mem128& dst, const XmmReg& mask, const XmmReg& src) {AppendInstr(I_VMASKMOVPS, 0x2E, E_VEX_128_66_0F38_W0, R(src), W(dst), R(mask));}
|
|
void vmaskmovps(const Mem256& dst, const YmmReg& mask, const YmmReg& src) {AppendInstr(I_VMASKMOVPS, 0x2E, E_VEX_256_66_0F38_W0, R(src), W(dst), R(mask));}
|
|
void vmaskmovpd(const Mem128& dst, const XmmReg& mask, const XmmReg& src) {AppendInstr(I_VMASKMOVPD, 0x2F, E_VEX_128_66_0F38_W0, R(src), W(dst), R(mask));}
|
|
void vmaskmovpd(const Mem256& dst, const YmmReg& mask, const YmmReg& src) {AppendInstr(I_VMASKMOVPD, 0x2F, E_VEX_256_66_0F38_W0, R(src), W(dst), R(mask));}
|
|
void vmaxpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MAXPD, 0x5F, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MAXPD, 0x5F, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MAXPD, 0x5F, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MAXPD, 0x5F, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MAXPS, 0x5F, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MAXPS, 0x5F, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MAXPS, 0x5F, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MAXPS, 0x5F, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmaxsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MAXSD, 0x5F, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vmaxsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MAXSD, 0x5F, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vmaxss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MAXSS, 0x5F, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vmaxss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_MAXSS, 0x5F, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vminpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MINPD, 0x5D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MINPD, 0x5D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MINPD, 0x5D, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MINPD, 0x5D, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MINPS, 0x5D, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MINPS, 0x5D, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MINPS, 0x5D, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MINPS, 0x5D, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vminsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MINSD, 0x5D, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vminsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MINSD, 0x5D, E_VEX_128 | E_VEX_F2_0F, W(dst), R(src2), R(src1));}
|
|
void vminss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MINSS, 0x5D, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vminss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_MINSS, 0x5D, E_VEX_128 | E_VEX_F3_0F, W(dst), R(src2), R(src1));}
|
|
void vmovapd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVAPD, 0x28, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovapd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVAPD, 0x28, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovapd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVAPD, 0x29, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovapd(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVAPD, 0x28, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovapd(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVAPD, 0x28, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovapd(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVAPD, 0x29, E_VEX_256_66_0F_WIG, R(src), W(dst));}
|
|
void vmovaps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVAPS, 0x28, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovaps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVAPS, 0x28, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovaps(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVAPS, 0x29, E_VEX_128_0F_WIG, R(src), W(dst));}
|
|
void vmovaps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVAPS, 0x28, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vmovaps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVAPS, 0x28, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vmovaps(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVAPS, 0x29, E_VEX_256_0F_WIG, R(src), W(dst));}
|
|
void vmovd(const XmmReg& dst, const Reg32& src) {AppendInstr(I_MOVD, 0x6E, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vmovd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MOVD, 0x6E, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(src));}
|
|
void vmovd(const Reg32& dst, const XmmReg& src) {AppendInstr(I_MOVD, 0x7E, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, R(src), W(dst));}
|
|
void vmovd(const Mem32& dst, const XmmReg& src) {AppendInstr(I_MOVD, 0x7E, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, R(src), W(dst));}
|
|
void vmovq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVQ, 0x7E, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVQ, 0x7E, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovq(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVQ, 0xD6, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
#ifdef JITASM64
|
|
void vmovq(const XmmReg& dst, const Reg64& src) {AppendInstr(I_MOVQ, 0x6E, E_VEX_128 | E_VEX_66_0F | E_VEX_W1, W(dst), R(src));}
|
|
void vmovq(const Reg64& dst, const XmmReg& src) {AppendInstr(I_MOVQ, 0x7E, E_VEX_128 | E_VEX_66_0F | E_VEX_W1, R(src), W(dst));}
|
|
#endif
|
|
void vmovddup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDDUP, 0x12, E_VEX_128 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovddup(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVDDUP, 0x12, E_VEX_128 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovddup(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVDDUP, 0x12, E_VEX_256 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovddup(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVDDUP, 0x12, E_VEX_256 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovdqa(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDQA, 0x6F, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovdqa(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVDQA, 0x6F, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovdqa(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVDQA, 0x7F, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovdqa(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVDQA, 0x6F, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovdqa(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVDQA, 0x6F, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovdqa(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVDQA, 0x7F, E_VEX_256_66_0F_WIG, R(src), W(dst));}
|
|
void vmovdqu(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVDQU, 0x6F, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovdqu(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVDQU, 0x6F, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovdqu(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVDQU, 0x7F, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, R(src), W(dst));}
|
|
void vmovdqu(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVDQU, 0x6F, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovdqu(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVDQU, 0x6F, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovdqu(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVDQU, 0x7F, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, R(src), W(dst));}
|
|
void vmovhlps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MOVHLPS, 0x12, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovhpd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MOVHPD, 0x16, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovhpd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVHPD, 0x17, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovhps(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MOVHPS, 0x16, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovhps(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVHPS, 0x17, E_VEX_128_0F_WIG, R(src), W(dst));}
|
|
void vmovlhps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MOVHLPS, 0x16, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovlpd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MOVLPD, 0x12, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovlpd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVLPD, 0x13, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovlps(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MOVLPS, 0x12, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovlps(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVLPS, 0x13, E_VEX_128_0F_WIG, R(src), W(dst));}
|
|
void vmovmskpd(const Reg32& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPD, 0x50, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovmskpd(const Reg32& dst, const YmmReg& src) {AppendInstr(I_MOVMSKPD, 0x50, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vmovmskpd(const Reg64& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPD, 0x50, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovmskpd(const Reg64& dst, const YmmReg& src) {AppendInstr(I_MOVMSKPD, 0x50, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
#endif
|
|
void vmovmskps(const Reg32& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPS, 0x50, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovmskps(const Reg32& dst, const YmmReg& src) {AppendInstr(I_MOVMSKPS, 0x50, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vmovmskps(const Reg64& dst, const XmmReg& src) {AppendInstr(I_MOVMSKPS, 0x50, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovmskps(const Reg64& dst, const YmmReg& src) {AppendInstr(I_MOVMSKPS, 0x50, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
#endif
|
|
void vmovntdq(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTDQ, 0xE7, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovntdq(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVNTDQ, 0xE7, E_VEX_256_66_0F_WIG, R(src), W(dst));}
|
|
void vmovntdqa(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVNTDQA, 0x2A, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vmovntpd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTPD, 0x2B, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovntpd(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVNTPD, 0x2B, E_VEX_256_66_0F_WIG, R(src), W(dst));}
|
|
void vmovntps(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVNTPS, 0x2B, E_VEX_128_0F_WIG, R(src), W(dst));}
|
|
void vmovntps(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVNTPS, 0x2B, E_VEX_256_0F_WIG, R(src), W(dst));}
|
|
void vmovsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MOVSD, 0x10, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_MOVSD, 0x10, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovsd(const Mem64& dst, const XmmReg& src) {AppendInstr(I_MOVSD, 0x11, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, R(src), W(dst));}
|
|
void vmovshdup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSHDUP, 0x16, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovshdup(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVSHDUP, 0x16, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovshdup(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVSHDUP, 0x16, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovshdup(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVSHDUP, 0x16, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovsldup(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVSLDUP, 0x12, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovsldup(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVSLDUP, 0x12, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovsldup(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVSLDUP, 0x12, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovsldup(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVSLDUP, 0x12, E_VEX_256 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MOVSS, 0x10, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vmovss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_MOVSS, 0x10, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src));}
|
|
void vmovss(const Mem32& dst, const XmmReg& src) {AppendInstr(I_MOVSS, 0x11, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, R(src), W(dst));}
|
|
void vmovupd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVUPD, 0x10, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovupd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVUPD, 0x10, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vmovupd(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVUPD, 0x11, E_VEX_128_66_0F_WIG, R(src), W(dst));}
|
|
void vmovupd(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVUPD, 0x10, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovupd(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVUPD, 0x10, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vmovupd(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVUPD, 0x11, E_VEX_256_66_0F_WIG, R(src), W(dst));}
|
|
void vmovups(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_MOVUPS, 0x10, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovups(const XmmReg& dst, const Mem128& src) {AppendInstr(I_MOVUPS, 0x10, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vmovups(const Mem128& dst, const XmmReg& src) {AppendInstr(I_MOVUPS, 0x11, E_VEX_128_0F_WIG, R(src), W(dst));}
|
|
void vmovups(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_MOVUPS, 0x10, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vmovups(const YmmReg& dst, const Mem256& src) {AppendInstr(I_MOVUPS, 0x10, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vmovups(const Mem256& dst, const YmmReg& src) {AppendInstr(I_MOVUPS, 0x11, E_VEX_256_0F_WIG, R(src), W(dst));}
|
|
void vmpsadbw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& i) {AppendInstr(I_MPSADBW, 0x42, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src2), R(src1), i);}
|
|
void vmpsadbw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& i) {AppendInstr(I_MPSADBW, 0x42, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src2), R(src1), i);}
|
|
void vmulpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MULPD, 0x59, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MULPD, 0x59, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MULPD, 0x59, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MULPD, 0x59, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MULPS, 0x59, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_MULPS, 0x59, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_MULPS, 0x59, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_MULPS, 0x59, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MULSD, 0x59, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_MULSD, 0x59, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_MULSS, 0x59, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vmulss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_MULSS, 0x59, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vorpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ORPD, 0x56, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ORPD, 0x56, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ORPD, 0x56, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ORPD, 0x56, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_ORPS, 0x56, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_ORPS, 0x56, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_ORPS, 0x56, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vorps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_ORPS, 0x56, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpabsb(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSB, 0x1C, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpabsb(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSB, 0x1C, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpabsw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSW, 0x1D, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpabsw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSW, 0x1D, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpabsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PABSD, 0x1E, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpabsd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PABSD, 0x1E, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpacksswb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PACKSSWB, 0x63, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpacksswb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PACKSSWB, 0x63, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackssdw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PACKSSDW, 0x6B, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackssdw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PACKSSDW, 0x6B, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackuswb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PACKUSWB, 0x67, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackuswb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PACKUSWB, 0x67, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackusdw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PACKUSDW, 0x2B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpackusdw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PACKUSDW, 0x2B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDB, 0xFC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDB, 0xFC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDW, 0xFD, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDW, 0xFD, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDD, 0xFE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDD, 0xFE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDQ, 0xD4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDQ, 0xD4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddsb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDSB, 0xEC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddsb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDSB, 0xEC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDSW, 0xED, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDSW, 0xED, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddusb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDUSB, 0xDC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddusb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDUSB, 0xDC, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddusw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PADDUSW, 0xDD, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpaddusw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PADDUSW, 0xDD, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpalignr(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& i) {AppendInstr(I_PALIGNR, 0x0F, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vpalignr(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& i) {AppendInstr(I_PALIGNR, 0x0F, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), i);}
|
|
void vpand(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PAND, 0xDB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpand(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PAND, 0xDB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpandn(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PANDN, 0xDF, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpandn(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PANDN, 0xDF, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpavgb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PAVGB, 0xE0, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpavgb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PAVGB, 0xE0, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpavgw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PAVGW, 0xE3, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpavgw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PAVGW, 0xE3, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpblendvb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& mask) {AppendInstr(I_PBLENDVB, 0x4C, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src2), R(src1), R(mask));}
|
|
void vpblendvb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& mask) {AppendInstr(I_PBLENDVB, 0x4C, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src2), R(src1), R(mask));}
|
|
void vpblendw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_PBLENDW, 0x0E, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vpblendw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_PBLENDW, 0x0E, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void pclmulqdq(const XmmReg& dst, const XmmReg& src, const Imm8& mask) {AppendInstr(I_PCLMULQDQ, 0x0F3A44, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void pclmulqdq(const XmmReg& dst, const Mem128& src, const Imm8& mask) {AppendInstr(I_PCLMULQDQ, 0x0F3A44, E_MANDATORY_PREFIX_66, RW(dst), R(src), mask);}
|
|
void vpclmulqdq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mask) {AppendInstr(I_PCLMULQDQ, 0x44, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vpclmulqdq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& mask) {AppendInstr(I_PCLMULQDQ, 0x44, E_VEX_128 | E_VEX_66_0F3A, W(dst), R(src2), R(src1), mask);}
|
|
void vpcmpeqb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPEQB, 0x74, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPEQB, 0x74, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPEQW, 0x75, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPEQW, 0x75, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPEQD, 0x76, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPEQD, 0x76, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPEQQ, 0x29, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpeqq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPEQQ, 0x29, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPGTB, 0x64, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPGTB, 0x64, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPGTW, 0x65, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPGTW, 0x65, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPGTD, 0x66, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPGTD, 0x66, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PCMPGTQ, 0x37, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpgtq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PCMPGTQ, 0x37, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpcmpestri(const Reg& result, const XmmReg& src1, const Reg& len1, const XmmReg& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRI, 0x61, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), ecx), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void vpcmpestri(const Reg& result, const XmmReg& src1, const Reg& len1, const Mem128& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRI, 0x61, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), ecx), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void vpcmpestrm(const XmmReg& result, const XmmReg& src1, const Reg& len1, const XmmReg& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRM, 0x60, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), xmm0), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void vpcmpestrm(const XmmReg& result, const XmmReg& src1, const Reg& len1, const Mem128& src2, const Reg& len2, const Imm8& mode) {AppendInstr(I_PCMPESTRM, 0x60, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), xmm0), Dummy(R(len1), eax), Dummy(R(len2), edx));}
|
|
void vpcmpistri(const Reg& result, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRI, 0x63, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), ecx));}
|
|
void vpcmpistri(const Reg& result, const XmmReg& src1, const Mem128& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRI, 0x63, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), ecx));}
|
|
void vpcmpistrm(const XmmReg& result, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRM, 0x62, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), xmm0));}
|
|
void vpcmpistrm(const XmmReg& result, const XmmReg& src1, const Mem128& src2, const Imm8& mode) {AppendInstr(I_PCMPISTRM, 0x62, E_VEX_128 | E_VEX_66_0F3A, R(src1), R(src2), mode, Dummy(W(result), xmm0));}
|
|
void vpermilpd(const XmmReg& dst, const XmmReg& src, const XmmReg& ctrl) {AppendInstr(I_VPERMILPD, 0x0D, E_VEX_128_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilpd(const XmmReg& dst, const XmmReg& src, const Mem128& ctrl) {AppendInstr(I_VPERMILPD, 0x0D, E_VEX_128_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilpd(const YmmReg& dst, const YmmReg& src, const YmmReg& ctrl) {AppendInstr(I_VPERMILPD, 0x0D, E_VEX_256_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilpd(const YmmReg& dst, const YmmReg& src, const Mem256& ctrl) {AppendInstr(I_VPERMILPD, 0x0D, E_VEX_256_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilpd(const XmmReg& dst, const XmmReg& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPD, 0x05, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilpd(const XmmReg& dst, const Mem128& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPD, 0x05, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilpd(const YmmReg& dst, const YmmReg& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPD, 0x05, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilpd(const YmmReg& dst, const Mem256& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPD, 0x05, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilps(const XmmReg& dst, const XmmReg& src, const XmmReg& ctrl) {AppendInstr(I_VPERMILPS, 0x0C, E_VEX_128_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilps(const XmmReg& dst, const XmmReg& src, const Mem128& ctrl) {AppendInstr(I_VPERMILPS, 0x0C, E_VEX_128_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilps(const YmmReg& dst, const YmmReg& src, const YmmReg& ctrl) {AppendInstr(I_VPERMILPS, 0x0C, E_VEX_256_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilps(const YmmReg& dst, const YmmReg& src, const Mem256& ctrl) {AppendInstr(I_VPERMILPS, 0x0C, E_VEX_256_66_0F38_W0, W(dst), R(ctrl), R(src));}
|
|
void vpermilps(const XmmReg& dst, const XmmReg& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPS, 0x04, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilps(const XmmReg& dst, const Mem128& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPS, 0x04, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilps(const YmmReg& dst, const YmmReg& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPS, 0x04, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vpermilps(const YmmReg& dst, const Mem256& src, const Imm8& ctrl) {AppendInstr(I_VPERMILPS, 0x04, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src), ctrl);}
|
|
void vperm2f128(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& ctrl) {AppendInstr(I_VPERM2F128, 0x06, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src2), R(src1), ctrl);}
|
|
void vperm2f128(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& ctrl) {AppendInstr(I_VPERM2F128, 0x06, E_VEX_256 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(src2), R(src1), ctrl);}
|
|
void vpextrb(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x14, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrb(const Mem8& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x14, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrw(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0xC5, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(src), i);}
|
|
void vpextrw(const Mem16& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0x15, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrd(const Reg32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x16, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrd(const Mem32& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x16, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
#ifdef JITASM64
|
|
void vpextrb(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRB, 0x14, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrw(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRW, 0xC5, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(src), i);}
|
|
void vpextrd(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRD, 0x16, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, R(src), W(dst), i);}
|
|
void vpextrq(const Reg64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRQ, 0x16, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W1, R(src), W(dst), i);}
|
|
void vpextrq(const Mem64& dst, const XmmReg& src, const Imm8& i) {AppendInstr(I_PEXTRQ, 0x16, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W1, R(src), W(dst), i);}
|
|
#endif
|
|
void vphaddw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHADDW, 0x01, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphaddw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHADDW, 0x01, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphaddd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHADDD, 0x02, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphaddd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHADDD, 0x02, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphaddsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHADDSW, 0x03, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphaddsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHADDSW, 0x03, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphminposuw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PHMINPOSUW, 0x41, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vphminposuw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_PHMINPOSUW, 0x41, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vphsubw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHSUBW, 0x05, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphsubw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHSUBW, 0x05, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphsubd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHSUBD, 0x06, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphsubd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHSUBD, 0x06, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphsubsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PHSUBSW, 0x07, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vphsubsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PHSUBSW, 0x07, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpinsrb(const XmmReg& dst, const XmmReg& src, const Reg32& val, const Imm8& i) {AppendInstr(I_PINSRB, 0x20, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrb(const XmmReg& dst, const XmmReg& src, const Mem8& val, const Imm8& i) {AppendInstr(I_PINSRB, 0x20, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrw(const XmmReg& dst, const XmmReg& src, const Reg32& val, const Imm8& i) {AppendInstr(I_PINSRW, 0xC4, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrw(const XmmReg& dst, const XmmReg& src, const Mem16& val, const Imm8& i) {AppendInstr(I_PINSRW, 0xC4, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrd(const XmmReg& dst, const XmmReg& src, const Reg32& val, const Imm8& i) {AppendInstr(I_PINSRD, 0x22, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrd(const XmmReg& dst, const XmmReg& src, const Mem32& val, const Imm8& i) {AppendInstr(I_PINSRD, 0x22, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
#ifdef JITASM64
|
|
void vpinsrb(const XmmReg& dst, const XmmReg& src, const Reg64& val, const Imm8& i) {AppendInstr(I_PINSRB, 0x20, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrw(const XmmReg& dst, const XmmReg& src, const Reg64& val, const Imm8& i) {AppendInstr(I_PINSRW, 0xC4, E_VEX_128 | E_VEX_66_0F | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrd(const XmmReg& dst, const XmmReg& src, const Reg64& val, const Imm8& i) {AppendInstr(I_PINSRD, 0x22, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W0, W(dst), R(val), R(src), i);}
|
|
void vpinsrq(const XmmReg& dst, const XmmReg& src, const Reg64& val, const Imm8& i) {AppendInstr(I_PINSRQ, 0x22, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W1, W(dst), R(val), R(src), i);}
|
|
void vpinsrq(const XmmReg& dst, const XmmReg& src, const Mem64& val, const Imm8& i) {AppendInstr(I_PINSRQ, 0x22, E_VEX_128 | E_VEX_66_0F3A | E_VEX_W1, W(dst), R(val), R(src), i);}
|
|
#endif
|
|
void vpmaddwd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMADDWD, 0xF5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaddwd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMADDWD, 0xF5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaddubsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMADDUBSW,0x04, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaddubsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMADDUBSW,0x04, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXSB, 0x3C, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXSB, 0x3C, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXSW, 0xEE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXSW, 0xEE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXSD, 0x3D, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxsd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXSD, 0x3D, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxub(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXUB, 0xDE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxub(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXUB, 0xDE, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxuw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXUW, 0x3E, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxuw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXUW, 0x3E, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxud(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMAXUD, 0x3F, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmaxud(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMAXUD, 0x3F, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINSB, 0x38, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINSB, 0x38, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINSW, 0xEA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINSW, 0xEA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINSD, 0x39, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminsd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINSD, 0x39, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminub(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINUB, 0xDA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminub(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINUB, 0xDA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminuw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINUW, 0x3A, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminuw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINUW, 0x3A, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminud(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMINUD, 0x3B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpminud(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMINUD, 0x3B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmovmskb(const Reg32& dst, const XmmReg& src) {AppendInstr(I_PMOVMSKB, 0xD7, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
#ifdef JITASM64
|
|
void vpmovmskb(const Reg64& dst, const XmmReg& src) {AppendInstr(I_PMOVMSKB, 0xD7, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
#endif
|
|
void vpmovsxbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBW, 0x20, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxbw(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXBW, 0x20, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxbd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBD, 0x21, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxbd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVSXBD, 0x21, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxbq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXBQ, 0x22, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxbq(const XmmReg& dst, const Mem16& src) {AppendInstr(I_PMOVSXBQ, 0x22, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXWD, 0x23, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxwd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXWD, 0x23, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXWQ, 0x24, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxwq(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVSXWQ, 0x24, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVSXDQ, 0x25, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovsxdq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVSXDQ, 0x25, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBW, 0x30, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbw(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXBW, 0x30, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBD, 0x31, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbd(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVZXBD, 0x31, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXBQ, 0x32, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxbq(const XmmReg& dst, const Mem16& src) {AppendInstr(I_PMOVZXBQ, 0x32, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXWD, 0x33, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxwd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXWD, 0x33, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXWQ, 0x34, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxwq(const XmmReg& dst, const Mem32& src) {AppendInstr(I_PMOVZXWQ, 0x34, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_PMOVZXDQ, 0x35, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmovzxdq(const XmmReg& dst, const Mem64& src) {AppendInstr(I_PMOVZXDQ, 0x35, E_VEX_128_66_0F38_WIG, W(dst), R(src));}
|
|
void vpmulhuw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2){AppendInstr(I_PMULHUW, 0xE4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulhuw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2){AppendInstr(I_PMULHUW, 0xE4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulhrsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2){AppendInstr(I_PMULHRSW, 0x0B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulhrsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2){AppendInstr(I_PMULHRSW, 0x0B, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulhw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMULHW, 0xE5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulhw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMULHW, 0xE5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmullw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMULLW, 0xD5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmullw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMULLW, 0xD5, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulld(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMULLD, 0x40, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmulld(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMULLD, 0x40, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmuludq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2){AppendInstr(I_PMULUDQ, 0xF4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmuludq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2){AppendInstr(I_PMULUDQ, 0xF4, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmuldq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PMULDQ, 0x28, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpmuldq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PMULDQ, 0x28, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpor(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_POR, 0xEB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpor(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_POR, 0xEB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsadbw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSADBW, 0xF6, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsadbw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSADBW, 0xF6, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpshufb(const XmmReg& dst, const XmmReg& src, const XmmReg& order) {AppendInstr(I_PSHUFB, 0x00, E_VEX_128_66_0F38_WIG, W(dst), R(order), R(src));}
|
|
void vpshufb(const XmmReg& dst, const XmmReg& src, const Mem128& order) {AppendInstr(I_PSHUFB, 0x00, E_VEX_128_66_0F38_WIG, W(dst), R(order), R(src));}
|
|
void vpshufd(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFD, 0x70, E_VEX_128_66_0F_WIG, W(dst), R(src), order);}
|
|
void vpshufd(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFD, 0x70, E_VEX_128_66_0F_WIG, W(dst), R(src), order);}
|
|
void vpshufhw(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFHW, 0x70, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src), order);}
|
|
void vpshufhw(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFHW, 0x70, E_VEX_128 | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src), order);}
|
|
void vpshuflw(const XmmReg& dst, const XmmReg& src, const Imm8& order) {AppendInstr(I_PSHUFLW, 0x70, E_VEX_128 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src), order);}
|
|
void vpshuflw(const XmmReg& dst, const Mem128& src, const Imm8& order) {AppendInstr(I_PSHUFLW, 0x70, E_VEX_128 | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src), order);}
|
|
void vpsignb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSIGNB, 0x08, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsignb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSIGNB, 0x08, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsignw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSIGNW, 0x09, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsignw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSIGNW, 0x09, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsignd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSIGND, 0x0A, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsignd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSIGND, 0x0A, E_VEX_128_66_0F38_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsllw(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSLLW, 0xF1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsllw(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSLLW, 0xF1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsllw(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSLLW, 0x71, E_VEX_128_66_0F_WIG, Imm8(6), R(src), W(dst), count);}
|
|
void vpslld(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSLLD, 0xF2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpslld(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSLLD, 0xF2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpslld(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSLLD, 0x72, E_VEX_128_66_0F_WIG, Imm8(6), R(src), W(dst), count);}
|
|
void vpsllq(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSLLQ, 0xF3, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsllq(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSLLQ, 0xF3, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsllq(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSLLQ, 0x73, E_VEX_128_66_0F_WIG, Imm8(6), R(src), W(dst), count);}
|
|
void vpslldq(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSLLDQ, 0x73, E_VEX_128_66_0F_WIG, Imm8(7), R(src), W(dst), count);}
|
|
void vpsraw(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSRAW, 0xE1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsraw(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSRAW, 0xE1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsraw(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRAW, 0x71, E_VEX_128_66_0F_WIG, Imm8(4), R(src), W(dst), count);}
|
|
void vpsrad(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSRAD, 0xE2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrad(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSRAD, 0xE2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrad(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRAD, 0x72, E_VEX_128_66_0F_WIG, Imm8(4), R(src), W(dst), count);}
|
|
void vpsrlw(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSRLW, 0xD1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrlw(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSRLW, 0xD1, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrlw(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRLW, 0x71, E_VEX_128_66_0F_WIG, Imm8(2), R(src), W(dst), count);}
|
|
void vpsrld(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSRLD, 0xD2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrld(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSRLD, 0xD2, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrld(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRLD, 0x72, E_VEX_128_66_0F_WIG, Imm8(2), R(src), W(dst), count);}
|
|
void vpsrlq(const XmmReg& dst, const XmmReg& src, const XmmReg& count) {AppendInstr(I_PSRLQ, 0xD3, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrlq(const XmmReg& dst, const XmmReg& src, const Mem128& count) {AppendInstr(I_PSRLQ, 0xD3, E_VEX_128_66_0F_WIG, W(dst), R(count), R(src));}
|
|
void vpsrlq(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRLQ, 0x73, E_VEX_128_66_0F_WIG, Imm8(2), R(src), W(dst), count);}
|
|
void vpsrldq(const XmmReg& dst, const XmmReg& src, const Imm8& count) {AppendInstr(I_PSRLDQ, 0x73, E_VEX_128_66_0F_WIG, Imm8(3), R(src), W(dst), count);}
|
|
void vptest(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PTEST, 0x17, E_VEX_128_66_0F38_WIG, R(src1), R(src2));}
|
|
void vptest(const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PTEST, 0x17, E_VEX_128_66_0F38_WIG, R(src1), R(src2));}
|
|
void vptest(const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_PTEST, 0x17, E_VEX_256_66_0F38_WIG, R(src1), R(src2));}
|
|
void vptest(const YmmReg& src1, const Mem256& src2) {AppendInstr(I_PTEST, 0x17, E_VEX_256_66_0F38_WIG, R(src1), R(src2));}
|
|
void vtestps(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VTESTPS, 0x0E, E_VEX_128_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestps(const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VTESTPS, 0x0E, E_VEX_128_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestps(const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VTESTPS, 0x0E, E_VEX_256_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestps(const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VTESTPS, 0x0E, E_VEX_256_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestpd(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VTESTPD, 0x0F, E_VEX_128_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestpd(const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VTESTPD, 0x0F, E_VEX_128_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestpd(const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VTESTPD, 0x0F, E_VEX_256_66_0F38_W0, R(src1), R(src2));}
|
|
void vtestpd(const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VTESTPD, 0x0F, E_VEX_256_66_0F38_W0, R(src1), R(src2));}
|
|
void vpsubb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBB, 0xF8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBB, 0xF8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBW, 0xF9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBW, 0xF9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBD, 0xFA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBD, 0xFA, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBQ, 0xFB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBQ, 0xFB, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubsb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBSB, 0xE8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubsb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBSB, 0xE8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubsw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBSW, 0xE9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubsw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBSW, 0xE9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubusb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBUSB, 0xD8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubusb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBUSB, 0xD8, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubusw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PSUBUSW, 0xD9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpsubusw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PSUBUSW, 0xD9, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhbw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKHBW, 0x68, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhbw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKHBW, 0x68, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhwd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKHWD, 0x69, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhwd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKHWD, 0x69, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhdq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKHDQ, 0x6A, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhdq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKHDQ, 0x6A, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhqdq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKHQDQ, 0x6D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckhqdq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKHQDQ, 0x6D, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklbw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKLBW, 0x60, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklbw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKLBW, 0x60, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklwd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKLWD, 0x61, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklwd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKLWD, 0x61, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckldq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKLDQ, 0x62, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpckldq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKLDQ, 0x62, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklqdq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PUNPCKLQDQ, 0x6C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpunpcklqdq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PUNPCKLQDQ, 0x6C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpxor(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_PXOR, 0xEF, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vpxor(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_PXOR, 0xEF, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vrcpps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RCPPS, 0x53, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vrcpps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_RCPPS, 0x53, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vrcpps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_RCPPS, 0x53, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vrcpps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_RCPPS, 0x53, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vrcpss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_RCPSS, 0x53, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vrcpss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_RCPSS, 0x53, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vrsqrtps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_RSQRTPS, 0x52, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vrsqrtps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_RSQRTPS, 0x52, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vrsqrtps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_RSQRTPS, 0x52, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vrsqrtps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_RSQRTPS, 0x52, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vrsqrtss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2){AppendInstr(I_RSQRTSS, 0x52, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vrsqrtss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_RSQRTSS, 0x52, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vroundpd(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x09, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundpd(const XmmReg& dst, const Mem128& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x09, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundpd(const YmmReg& dst, const YmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x09, E_VEX_256 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundpd(const YmmReg& dst, const Mem256& src, const Imm8& mode) {AppendInstr(I_ROUNDPD, 0x09, E_VEX_256 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundps(const XmmReg& dst, const XmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x08, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundps(const XmmReg& dst, const Mem128& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x08, E_VEX_128 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundps(const YmmReg& dst, const YmmReg& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x08, E_VEX_256 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundps(const YmmReg& dst, const Mem256& src, const Imm8& mode) {AppendInstr(I_ROUNDPS, 0x08, E_VEX_256 | E_VEX_66_0F3A | E_VEX_WIG, W(dst), R(src), mode);}
|
|
void vroundsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_ROUNDSD, 0x0B, E_VEX_LIG | E_VEX_66_0F3A | E_VEX_WIG, RW(dst), R(src2), R(src1), mode);}
|
|
void vroundsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const Imm8& mode) {AppendInstr(I_ROUNDSD, 0x0B, E_VEX_LIG | E_VEX_66_0F3A | E_VEX_WIG, RW(dst), R(src2), R(src1), mode);}
|
|
void vroundss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& mode) {AppendInstr(I_ROUNDSS, 0x0A, E_VEX_LIG | E_VEX_66_0F3A | E_VEX_WIG, RW(dst), R(src2), R(src1), mode);}
|
|
void vroundss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const Imm8& mode) {AppendInstr(I_ROUNDSS, 0x0A, E_VEX_LIG | E_VEX_66_0F3A | E_VEX_WIG, RW(dst), R(src2), R(src1), mode);}
|
|
void vshufpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& sel) {AppendInstr(I_SHUFPD, 0xC6, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& sel) {AppendInstr(I_SHUFPD, 0xC6, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& sel) {AppendInstr(I_SHUFPD, 0xC6, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& sel) {AppendInstr(I_SHUFPD, 0xC6, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& sel) {AppendInstr(I_SHUFPS, 0xC6, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& sel) {AppendInstr(I_SHUFPS, 0xC6, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Imm8& sel) {AppendInstr(I_SHUFPS, 0xC6, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vshufps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const Imm8& sel) {AppendInstr(I_SHUFPS, 0xC6, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1), sel);}
|
|
void vsqrtpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTPD, 0x51, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vsqrtpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SQRTPD, 0x51, E_VEX_128_66_0F_WIG, W(dst), R(src));}
|
|
void vsqrtpd(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_SQRTPD, 0x51, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vsqrtpd(const YmmReg& dst, const Mem256& src) {AppendInstr(I_SQRTPD, 0x51, E_VEX_256_66_0F_WIG, W(dst), R(src));}
|
|
void vsqrtps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_SQRTPS, 0x51, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vsqrtps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_SQRTPS, 0x51, E_VEX_128_0F_WIG, W(dst), R(src));}
|
|
void vsqrtps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_SQRTPS, 0x51, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vsqrtps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_SQRTPS, 0x51, E_VEX_256_0F_WIG, W(dst), R(src));}
|
|
void vsqrtsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SQRTSD, 0x51, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsqrtsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_SQRTSD, 0x51, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsqrtss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SQRTSS, 0x51, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsqrtss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_SQRTSS, 0x51, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vstmxcsr(const Mem32& dst) {AppendInstr(I_STMXCSR, 0xAE, E_VEX_LZ | E_VEX_0F | E_VEX_WIG, Imm8(3), W(dst));}
|
|
void vsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SUBPD, 0x5C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_SUBPD, 0x5C, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_SUBPD, 0x5C, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_SUBPD, 0x5C, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SUBPS, 0x5C, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_SUBPS, 0x5C, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_SUBPS, 0x5C, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_SUBPS, 0x5C, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SUBSD, 0x5C, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_SUBSD, 0x5C, E_VEX_LIG | E_VEX_F2_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_SUBSS, 0x5C, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vsubss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2) {AppendInstr(I_SUBSS, 0x5C, E_VEX_LIG | E_VEX_F3_0F | E_VEX_WIG, W(dst), R(src2), R(src1));}
|
|
void vucomisd(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UCOMISD, 0x2E, E_VEX_LIG | E_VEX_66_0F | E_VEX_WIG, R(src1), R(src2));}
|
|
void vucomisd(const XmmReg& src1, const Mem64& src2) {AppendInstr(I_UCOMISD, 0x2E, E_VEX_LIG | E_VEX_66_0F | E_VEX_WIG, R(src1), R(src2));}
|
|
void vucomiss(const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UCOMISS, 0x2E, E_VEX_LIG | E_VEX_0F | E_VEX_WIG, R(src1), R(src2));}
|
|
void vucomiss(const XmmReg& src1, const Mem32& src2) {AppendInstr(I_UCOMISS, 0x2E, E_VEX_LIG | E_VEX_0F | E_VEX_WIG, R(src1), R(src2));}
|
|
void vunpckhpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UNPCKHPD, 0x15, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_UNPCKHPD, 0x15, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_UNPCKHPD, 0x15, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_UNPCKHPD, 0x15, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UNPCKHPS, 0x15, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_UNPCKHPS, 0x15, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_UNPCKHPS, 0x15, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpckhps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_UNPCKHPS, 0x15, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UNPCKLPD, 0x14, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_UNPCKLPD, 0x14, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_UNPCKLPD, 0x14, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_UNPCKLPD, 0x14, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_UNPCKLPS, 0x14, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_UNPCKLPS, 0x14, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_UNPCKLPS, 0x14, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vunpcklps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_UNPCKLPS, 0x14, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_128_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_256_66_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_128_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vxorps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_XORPS, 0x57, E_VEX_256_0F_WIG, W(dst), R(src2), R(src1));}
|
|
void vzeroall() {AppendInstr(I_VZEROUPPER, 0x77, E_VEX_256_0F_WIG);}
|
|
void vzeroupper() {AppendInstr(I_VZEROUPPER, 0x77, E_VEX_128_0F_WIG);}
|
|
|
|
// FMA
|
|
void vfmadd132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD132PD, 0x98, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD132PD, 0x98, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD132PD, 0x98, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD132PD, 0x98, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD213PD, 0xA8, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD213PD, 0xA8, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD213PD, 0xA8, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD213PD, 0xA8, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD231PD, 0xB8, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD231PD, 0xB8, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD231PD, 0xB8, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD231PD, 0xB8, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD132PS, 0x98, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD132PS, 0x98, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD132PS, 0x98, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD132PS, 0x98, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD213PS, 0xA8, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD213PS, 0xA8, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD213PS, 0xA8, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD213PS, 0xA8, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD231PS, 0xB8, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADD231PS, 0xB8, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADD231PS, 0xB8, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADD231PS, 0xB8, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD132SD, 0x99, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD132SD, 0x99, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD213SD, 0xA9, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD213SD, 0xA9, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD231SD, 0xB9, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD231SD, 0xB9, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD132SS, 0x99, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd132ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD132SS, 0x99, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD213SS, 0xA9, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd213ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD213SS, 0xA9, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADD231SS, 0xB9, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmadd231ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMADD231SS, 0xB9, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB132PD, 0x96, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB132PD, 0x96, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB132PD, 0x96, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB132PD, 0x96, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB213PD, 0xA6, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB213PD, 0xA6, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB213PD, 0xA6, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB213PD, 0xA6, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB231PD, 0xB6, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB231PD, 0xB6, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB231PD, 0xB6, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB231PD, 0xB6, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB132PS, 0x96, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB132PS, 0x96, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB132PS, 0x96, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB132PS, 0x96, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB213PS, 0xA6, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB213PS, 0xA6, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB213PS, 0xA6, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB213PS, 0xA6, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMADDSUB231PS, 0xB6, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMADDSUB231PS, 0xB6, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMADDSUB231PS, 0xB6, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmaddsub231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMADDSUB231PS, 0xB6, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD132PD, 0x97, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD132PD, 0x97, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD132PD, 0x97, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD132PD, 0x97, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD213PD, 0xA7, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD213PD, 0xA7, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD213PD, 0xA7, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD213PD, 0xA7, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD231PD, 0xB7, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD231PD, 0xB7, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD231PD, 0xB7, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD231PD, 0xB7, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD132PS, 0x97, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD132PS, 0x97, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD132PS, 0x97, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD132PS, 0x97, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD213PS, 0xA7, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD213PS, 0xA7, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD213PS, 0xA7, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD213PS, 0xA7, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUBADD231PS, 0xB7, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUBADD231PS, 0xB7, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUBADD231PS, 0xB7, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsubadd231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUBADD231PS, 0xB7, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB132PD, 0x9A, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB132PD, 0x9A, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB132PD, 0x9A, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB132PD, 0x9A, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB213PD, 0xAA, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB213PD, 0xAA, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB213PD, 0xAA, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB213PD, 0xAA, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB231PD, 0xBA, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB231PD, 0xBA, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB231PD, 0xBA, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB231PD, 0xBA, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB132PS, 0x9A, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB132PS, 0x9A, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB132PS, 0x9A, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB132PS, 0x9A, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB213PS, 0xAA, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB213PS, 0xAA, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB213PS, 0xAA, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB213PS, 0xAA, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB231PS, 0xBA, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFMSUB231PS, 0xBA, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFMSUB231PS, 0xBA, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFMSUB231PS, 0xBA, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB132SD, 0x9B, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB132SD, 0x9B, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB213SD, 0xAB, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB213SD, 0xAB, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB231SD, 0xBB, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB231SD, 0xBB, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB132SS, 0x9B, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub132ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB132SS, 0x9B, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB213SS, 0xAB, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub213ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB213SS, 0xAB, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFMSUB231SS, 0xBB, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfmsub231ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFMSUB231SS, 0xBB, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD132PD, 0x9C, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD132PD, 0x9C, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD132PD, 0x9C, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD132PD, 0x9C, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD213PD, 0xAC, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD213PD, 0xAC, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD213PD, 0xAC, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD213PD, 0xAC, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD231PD, 0xBC, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD231PD, 0xBC, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD231PD, 0xBC, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD231PD, 0xBC, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD132PS, 0x9C, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD132PS, 0x9C, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD132PS, 0x9C, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD132PS, 0x9C, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD213PS, 0xAC, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD213PS, 0xAC, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD213PS, 0xAC, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD213PS, 0xAC, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD231PS, 0xBC, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMADD231PS, 0xBC, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMADD231PS, 0xBC, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMADD231PS, 0xBC, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD132SD, 0x9D, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD132SD, 0x9D, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD213SD, 0xAD, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD213SD, 0xAD, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD231SD, 0xBD, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD231SD, 0xBD, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD132SS, 0x9D, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd132ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD132SS, 0x9D, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD213SS, 0xAD, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd213ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD213SS, 0xAD, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMADD231SS, 0xBD, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmadd231ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMADD231SS, 0xBD, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB132PD, 0x9E, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB132PD, 0x9E, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB132PD, 0x9E, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB132PD, 0x9E, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB213PD, 0xAE, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB213PD, 0xAE, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB213PD, 0xAE, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB213PD, 0xAE, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB231PD, 0xBE, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB231PD, 0xBE, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB231PD, 0xBE, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB231PD, 0xBE, E_VEX_256_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB132PS, 0x9E, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB132PS, 0x9E, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB132PS, 0x9E, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB132PS, 0x9E, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB213PS, 0xAE, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB213PS, 0xAE, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB213PS, 0xAE, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB213PS, 0xAE, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB231PS, 0xBE, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2) {AppendInstr(I_VFNMSUB231PS, 0xBE, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2) {AppendInstr(I_VFNMSUB231PS, 0xBE, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2) {AppendInstr(I_VFNMSUB231PS, 0xBE, E_VEX_256_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB132SD, 0x9F, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB132SD, 0x9F, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB213SD, 0xAF, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB213SD, 0xAF, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231sd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB231SD, 0xBF, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231sd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB231SD, 0xBF, E_VEX_128_66_0F38_W1, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB132SS, 0x9F, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub132ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB132SS, 0x9F, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB213SS, 0xAF, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub213ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB213SS, 0xAF, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2) {AppendInstr(I_VFNMSUB231SS, 0xBF, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
void vfnmsub231ss(const XmmReg& dst, const XmmReg& src1, const Mem64& src2) {AppendInstr(I_VFNMSUB231SS, 0xBF, E_VEX_128_66_0F38_W0, RW(dst), R(src2), R(src1));}
|
|
|
|
// F16C
|
|
#ifdef JITASM64
|
|
void rdfsbase(const Reg32& dst) {AppendInstr(I_RDFSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3, Imm8(0), W(dst));}
|
|
void rdfsbase(const Reg64& dst) {AppendInstr(I_RDFSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, Imm8(0), W(dst));}
|
|
void rdgsbase(const Reg32& dst) {AppendInstr(I_RDGSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3, Imm8(1), W(dst));}
|
|
void rdgsbase(const Reg64& dst) {AppendInstr(I_RDGSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, Imm8(1), W(dst));}
|
|
#endif
|
|
void rdrand(const Reg16& dst) {AppendInstr(I_RDRAND, 0x0FC7, E_OPERAND_SIZE_PREFIX, Imm8(6), W(dst));}
|
|
void rdrand(const Reg32& dst) {AppendInstr(I_RDRAND, 0x0FC7, 0, Imm8(6), W(dst));}
|
|
#ifdef JITASM64
|
|
void rdrand(const Reg64& dst) {AppendInstr(I_RDRAND, 0x0FC7, E_REXW_PREFIX, Imm8(6), W(dst));}
|
|
#endif
|
|
#ifdef JITASM64
|
|
void wrfsbase(const Reg32& src) {AppendInstr(I_WRFSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3, Imm8(2), R(src));}
|
|
void wrfsbase(const Reg64& src) {AppendInstr(I_WRFSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, Imm8(2), R(src));}
|
|
void wrgsbase(const Reg32& src) {AppendInstr(I_WRGSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3, Imm8(3), R(src));}
|
|
void wrgsbase(const Reg64& src) {AppendInstr(I_WRGSBASE, 0x0FAE, E_MANDATORY_PREFIX_F3 | E_REXW_PREFIX, Imm8(3), R(src));}
|
|
#endif
|
|
void vcvtph2ps(const YmmReg& dst, const XmmReg& src) {AppendInstr(I_VCVTPH2PS, 0x13, E_VEX_256_66_0F38_W0, W(dst), R(src));}
|
|
void vcvtph2ps(const YmmReg& dst, const Mem128& src) {AppendInstr(I_VCVTPH2PS, 0x13, E_VEX_256_66_0F38_W0, W(dst), R(src));}
|
|
void vcvtph2ps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VCVTPH2PS, 0x13, E_VEX_128_66_0F38_W0, W(dst), R(src));}
|
|
void vcvtph2ps(const XmmReg& dst, const Mem64& src) {AppendInstr(I_VCVTPH2PS, 0x13, E_VEX_128_66_0F38_W0, W(dst), R(src));}
|
|
void vcvtps2ph(const XmmReg& dst, const YmmReg& src, const Imm8& rc) {AppendInstr(I_VCVTPS2PH, 0x1D, E_VEX_256_66_0F3A_W0, R(src), W(dst), rc);}
|
|
void vcvtps2ph(const Mem128& dst, const YmmReg& src, const Imm8& rc) {AppendInstr(I_VCVTPS2PH, 0x1D, E_VEX_256_66_0F3A_W0, R(src), W(dst), rc);}
|
|
void vcvtps2ph(const XmmReg& dst, const XmmReg& src, const Imm8& rc) {AppendInstr(I_VCVTPS2PH, 0x1D, E_VEX_128_66_0F3A_W0, R(src), W(dst), rc);}
|
|
void vcvtps2ph(const Mem64& dst, const XmmReg& src, const Imm8& rc) {AppendInstr(I_VCVTPS2PH, 0x1D, E_VEX_128_66_0F3A_W0, R(src), W(dst), rc);}
|
|
|
|
// XOP
|
|
void vfrczpd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VFRCZPD, 0x81, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczpd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VFRCZPD, 0x81, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczpd(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_VFRCZPD, 0x81, E_XOP_256 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczpd(const YmmReg& dst, const Mem256& src) {AppendInstr(I_VFRCZPD, 0x81, E_XOP_256 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczps(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VFRCZPS, 0x80, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczps(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VFRCZPS, 0x80, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczps(const YmmReg& dst, const YmmReg& src) {AppendInstr(I_VFRCZPS, 0x80, E_XOP_256 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczps(const YmmReg& dst, const Mem256& src) {AppendInstr(I_VFRCZPS, 0x80, E_XOP_256 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczsd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VFRCZSD, 0x83, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczsd(const XmmReg& dst, const Mem64& src) {AppendInstr(I_VFRCZSD, 0x83, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczss(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VFRCZSS, 0x82, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vfrczss(const XmmReg& dst, const Mem32& src) {AppendInstr(I_VFRCZSS, 0x82, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vpcmov(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpcmov(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpcmov(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_128 | E_XOP_M01000 | E_XOP_W1 | E_XOP_P00, W(dst), R(src3), R(src1), R(src2));}
|
|
void vpcmov(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_256 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpcmov(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_256 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpcmov(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VPCMOV, 0xA2, E_XOP_256 | E_XOP_M01000 | E_XOP_W1 | E_XOP_P00, W(dst), R(src3), R(src1), R(src2));}
|
|
void vpcomb(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMB, 0xCC, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
void vpcomb(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMB, 0xCC, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
void vpcomd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMD, 0xCE, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
void vpcomd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMD, 0xCE, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
void vpcomq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMQ, 0xCF, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
void vpcomq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMQ, 0xCF, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomub(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMUB, 0x6C, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomub(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMUB, 0x6C, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomud(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMUD, 0x6E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomud(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMUD, 0x6E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomuq(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMUQ, 0x6F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomuq(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMUQ, 0x6F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomuw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMUW, 0x6D, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomuw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMUW, 0x6D, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomw(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Imm8& type) {AppendInstr(I_VPCOMW, 0xCD, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpcomw(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const Imm8& type) {AppendInstr(I_VPCOMW, 0xCD, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), type);}
|
|
//void vpermil2pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_128 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2pd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_128 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2pd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_128 | E_XOP_M00011 | E_XOP_W1 | E_XOP_P01, W(dst), R(src3), R(src1), R(src2));}
|
|
//void vpermil2pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_256 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2pd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_256 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2pd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VPERMIL2PD, 0x49, E_XOP_256 | E_XOP_M00011 | E_XOP_W1 | E_XOP_P01, W(dst), R(src3), R(src1), R(src2));}
|
|
//void vpermil2ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_128 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2ps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_128 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2ps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_128 | E_XOP_M00011 | E_XOP_W1 | E_XOP_P01, W(dst), R(src3), R(src1), R(src2));}
|
|
//void vpermil2ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_256 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2ps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_256 | E_XOP_M00011 | E_XOP_W0 | E_XOP_P01, W(dst), R(src2), R(src1), R(src3));}
|
|
//void vpermil2ps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VPERMIL2PS, 0x48, E_XOP_256 | E_XOP_M00011 | E_XOP_W1 | E_XOP_P01, W(dst), R(src3), R(src1), R(src2));}
|
|
void vphaddbd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDBD, 0xC2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddbd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDBD, 0xC2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddbq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDBQ, 0xC3, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddbq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDBQ, 0xC3, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDBW, 0xC1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddbw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDBW, 0xC1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadddq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDDQ, 0xCB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadddq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDDQ, 0xCB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUBD, 0xD2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUBD, 0xD2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUBQ, 0xD3, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUBQ, 0xD3, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUBW, 0xD1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddubw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUBW, 0xD1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
//void vphaddudq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUDQ, 0xDB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
//void vphaddudq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUDQ, 0xDB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadduwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUWD, 0xD6, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadduwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUWD, 0xD6, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadduwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUWQ, 0xD7, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphadduwq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUWQ, 0xD7, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDUWD, 0xC6, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDUWD, 0xC6, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddwq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHADDWQ, 0xC7, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphaddwq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHADDWQ, 0xC7, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphsubbw(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHSUBBW, 0xE1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphsubbw(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHSUBBW, 0xE1, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
//void vphsubdq(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHSUBDQ, 0xDB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
//void vphsubdq(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHSUBDQ, 0xDB, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphsubwd(const XmmReg& dst, const XmmReg& src) {AppendInstr(I_VPHSUBWD, 0xE2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vphsubwd(const XmmReg& dst, const Mem128& src) {AppendInstr(I_VPHSUBWD, 0xE2, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src));}
|
|
void vpmacsdd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDD, 0x9E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsdd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDD, 0x9E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsdqh(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDQH, 0x9F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsdqh(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDQH, 0x9F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsdql(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDQL, 0x97, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsdql(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSDQL, 0x97, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDD, 0x8E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDD, 0x8E, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdqh(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDQH, 0x8F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdqh(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDQH, 0x8F, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdql(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDQL, 0x87, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssdql(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSDQL, 0x87, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsswd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSWD, 0x86, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsswd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSWD, 0x86, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssww(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSWW, 0x85, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacssww(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSSWW, 0x85, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacswd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSWD, 0x96, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacswd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSWD, 0x96, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsww(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMACSWW, 0x95, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmacsww(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMACSWW, 0x95, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmadcsswd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMADCSSWD, 0xA6, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmadcsswd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMADCSSWD, 0xA6, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmadcswd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPMADCSWD, 0xB6, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpmadcswd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPMADCSWD, 0xB6, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpperm(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VPPERM, 0xA3, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpperm(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VPPERM, 0xA3, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src2), R(src1), R(src3));}
|
|
void vpperm(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VPPERM, 0xA3, E_XOP_128 | E_XOP_M01000 | E_XOP_W1 | E_XOP_P00, W(dst), R(src3), R(src1), R(src2));}
|
|
void vprotb(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPROTB, 0x90, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotb(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPROTB, 0x90, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotb(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPROTB, 0x90, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vprotb(const XmmReg& dst, const XmmReg& src1, const Imm8& count) {AppendInstr(I_VPROTB, 0xC0, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotb(const XmmReg& dst, const Mem128& src1, const Imm8& count) {AppendInstr(I_VPROTB, 0xC0, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotd(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPROTD, 0x92, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotd(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPROTD, 0x92, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotd(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPROTD, 0x92, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vprotd(const XmmReg& dst, const XmmReg& src1, const Imm8& count) {AppendInstr(I_VPROTD, 0xC2, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotd(const XmmReg& dst, const Mem128& src1, const Imm8& count) {AppendInstr(I_VPROTD, 0xC2, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotq(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPROTQ, 0x93, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotq(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPROTQ, 0x93, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotq(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPROTQ, 0x93, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vprotq(const XmmReg& dst, const XmmReg& src1, const Imm8& count) {AppendInstr(I_VPROTQ, 0xC3, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotq(const XmmReg& dst, const Mem128& src1, const Imm8& count) {AppendInstr(I_VPROTQ, 0xC3, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotw(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPROTW, 0x91, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotw(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPROTW, 0x91, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vprotw(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPROTW, 0x91, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vprotw(const XmmReg& dst, const XmmReg& src1, const Imm8& count) {AppendInstr(I_VPROTW, 0xC1, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vprotw(const XmmReg& dst, const Mem128& src1, const Imm8& count) {AppendInstr(I_VPROTW, 0xC1, E_XOP_128 | E_XOP_M01000 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), count);}
|
|
void vpshab(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHAB, 0x98, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshab(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHAB, 0x98, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshab(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHAB, 0x98, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshad(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHAD, 0x9A, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshad(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHAD, 0x9A, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshad(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHAD, 0x9A, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshaq(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHAQ, 0x9B, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshaq(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHAQ, 0x9B, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshaq(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHAQ, 0x9B, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshaw(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHAW, 0x99, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshaw(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHAW, 0x99, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshaw(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHAW, 0x99, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshlb(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHLB, 0x94, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlb(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHLB, 0x94, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlb(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHLB, 0x94, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshld(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHLD, 0x96, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshld(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHLD, 0x96, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshld(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHLD, 0x96, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshlq(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHLQ, 0x97, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlq(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHLQ, 0x97, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlq(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHLQ, 0x97, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
void vpshlw(const XmmReg& dst, const XmmReg& src1, const XmmReg& count) {AppendInstr(I_VPSHLW, 0x95, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlw(const XmmReg& dst, const Mem128& src1, const XmmReg& count) {AppendInstr(I_VPSHLW, 0x95, E_XOP_128 | E_XOP_M01001 | E_XOP_W0 | E_XOP_P00, W(dst), R(src1), R(count));}
|
|
void vpshlw(const XmmReg& dst, const XmmReg& src1, const Mem128& count) {AppendInstr(I_VPSHLW, 0x95, E_XOP_128 | E_XOP_M01001 | E_XOP_W1 | E_XOP_P00, W(dst), R(count), R(src1));}
|
|
|
|
// FMA4
|
|
void vfmaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMADDPD, 0x69, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMADDPS, 0x68, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSD, 0x6B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSD, 0x6B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem64& src3) {AppendInstr(I_VFMADDSD, 0x6B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSS, 0x6A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSS, 0x6A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem32& src3) {AppendInstr(I_VFMADDSS, 0x6A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMADDSUBPD, 0x5D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmaddsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmaddsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMADDSUBPS, 0x5C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMSUBADDPD, 0x5F, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMSUBADDPS, 0x5E, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMSUBPD, 0x6D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFMSUBPS, 0x6C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBSD, 0x6F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBSD, 0x6F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem64& src3) {AppendInstr(I_VFMSUBSD, 0x6F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfmsubss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBSS, 0x6E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const XmmReg& src3) {AppendInstr(I_VFMSUBSS, 0x6E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfmsubss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem32& src3) {AppendInstr(I_VFMSUBSS, 0x6E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFNMADDPD, 0x79, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFNMADDPS, 0x78, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDSD, 0x7B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDSD, 0x7B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem64& src3) {AppendInstr(I_VFNMADDSD, 0x7B, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmaddss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDSS, 0x7A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const XmmReg& src3) {AppendInstr(I_VFNMADDSS, 0x7A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmaddss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem32& src3) {AppendInstr(I_VFNMADDSS, 0x7A, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubpd(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubpd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubpd(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubpd(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFNMSUBPD, 0x7D, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubps(const XmmReg& dst, const XmmReg& src1, const Mem128& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubps(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem128& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const YmmReg& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubps(const YmmReg& dst, const YmmReg& src1, const Mem256& src2, const YmmReg& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubps(const YmmReg& dst, const YmmReg& src1, const YmmReg& src2, const Mem256& src3) {AppendInstr(I_VFNMSUBPS, 0x7C, E_VEX_256 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBSD, 0x7F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubsd(const XmmReg& dst, const XmmReg& src1, const Mem64& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBSD, 0x7F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubsd(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem64& src3) {AppendInstr(I_VFNMSUBSD, 0x7F, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
void vfnmsubss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBSS, 0x7E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubss(const XmmReg& dst, const XmmReg& src1, const Mem32& src2, const XmmReg& src3) {AppendInstr(I_VFNMSUBSS, 0x7E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W0, W(dst), R(src2), R(src1), R(src3));}
|
|
void vfnmsubss(const XmmReg& dst, const XmmReg& src1, const XmmReg& src2, const Mem32& src3) {AppendInstr(I_VFNMSUBSS, 0x7E, E_VEX_128 | E_VEX_0F3A | E_VEX_66 | E_VEX_W1, W(dst), R(src3), R(src1), R(src2));}
|
|
|
|
struct ControlState
|
|
{
|
|
size_t label1;
|
|
size_t label2;
|
|
};
|
|
ControlState ctrl_state_;
|
|
std::deque<jitasm::Frontend::ControlState> ctrl_state_stack_;
|
|
|
|
// Repeat, Until
|
|
void Repeat()
|
|
{
|
|
ctrl_state_stack_.push_back(ctrl_state_);
|
|
ctrl_state_.label1 = NewLabelID(""); // begin
|
|
ctrl_state_.label2 = 0;
|
|
|
|
L(ctrl_state_.label1);
|
|
}
|
|
template<class Ty>
|
|
void Until(const Ty& expr)
|
|
{
|
|
size_t label = NewLabelID("");
|
|
expr(*this, ctrl_state_.label1, label);
|
|
L(label);
|
|
|
|
ctrl_state_ = *ctrl_state_stack_.rbegin();
|
|
ctrl_state_stack_.pop_back();
|
|
}
|
|
|
|
// While, EndW
|
|
template<class Ty>
|
|
void While(const Ty& expr)
|
|
{
|
|
ctrl_state_stack_.push_back(ctrl_state_);
|
|
ctrl_state_.label1 = NewLabelID(""); // begin
|
|
ctrl_state_.label2 = NewLabelID(""); // end
|
|
|
|
size_t label = NewLabelID("");
|
|
L(ctrl_state_.label1);
|
|
expr(*this, label, ctrl_state_.label2);
|
|
L(label);
|
|
}
|
|
void EndW()
|
|
{
|
|
AppendJmp(ctrl_state_.label1);
|
|
L(ctrl_state_.label2);
|
|
|
|
ctrl_state_ = *ctrl_state_stack_.rbegin();
|
|
ctrl_state_stack_.pop_back();
|
|
}
|
|
|
|
// If, ElseIf, Else, EndIf
|
|
template<class Ty>
|
|
void If(const Ty& expr)
|
|
{
|
|
ctrl_state_stack_.push_back(ctrl_state_);
|
|
ctrl_state_.label1 = NewLabelID(""); // else
|
|
ctrl_state_.label2 = NewLabelID(""); // end
|
|
|
|
size_t label = NewLabelID("");
|
|
expr(*this, label, ctrl_state_.label1);
|
|
L(label);
|
|
}
|
|
template<class Ty>
|
|
void ElseIf(const Ty& expr)
|
|
{
|
|
Else();
|
|
|
|
size_t label = NewLabelID("");
|
|
expr(*this, label, ctrl_state_.label1);
|
|
L(label);
|
|
}
|
|
void Else()
|
|
{
|
|
AppendJmp(ctrl_state_.label2);
|
|
L(ctrl_state_.label1);
|
|
ctrl_state_.label1 = NewLabelID("");
|
|
}
|
|
void EndIf()
|
|
{
|
|
L(ctrl_state_.label1);
|
|
L(ctrl_state_.label2);
|
|
|
|
ctrl_state_ = *ctrl_state_stack_.rbegin();
|
|
ctrl_state_stack_.pop_back();
|
|
}
|
|
};
|
|
|
|
namespace compiler
|
|
{
|
|
struct BitVector : std::vector<uint32>
|
|
{
|
|
size_t size_bit() const { return size() * 32; }
|
|
|
|
bool get_bit(size_t idx) const
|
|
{
|
|
const size_t i = idx / 32;
|
|
return i < size() && (at(i) & (1 << (idx % 32))) != 0;
|
|
}
|
|
|
|
void set_bit(size_t idx, bool b)
|
|
{
|
|
const size_t i = idx / 32;
|
|
const uint32 mask = (1 << (idx % 32));
|
|
if (i >= size()) resize(i + 1);
|
|
if (b) at(i) |= mask;
|
|
else at(i) &= ~mask;
|
|
}
|
|
|
|
bool is_equal(const BitVector& rhs) const
|
|
{
|
|
const size_t min_size = size() < rhs.size() ? size() : rhs.size();
|
|
for (size_t i = 0; i < min_size; ++i) {
|
|
if (at(i) != rhs[i]) return false;
|
|
}
|
|
|
|
const BitVector& larger = size() < rhs.size() ? rhs : *this;
|
|
for (size_t i = min_size; i < larger.size(); ++i) {
|
|
if (larger[i] != 0) return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
size_t count_bit() const
|
|
{
|
|
size_t count = 0;
|
|
for (size_t i = 0; i < size(); ++i) {
|
|
count += detail::Count1Bits(at(i));
|
|
}
|
|
return count;
|
|
}
|
|
|
|
void get_bit_indexes(std::vector<size_t>& indexes) const
|
|
{
|
|
indexes.clear();
|
|
for (size_t i = 0; i < size(); ++i) {
|
|
uint32 m = at(i);
|
|
while (m != 0) {
|
|
uint32 index = detail::bit_scan_forward(m);
|
|
indexes.push_back(static_cast<uint32>(i * 32) + index);
|
|
m &= ~(1 << index);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class Fn>
|
|
void query_bit_indexes(Fn& fn) const
|
|
{
|
|
for (size_t i = 0; i < size(); ++i) {
|
|
uint32 m = at(i);
|
|
while (m != 0) {
|
|
uint32 index = detail::bit_scan_forward(m);
|
|
fn(i * 32 + index);
|
|
m &= ~(1 << index);
|
|
}
|
|
}
|
|
}
|
|
|
|
void set_union(const BitVector& rhs)
|
|
{
|
|
if (size() < rhs.size()) resize(rhs.size());
|
|
for (size_t i = 0; i < rhs.size(); ++i) {
|
|
at(i) |= rhs[i];
|
|
}
|
|
}
|
|
|
|
void set_subtract(const BitVector& rhs)
|
|
{
|
|
const size_t min_size = size() < rhs.size() ? size() : rhs.size();
|
|
for (size_t i = 0; i < min_size; ++i) {
|
|
at(i) &= ~rhs[i];
|
|
}
|
|
}
|
|
};
|
|
|
|
template<class T, size_t N>
|
|
class FixedArray
|
|
{
|
|
private:
|
|
T data_[N];
|
|
size_t size_;
|
|
|
|
public:
|
|
FixedArray() : size_(0) {}
|
|
bool empty() const {return size_ == 0;}
|
|
size_t size() const {return size_;}
|
|
void clear() {size_ = 0;}
|
|
|
|
void push_back(const T& v) {data_[size_++] = v;}
|
|
void pop_back() {--size_;}
|
|
|
|
const T& operator[](size_t i) const {return data_[i];}
|
|
T& operator[](size_t i) {return data_[i];}
|
|
|
|
const T& back() const {return data_[size_ - 1];}
|
|
T& back() {return data_[size_ - 1];}
|
|
};
|
|
|
|
/// Register family
|
|
inline size_t GetRegFamily(RegType type)
|
|
{
|
|
switch (type) {
|
|
case R_TYPE_GP: return 0;
|
|
case R_TYPE_MMX: return 1;
|
|
case R_TYPE_XMM: return 2;
|
|
case R_TYPE_YMM: return 2;
|
|
case R_TYPE_SYMBOLIC_GP: return 0;
|
|
case R_TYPE_SYMBOLIC_MMX: return 1;
|
|
case R_TYPE_SYMBOLIC_XMM: return 2;
|
|
case R_TYPE_SYMBOLIC_YMM: return 2;
|
|
case R_TYPE_FPU:
|
|
default:
|
|
JITASM_ASSERT(0);
|
|
return 0x7FFFFFFF;
|
|
}
|
|
}
|
|
|
|
inline std::string GetRegName(RegType type, size_t reg_idx)
|
|
{
|
|
#ifdef JITASM64
|
|
const static std::string s_gp_reg_name[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"};
|
|
#else
|
|
const static std::string s_gp_reg_name[] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};
|
|
#endif
|
|
std::string name;
|
|
if (type == R_TYPE_GP) {return s_gp_reg_name[reg_idx];}
|
|
else if (type == R_TYPE_MMX) {name.assign("mm");}
|
|
else if (type == R_TYPE_XMM) {name.assign("xmm");}
|
|
else if (type == R_TYPE_YMM) {name.assign("ymm");}
|
|
else if (type == R_TYPE_SYMBOLIC_GP) {name.assign("gpsym"); reg_idx -= NUM_OF_PHYSICAL_REG;}
|
|
else if (type == R_TYPE_SYMBOLIC_MMX) {name.assign("mmsym"); reg_idx -= NUM_OF_PHYSICAL_REG;}
|
|
else if (type == R_TYPE_SYMBOLIC_XMM) {name.assign("xmmsym"); reg_idx -= NUM_OF_PHYSICAL_REG;}
|
|
else if (type == R_TYPE_SYMBOLIC_YMM) {name.assign("ymmsym"); reg_idx -= NUM_OF_PHYSICAL_REG;}
|
|
detail::append_num(name, reg_idx);
|
|
return name;
|
|
}
|
|
|
|
/// Variable attribute
|
|
struct VarAttribute
|
|
{
|
|
uint8 size : 7;
|
|
bool spill : 1;
|
|
Addr stack_slot;
|
|
VarAttribute() : size(0), spill(false), stack_slot(RegID::Invalid(), 0) {}
|
|
};
|
|
|
|
/// Variable manager
|
|
class VariableManager
|
|
{
|
|
private:
|
|
std::vector<VarAttribute> attributes_[3]; // GP, MMX, XMM/YMM
|
|
|
|
public:
|
|
std::vector<VarAttribute>& GetAttributes(size_t reg_family) {return attributes_[reg_family];}
|
|
const std::vector<VarAttribute>& GetAttributes(size_t reg_family) const {return attributes_[reg_family];}
|
|
|
|
/// Get variable size
|
|
size_t GetVarSize(size_t reg_family, int var) const
|
|
{
|
|
return attributes_[reg_family][var].size;
|
|
}
|
|
|
|
/// Update variable size
|
|
void UpdateVarSize(RegType reg_type, int var, size_t size)
|
|
{
|
|
const size_t reg_family = GetRegFamily(reg_type);
|
|
if (static_cast<size_t>(var) >= attributes_[reg_family].size()) {
|
|
attributes_[reg_family].resize(var + 1);
|
|
}
|
|
|
|
if (attributes_[reg_family][var].size < size) {
|
|
attributes_[reg_family][var].size = static_cast<uint8>(size);
|
|
}
|
|
}
|
|
|
|
/// Get stack slot for spill register
|
|
Addr GetSpillSlot(size_t reg_family, int var) const
|
|
{
|
|
return attributes_[reg_family][var].stack_slot;
|
|
}
|
|
|
|
/// Set stack slot for spill register
|
|
void SetSpillSlot(RegType reg_type, int var, const Addr& stack_slot)
|
|
{
|
|
const size_t reg_family = GetRegFamily(reg_type);
|
|
if (static_cast<size_t>(var) >= attributes_[reg_family].size()) {
|
|
attributes_[reg_family].resize(var + 1);
|
|
}
|
|
attributes_[reg_family][var].stack_slot = stack_slot;
|
|
}
|
|
|
|
/// Allocate stack of spill slots
|
|
void AllocSpillSlots(detail::StackManager& stack_manager)
|
|
{
|
|
// YMM
|
|
for (size_t i = 0; i < attributes_[2].size(); ++i) {
|
|
if (attributes_[2][i].spill && attributes_[2][i].size == 256 / 8 && attributes_[2][i].stack_slot.reg_.IsInvalid()) {
|
|
attributes_[2][i].stack_slot = stack_manager.Alloc(256 / 8, 16);
|
|
}
|
|
}
|
|
|
|
// XMM
|
|
for (size_t i = 0; i < attributes_[2].size(); ++i) {
|
|
if (attributes_[2][i].spill && attributes_[2][i].size == 128 / 8 && attributes_[2][i].stack_slot.reg_.IsInvalid()) {
|
|
attributes_[2][i].stack_slot = stack_manager.Alloc(128 / 8, 16);
|
|
}
|
|
}
|
|
|
|
// MMX
|
|
for (size_t i = 0; i < attributes_[1].size(); ++i) {
|
|
if (attributes_[1][i].spill && attributes_[1][i].stack_slot.reg_.IsInvalid()) {
|
|
attributes_[1][i].stack_slot = stack_manager.Alloc(64 / 8, 8);
|
|
}
|
|
}
|
|
|
|
// GP
|
|
for (size_t i = 0; i < attributes_[0].size(); ++i) {
|
|
if (attributes_[0][i].spill && attributes_[0][i].stack_slot.reg_.IsInvalid()) {
|
|
#ifdef JITASM64
|
|
attributes_[0][i].stack_slot = stack_manager.Alloc(64 / 8, 8);
|
|
#else
|
|
attributes_[0][i].stack_slot = stack_manager.Alloc(32 / 8, 4);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Register use point
|
|
struct RegUsePoint
|
|
{
|
|
size_t instr_idx; ///< Instruction index offset from basic block start point
|
|
OpdType type; ///< Operand type
|
|
uint32 reg_assignable; ///< Register assignment constraint
|
|
|
|
RegUsePoint(size_t idx, OpdType t, uint32 assignable) : instr_idx(idx), type(t), reg_assignable(assignable) {}
|
|
|
|
bool operator<(const RegUsePoint& rhs) const {
|
|
if (instr_idx == rhs.instr_idx) {
|
|
// R < RW < W
|
|
const int lhs_type = (type & O_TYPE_READ ? -1 : 0) + (type & O_TYPE_WRITE ? 1 : 0);
|
|
const int rhs_type = (rhs.type & O_TYPE_READ ? -1 : 0) + (rhs.type & O_TYPE_WRITE ? 1 : 0);
|
|
return lhs_type < rhs_type;
|
|
}
|
|
return instr_idx < rhs.instr_idx;
|
|
}
|
|
};
|
|
|
|
/// Variable lifetime
|
|
struct Lifetime
|
|
{
|
|
typedef detail::Range< std::vector<RegUsePoint> > RegUsePointRange;
|
|
typedef detail::ConstRange< std::vector<RegUsePoint> > ConstRegUsePointRange;
|
|
|
|
struct Interval
|
|
{
|
|
size_t instr_idx_offset; ///< Instruction index offset from basic block start point
|
|
BitVector liveness; ///< The set of live variables
|
|
BitVector use; ///< The set of used variables in this interval
|
|
BitVector spill; ///< The set of spilled variables
|
|
std::vector<uint32> reg_assignables; ///< The constraints of register allocation
|
|
std::vector<int> assignment_table; ///< Register assignment table
|
|
|
|
Interval(size_t instr_idx, const std::vector<uint32>& assignables) : instr_idx_offset(instr_idx), reg_assignables(assignables) {}
|
|
Interval(size_t instr_idx, const BitVector& l, const BitVector& s, const std::vector<uint32>& assignables) : instr_idx_offset(instr_idx), liveness(l), spill(s), reg_assignables(assignables) {}
|
|
|
|
void UpdateUse(size_t var, RegUsePointRange& range, const Interval *next_interval)
|
|
{
|
|
// step range
|
|
while (!range.empty() && range.first->instr_idx < instr_idx_offset) {++range.first;}
|
|
|
|
// check if variables used in this interval
|
|
const bool used = !range.empty() && (!next_interval || range.first->instr_idx < next_interval->instr_idx_offset);
|
|
use.set_bit(var, used);
|
|
}
|
|
|
|
void Dump(bool dump_assigned_reg) const
|
|
{
|
|
std::vector<char> liveness_str;
|
|
for (size_t v = 0; v < liveness.size_bit(); ++v) {
|
|
if (liveness.get_bit(v)) {
|
|
const bool used = use.get_bit(v);
|
|
char c;
|
|
if (spill.get_bit(v)) {
|
|
c = used ? 'S' : 's';
|
|
} else if (dump_assigned_reg) {
|
|
int reg = assignment_table[v];
|
|
c = static_cast<char>(reg < 0xA ? '0' + reg : 'A' + reg);
|
|
} else {
|
|
c = used ? 'R' : 'r';
|
|
}
|
|
liveness_str.push_back(c);
|
|
} else {
|
|
liveness_str.push_back('.');
|
|
}
|
|
}
|
|
liveness_str.push_back('\0');
|
|
JITASM_TRACE("[%03d] %s\n", instr_idx_offset, &liveness_str[0]);
|
|
}
|
|
};
|
|
|
|
// 0 ~ NUM_OF_PHYSICAL_REG-1 : Physical register
|
|
// NUM_OF_PHYSICAL_REG ~ : Symbolic register
|
|
std::vector< std::vector<RegUsePoint> > use_points;
|
|
BitVector gen; ///< The set of variables used before any assignment
|
|
BitVector kill; ///< The set of variables assigned a value before any use
|
|
BitVector live_in; ///< The set of live variables at the start of this block
|
|
BitVector live_out; ///< The set of live variables at the end of this block
|
|
bool dirty_live_out; ///< The dirty flag of live_out
|
|
std::vector<Interval> intervals; ///< Lifetime intervals
|
|
|
|
static const int SpillCost_Read = 2;
|
|
static const int SpillCost_Write = 3;
|
|
|
|
Lifetime() : use_points(NUM_OF_PHYSICAL_REG), dirty_live_out(true) {}
|
|
|
|
/// Add register use point
|
|
void AddUsePoint(size_t instr_idx, const RegID& reg, OpdType opd_type, OpdSize opd_size, uint32 reg_assignable)
|
|
{
|
|
if (use_points.size() <= static_cast<size_t>(reg.id)) {
|
|
use_points.resize(reg.id + 1);
|
|
}
|
|
|
|
// add read attribute when writing to 8/16bit register because it is partial write
|
|
if ((opd_type & O_TYPE_WRITE) && (opd_size == O_SIZE_8 || opd_size == O_SIZE_16)) {
|
|
opd_type = static_cast<OpdType>(static_cast<int>(opd_type) | O_TYPE_READ);
|
|
}
|
|
|
|
RegUsePoint use_point(instr_idx, opd_type, reg_assignable);
|
|
std::vector<RegUsePoint>::reverse_iterator it = use_points[reg.id].rbegin();
|
|
while (it != use_points[reg.id].rend() && use_point < *it) {++it;}
|
|
use_points[reg.id].insert(it.base(), use_point);
|
|
}
|
|
|
|
void GetSpillCost(int freq, std::vector<int>& spill_cost) const
|
|
{
|
|
if (spill_cost.size() < use_points.size()) {
|
|
spill_cost.resize(use_points.size()); // expand
|
|
}
|
|
for(size_t i = 0; i < use_points.size(); ++i) {
|
|
int cost = 0;
|
|
for (std::vector<RegUsePoint>::const_iterator it = use_points[i].begin(); it != use_points[i].end(); ++it) {
|
|
if (it->type & O_TYPE_READ) cost += SpillCost_Read;
|
|
if (it->type & O_TYPE_WRITE) cost += SpillCost_Write;
|
|
}
|
|
spill_cost[i] += cost * freq;
|
|
}
|
|
}
|
|
|
|
void BuildIntervals()
|
|
{
|
|
// initialize use_points ranges
|
|
std::vector<RegUsePointRange> use_points_ranges;
|
|
use_points_ranges.reserve(use_points.size());
|
|
for (size_t i = 0; i < use_points.size(); ++i) {
|
|
use_points_ranges.push_back(RegUsePointRange(use_points[i]));
|
|
}
|
|
|
|
// build interval
|
|
BitVector *last_liveness = NULL;
|
|
std::vector<uint32> reg_assignables;
|
|
bool last_reg_constraints = false;
|
|
bool last_stack_vars = false;
|
|
const size_t num_of_variables = live_in.size_bit() < use_points.size() ? use_points.size() : live_in.size_bit();
|
|
size_t instr_idx = 0;
|
|
size_t end_count;
|
|
do {
|
|
BitVector liveness = live_in;
|
|
BitVector stack_vars;
|
|
end_count = 0;
|
|
reg_assignables.clear();
|
|
size_t min_instr_idx = (size_t)-1;
|
|
for (size_t i = 0; i < use_points_ranges.size(); ++i) {
|
|
if (use_points_ranges[i].empty()) {
|
|
liveness.set_bit(i, live_out.get_bit(i));
|
|
++end_count;
|
|
} else {
|
|
if (use_points_ranges[i].first->instr_idx < min_instr_idx) {
|
|
min_instr_idx = use_points_ranges[i].first->instr_idx;
|
|
}
|
|
|
|
if (use_points_ranges[i].first->instr_idx == instr_idx) {
|
|
for (; !use_points_ranges[i].empty() && use_points_ranges[i].first->instr_idx == instr_idx; ++use_points_ranges[i].first) {
|
|
// Check the constraints of register allocation
|
|
if (use_points_ranges[i].first->reg_assignable != 0xFFFFFFFF) {
|
|
reg_assignables.resize(num_of_variables, 0xFFFFFFFF);
|
|
reg_assignables[i] &= use_points_ranges[i].first->reg_assignable;
|
|
}
|
|
|
|
// Check the stack variable
|
|
if (use_points_ranges[i].first->type & O_TYPE_MEM) {
|
|
stack_vars.set_bit(i, true);
|
|
}
|
|
}
|
|
liveness.set_bit(i, true);
|
|
} else if (use_points_ranges[i].first->type & O_TYPE_READ) {
|
|
liveness.set_bit(i, true);
|
|
} else if (use_points_ranges[i].first->type & O_TYPE_WRITE) {
|
|
liveness.set_bit(i, false);
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Split interval in the following case:
|
|
// - The liveness is changed.
|
|
// - Current or last instruction has any constraints of register allocation.
|
|
// - Last instruction is I_COMPILER_DECLARE_STACK_ARG
|
|
if (!reg_assignables.empty() || last_reg_constraints || last_stack_vars || !last_liveness || !last_liveness->is_equal(liveness)) {
|
|
intervals.push_back(Interval(instr_idx, liveness, stack_vars, reg_assignables));
|
|
last_liveness = &intervals.back().liveness;
|
|
}
|
|
|
|
last_reg_constraints = !reg_assignables.empty();
|
|
last_stack_vars = !stack_vars.empty();
|
|
instr_idx = min_instr_idx == instr_idx ? instr_idx + 1 : min_instr_idx;
|
|
} while (end_count < use_points_ranges.size());
|
|
|
|
// check use
|
|
for (size_t v = 0; v < use_points.size(); ++v) {
|
|
RegUsePointRange range(use_points[v]);
|
|
for (size_t i = 0; i < intervals.size(); ++i) {
|
|
const Interval *next_interval = i + 1 < intervals.size() ? &intervals[i + 1] : NULL;
|
|
intervals[i].UpdateUse(v, range, next_interval);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Split interval
|
|
void SplitInterval(size_t instr_idx, size_t interval_idx)
|
|
{
|
|
std::vector<Interval>::iterator it = intervals.insert(intervals.begin() + interval_idx + 1, intervals[interval_idx]);
|
|
it->instr_idx_offset = instr_idx;
|
|
|
|
// update use
|
|
for (size_t v = 0; v < use_points.size(); ++v) {
|
|
RegUsePointRange range(use_points[v]);
|
|
for (size_t i = interval_idx; i < interval_idx + 2; ++i) {
|
|
const Interval *next_interval = i + 1 < intervals.size() ? &intervals[i + 1] : NULL;
|
|
intervals[i].UpdateUse(v, range, next_interval);
|
|
}
|
|
}
|
|
}
|
|
|
|
struct LessCost {
|
|
const std::vector<int> *cost_;
|
|
LessCost(const std::vector<int> *cost) : cost_(cost) {}
|
|
int get_cost(size_t i) const {return i < cost_->size() ? cost_->at(i) : 0;}
|
|
bool operator()(size_t lhs, size_t rhs) const {return get_cost(lhs) < get_cost(rhs);}
|
|
};
|
|
|
|
/// Spill identification
|
|
void SpillIdentification(uint32 available_reg_count, const std::vector<int>& total_spill_cost, int freq, const Interval *last_interval, std::vector<VarAttribute>& var_attrs)
|
|
{
|
|
// initialize use_points ranges
|
|
std::vector<RegUsePointRange> interval_use_points;
|
|
interval_use_points.reserve(use_points.size());
|
|
for (size_t i = 0; i < use_points.size(); ++i) {
|
|
interval_use_points.push_back(RegUsePointRange(use_points[i]));
|
|
}
|
|
|
|
std::vector<size_t> live_vars;
|
|
std::vector<int> cur_spill_cost;
|
|
for (size_t interval_idx = 0; interval_idx < intervals.size(); ++interval_idx) {
|
|
const Interval *prior_interval = interval_idx > 0 ? &intervals[interval_idx - 1] : last_interval;
|
|
Interval *cur_interval = &intervals[interval_idx];
|
|
|
|
if (cur_interval->liveness.count_bit() > available_reg_count) {
|
|
cur_interval->liveness.get_bit_indexes(live_vars);
|
|
|
|
const size_t max_var = live_vars.back();
|
|
if (var_attrs.size() < max_var + 1) {
|
|
var_attrs.resize(max_var + 1); // expand var_attrs
|
|
}
|
|
|
|
cur_spill_cost.resize(max_var + 1);
|
|
for (size_t i = 0; i < live_vars.size(); ++i) {
|
|
const size_t var = live_vars[i];
|
|
|
|
// step interval_use_points
|
|
if (var < interval_use_points.size()) {
|
|
while (!interval_use_points[var].empty() && interval_use_points[var].first->instr_idx < cur_interval->instr_idx_offset) {++interval_use_points[var].first;}
|
|
}
|
|
|
|
// calculate spill cost of this interval
|
|
if (cur_interval->use.get_bit(var) && (interval_use_points[var].first->type & O_TYPE_MEM)) {
|
|
// special low spill cost if this variable on stack (function arguemnt)
|
|
cur_spill_cost[var] = -1;
|
|
} else if (cur_interval->use.get_bit(var) && interval_use_points[var].first->instr_idx == cur_interval->instr_idx_offset) {
|
|
// special high spill cost if this variable is used at first instruction of this interval
|
|
// because it must not be spilled.
|
|
cur_spill_cost[var] = 0x7FFFFFFF;
|
|
} else {
|
|
cur_spill_cost[var] = total_spill_cost[var];
|
|
if (prior_interval && !prior_interval->spill.get_bit(var)) {
|
|
cur_spill_cost[var] += (SpillCost_Read + SpillCost_Write) * freq;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Spill from the smallest cost
|
|
std::sort(live_vars.begin(), live_vars.end(), LessCost(&cur_spill_cost));
|
|
|
|
// Mark spilled variable.
|
|
// Split interval if spilled variable is used in this interval.
|
|
// Find first instruction index using the spilled variable.
|
|
size_t split_interval_instr = (size_t)-1;
|
|
for (size_t i = 0; i < live_vars.size(); ++i) {
|
|
const size_t var = live_vars[i];
|
|
const bool stack_var = (cur_spill_cost[var] < 0); // It may be function argument on stack
|
|
const bool spill = (i + available_reg_count < live_vars.size() || stack_var);
|
|
cur_interval->spill.set_bit(var, spill);
|
|
if (spill) {
|
|
var_attrs[var].spill = true;
|
|
}
|
|
if (stack_var) {
|
|
// Split at next of using stack variable
|
|
if (interval_use_points[var].first->instr_idx + 1 < split_interval_instr) {
|
|
split_interval_instr = interval_use_points[var].first->instr_idx + 1;
|
|
}
|
|
} else if (spill && cur_interval->use.get_bit(var)) {
|
|
// Split if spilled variable is used in this interval.
|
|
if (interval_use_points[var].first->instr_idx < split_interval_instr) {
|
|
split_interval_instr = interval_use_points[var].first->instr_idx;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (split_interval_instr != (size_t)-1) {
|
|
SplitInterval(split_interval_instr, interval_idx);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
struct LessAssignOrder {
|
|
Interval *interval;
|
|
const Interval *prior_interval;
|
|
LessAssignOrder(Interval *cur, const Interval *prior) : interval(cur), prior_interval(prior) {}
|
|
bool has_constraints(size_t v) const {return v < interval->reg_assignables.size() ? interval->reg_assignables[v] != 0xFFFFFFFF : false;}
|
|
uint32 num_of_assignable(size_t v) const {return v < interval->reg_assignables.size() ? detail::Count1Bits(interval->reg_assignables[v]) : 32;}
|
|
bool operator()(size_t lhs, size_t rhs) const {
|
|
// is there any register constraints or not
|
|
const bool lhs_has_constraints = has_constraints(lhs);
|
|
const bool rhs_has_constraints = has_constraints(rhs);
|
|
if (lhs_has_constraints != rhs_has_constraints) {
|
|
return lhs_has_constraints;
|
|
}
|
|
|
|
if (lhs_has_constraints) {
|
|
// is the register which has constraints used in this interval or not
|
|
const bool lhs_used = interval->use.get_bit(lhs);
|
|
const bool rhs_used = interval->use.get_bit(rhs);
|
|
if (lhs_used != rhs_used) {
|
|
return lhs_used;
|
|
}
|
|
|
|
// compare number of assignable registers
|
|
const uint32 lhs_num_of_assignable = num_of_assignable(lhs);
|
|
const uint32 rhs_num_of_assignable = num_of_assignable(rhs);
|
|
if (lhs_num_of_assignable != rhs_num_of_assignable) {
|
|
return lhs_num_of_assignable < rhs_num_of_assignable;
|
|
}
|
|
}
|
|
|
|
// physical register or symbolic register
|
|
const int lhs_sym_reg = (lhs < NUM_OF_PHYSICAL_REG ? 0 : 1);
|
|
const int rhs_sym_reg = (rhs < NUM_OF_PHYSICAL_REG ? 0 : 1);
|
|
if (lhs_sym_reg != rhs_sym_reg) {
|
|
return lhs_sym_reg < rhs_sym_reg;
|
|
}
|
|
|
|
if (prior_interval) {
|
|
// is the variable assigned register in prior interval or not
|
|
const bool lhs_prior_reg = !prior_interval->spill.get_bit(lhs) && prior_interval->liveness.get_bit(lhs);
|
|
const bool rhs_prior_reg = !prior_interval->spill.get_bit(rhs) && prior_interval->liveness.get_bit(rhs);
|
|
if (lhs_prior_reg != rhs_prior_reg) {
|
|
return lhs_prior_reg;
|
|
}
|
|
}
|
|
|
|
// compare register id
|
|
return lhs < rhs;
|
|
}
|
|
};
|
|
|
|
/// Assign register in basic block
|
|
/**
|
|
* \param[in] available_reg Available physical register mask
|
|
* \param[in] last_interval Last Interval as the hint of assignment
|
|
* \return Used physical register mask
|
|
*/
|
|
uint32 AssignRegister(uint32 available_reg, const Interval *last_interval)
|
|
{
|
|
uint32 used_reg = 0;
|
|
std::vector<size_t> live_vars;
|
|
for (size_t interval_idx = 0; interval_idx < intervals.size(); ++interval_idx) {
|
|
const Interval *prior_interval = interval_idx > 0 ? &intervals[interval_idx - 1] : last_interval;
|
|
Interval *cur_interval = &intervals[interval_idx];
|
|
|
|
// enum variables to assign register
|
|
live_vars.clear();
|
|
for (size_t i = 0; i < cur_interval->liveness.size(); ++i) {
|
|
uint32 s = i < cur_interval->spill.size() ? cur_interval->spill[i] : 0;
|
|
uint32 l = cur_interval->liveness[i] & ~s;
|
|
while (l != 0) {
|
|
uint32 index = detail::bit_scan_forward(l);
|
|
live_vars.push_back(static_cast<uint32>(i * 32) + index);
|
|
l &= ~(1 << index);
|
|
}
|
|
}
|
|
|
|
if (!live_vars.empty()) {
|
|
cur_interval->assignment_table.resize(live_vars.back() + 1, -1);
|
|
|
|
// sort into assignment order
|
|
std::sort(live_vars.begin(), live_vars.end(), LessAssignOrder(cur_interval, prior_interval));
|
|
}
|
|
|
|
// Assign register
|
|
uint32 cur_avail = available_reg;
|
|
const size_t num_of_live_vars = live_vars.size();
|
|
for (size_t i = 0; i < live_vars.size(); ++i) {
|
|
const size_t var = live_vars[i];
|
|
const bool first_try = (i < num_of_live_vars); // Try to assign for the first time
|
|
const uint32 reg_assignable = first_try && var < cur_interval->reg_assignables.size() ? cur_interval->reg_assignables[var] : 0xFFFFFFFF; // Ignore constraint if it is retried
|
|
JITASM_ASSERT((cur_avail & reg_assignable) != 0);
|
|
int assigned_reg = -1;
|
|
if (var < NUM_OF_PHYSICAL_REG && first_try) {
|
|
// Physical register
|
|
if (cur_avail & reg_assignable & (1 << var)) {
|
|
assigned_reg = static_cast<int>(var);
|
|
} else if (((1 << var) & available_reg) && !cur_interval->use.get_bit(var)) {
|
|
// Try to assign another physical register if it is not used in this interval. But assign later.
|
|
live_vars.push_back(var);
|
|
} else if (reg_assignable != 0xFFFFFFFF && (cur_avail & reg_assignable) && cur_interval->use.get_bit(var)) {
|
|
// This physical register violates the register constraint.
|
|
// Assign another physical register which satisfy the constraint.
|
|
assigned_reg = detail::bit_scan_forward(cur_avail & reg_assignable);
|
|
} else {
|
|
// This may be out of assignment register (ESP, EBP and so on...)
|
|
JITASM_ASSERT(((1 << var) & available_reg) == 0); // false assignment!?
|
|
assigned_reg = static_cast<int>(var);
|
|
}
|
|
} else {
|
|
// Symbolic register or retried physical register
|
|
const int last_assigned = prior_interval && var < prior_interval->assignment_table.size() ? prior_interval->assignment_table[var] : -1;
|
|
if (last_assigned != -1 && (cur_avail & reg_assignable & (1 << last_assigned))) {
|
|
// select last assigned register
|
|
assigned_reg = last_assigned;
|
|
} else if (cur_avail & reg_assignable) {
|
|
assigned_reg = detail::bit_scan_forward(cur_avail & reg_assignable);
|
|
} else if (reg_assignable != 0xFFFFFFFF && !cur_interval->use.get_bit(var)) {
|
|
// Try to assign register ignoring constraint if it is not used in this interval. But assign later.
|
|
live_vars.push_back(var);
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
if (assigned_reg >= 0) {
|
|
cur_interval->assignment_table[var] = assigned_reg;
|
|
cur_avail &= ~(1 << assigned_reg);
|
|
}
|
|
}
|
|
|
|
used_reg |= ~cur_avail & available_reg;
|
|
}
|
|
|
|
return used_reg;
|
|
}
|
|
|
|
void DumpIntervals(size_t block_id, bool dump_assigned_reg) const
|
|
{
|
|
avoid_unused_warn(block_id);
|
|
JITASM_TRACE("---- Block%d ----\n", block_id);
|
|
for (size_t i = 0; i < intervals.size(); ++i) {
|
|
intervals[i].Dump(dump_assigned_reg);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Basic block
|
|
struct BasicBlock
|
|
{
|
|
BasicBlock *successor[2];
|
|
std::vector<BasicBlock *> predecessor;
|
|
size_t instr_begin; ///< Begin instruction index of the basic block (inclusive)
|
|
size_t instr_end; ///< End instruction index of the basic block (exclusive)
|
|
size_t depth; ///< Depth-first order of Control flow
|
|
BasicBlock *dfs_parent; ///< Depth-first search tree parent
|
|
BasicBlock *immediate_dominator; ///< Immediate dominator
|
|
size_t loop_depth; ///< Loop nesting depth
|
|
Lifetime lifetime[3]; ///< Variable lifetime (0: GP, 1: MMX, 2: XMM/YMM)
|
|
|
|
BasicBlock(size_t instr_begin_, size_t instr_end_, BasicBlock *successor0 = NULL, BasicBlock *successor1 = NULL) : instr_begin(instr_begin_), instr_end(instr_end_), depth((size_t)-1), dfs_parent(NULL), immediate_dominator(NULL), loop_depth(0) {
|
|
successor[0] = successor0;
|
|
successor[1] = successor1;
|
|
}
|
|
|
|
bool operator<(const BasicBlock& rhs) const { return instr_begin < rhs.instr_begin; }
|
|
|
|
/// Remove predecessor
|
|
void RemovePredecessor(BasicBlock *block) {
|
|
std::vector<BasicBlock *>::iterator it = std::find(predecessor.begin(), predecessor.end(), block);
|
|
if (it != predecessor.end()) {
|
|
predecessor.erase(it);
|
|
}
|
|
}
|
|
|
|
/// Replace predecessor
|
|
bool ReplacePredecessor(BasicBlock *old_pred, BasicBlock *new_pred) {
|
|
std::vector<BasicBlock *>::iterator it = std::find(predecessor.begin(), predecessor.end(), old_pred);
|
|
if (it != predecessor.end()) {
|
|
*it = new_pred;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Check if the specified block is dominator of this block
|
|
bool IsDominated(BasicBlock *block) const {
|
|
if (block == this) {return true;}
|
|
return immediate_dominator ? immediate_dominator->IsDominated(block) : false;
|
|
}
|
|
|
|
/// Get estimated frequency of basic block
|
|
int GetFrequency() const {
|
|
const static int freq[] = {1, 100, 10000, 40000, 160000, 640000};
|
|
return freq[loop_depth < sizeof(freq) / sizeof(int) ? loop_depth : sizeof(freq) / sizeof(int) - 1];
|
|
}
|
|
|
|
/// Get variable lifetime
|
|
Lifetime& GetLifetime(RegType type) {return lifetime[GetRegFamily(type)];}
|
|
/// Get variable lifetime
|
|
const Lifetime& GetLifetime(RegType type) const {return lifetime[GetRegFamily(type)];}
|
|
|
|
struct less
|
|
{
|
|
bool operator()(BasicBlock *lhs, BasicBlock *rhs) { return lhs->instr_begin < rhs->instr_begin; }
|
|
bool operator()(BasicBlock *lhs, size_t rhs) { return lhs->instr_begin < rhs; }
|
|
bool operator()(size_t lhs, BasicBlock *rhs) { return lhs < rhs->instr_begin; }
|
|
};
|
|
};
|
|
|
|
/**
|
|
* The Lengauer-Tarjan algorithm
|
|
*/
|
|
class DominatorFinder
|
|
{
|
|
private:
|
|
std::vector<size_t> sdom_; // semidominator
|
|
std::vector<size_t> ancestor_;
|
|
std::vector<size_t> best_;
|
|
|
|
void Link(size_t v, size_t w)
|
|
{
|
|
ancestor_[w] = v;
|
|
}
|
|
|
|
size_t Eval(size_t v)
|
|
{
|
|
if (ancestor_[v] == 0) return v;
|
|
Compress(v);
|
|
return best_[v];
|
|
}
|
|
|
|
void Compress(size_t v)
|
|
{
|
|
size_t a = ancestor_[v];
|
|
if (ancestor_[a] == 0)
|
|
return;
|
|
|
|
Compress(a);
|
|
|
|
if (sdom_[best_[v]] > sdom_[best_[a]])
|
|
best_[v] = best_[a];
|
|
|
|
ancestor_[v] = ancestor_[a];
|
|
}
|
|
|
|
public:
|
|
void operator()(std::deque<BasicBlock *>& depth_first_blocks)
|
|
{
|
|
const size_t num_of_nodes = depth_first_blocks.size();
|
|
if (num_of_nodes == 0) return;
|
|
|
|
// initialize
|
|
sdom_.resize(num_of_nodes); // semidominator
|
|
ancestor_.clear();
|
|
ancestor_.resize(num_of_nodes);
|
|
best_.resize(num_of_nodes);
|
|
std::vector< std::vector<size_t> > bucket(num_of_nodes);
|
|
std::vector<size_t> dom(num_of_nodes);
|
|
for (size_t i = 0; i < num_of_nodes; ++i) {
|
|
sdom_[i] = i;
|
|
best_[i] = i;
|
|
}
|
|
|
|
for (size_t w = num_of_nodes - 1; w > 0; --w) {
|
|
BasicBlock *wb = depth_first_blocks[w];
|
|
size_t p = wb->dfs_parent->depth;
|
|
|
|
// Compute the semidominator
|
|
for (std::vector<BasicBlock *>::iterator v = wb->predecessor.begin(); v != wb->predecessor.end(); ++v) {
|
|
if ((*v)->depth != (size_t)-1) { // skip out of DFS tree
|
|
size_t u = Eval((*v)->depth);
|
|
if (sdom_[u] < sdom_[w])
|
|
sdom_[w] = sdom_[u];
|
|
}
|
|
}
|
|
bucket[sdom_[w]].push_back(w);
|
|
Link(p, w);
|
|
|
|
// Implicity compute immediate dominator
|
|
for (std::vector<size_t>::iterator v = bucket[p].begin(); v != bucket[p].end(); ++v) {
|
|
size_t u = Eval(*v);
|
|
dom[*v] = sdom_[u] < sdom_[*v] ? u : p;
|
|
}
|
|
bucket[p].clear();
|
|
}
|
|
|
|
// Explicity compute immediate dominator
|
|
for (size_t w = 1; w < num_of_nodes; ++w) {
|
|
if (dom[w] != sdom_[w])
|
|
dom[w] = dom[dom[w]];
|
|
depth_first_blocks[w]->immediate_dominator = depth_first_blocks[dom[w]];
|
|
}
|
|
depth_first_blocks[0]->immediate_dominator = NULL;
|
|
}
|
|
};
|
|
|
|
/// Control flow graph
|
|
class ControlFlowGraph
|
|
{
|
|
public:
|
|
typedef std::deque<BasicBlock *> BlockList;
|
|
|
|
private:
|
|
BlockList blocks_;
|
|
BlockList depth_first_blocks_;
|
|
|
|
void MakeDepthFirstBlocks(BasicBlock *block)
|
|
{
|
|
block->depth = 0; // mark "visited"
|
|
for (size_t i = 0; i < 2; ++i) {
|
|
BasicBlock *s = block->successor[i];
|
|
if (s && s->depth != 0) {
|
|
s->dfs_parent = block;
|
|
MakeDepthFirstBlocks(s);
|
|
}
|
|
}
|
|
depth_first_blocks_.push_front(block);
|
|
}
|
|
|
|
struct sort_backedge {
|
|
bool operator()(const std::pair<size_t, size_t>& lhs, const std::pair<size_t, size_t>& rhs) const {
|
|
if (lhs.second < rhs.second) return true; // smaller depth loop header first
|
|
if (lhs.second == rhs.second) return lhs.first > rhs.first; // larger depth of end of loop first if same loop header
|
|
return false;
|
|
}
|
|
};
|
|
|
|
void DetectLoops()
|
|
{
|
|
// Make dominator tree
|
|
DominatorFinder dom_finder;
|
|
dom_finder(depth_first_blocks_);
|
|
|
|
// Identify backedges
|
|
std::vector< std::pair<size_t, size_t> > backedges;
|
|
for (size_t i = 0; i < depth_first_blocks_.size(); ++i) {
|
|
BasicBlock *block = depth_first_blocks_[i];
|
|
for (size_t j = 0; j < 2; ++j) {
|
|
if (block->successor[j] && block->depth >= block->successor[j]->depth) { // retreating edge
|
|
if (block->IsDominated(block->successor[j])) {
|
|
backedges.push_back(std::make_pair(block->depth, block->successor[j]->depth));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Merge loops with the same loop header
|
|
std::sort(backedges.begin(), backedges.end(), sort_backedge());
|
|
if (backedges.size() >= 2) {
|
|
std::vector< std::pair<size_t, size_t> >::iterator it = backedges.begin() + 1;
|
|
while (it != backedges.end()) {
|
|
if (detail::prior(it)->second == it->second) {
|
|
// erase backedge of smaller loop
|
|
it = backedges.erase(it);
|
|
} else {
|
|
++it;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set loop depth
|
|
for (std::vector< std::pair<size_t, size_t> >::iterator it = backedges.begin(); it != backedges.end(); ++it) {
|
|
for (size_t i = it->second; i <= it->first; ++i) {
|
|
depth_first_blocks_[i]->loop_depth++;
|
|
}
|
|
}
|
|
}
|
|
|
|
BlockList::iterator initialize(size_t num_of_instructions) {
|
|
clear();
|
|
blocks_.resize(num_of_instructions > 0 ? 2 : 1);
|
|
BasicBlock *enter_block = new BasicBlock(0, num_of_instructions);
|
|
blocks_[0] = enter_block;
|
|
if (num_of_instructions > 0) {
|
|
// exit block
|
|
BasicBlock *exit_block = new BasicBlock(num_of_instructions, num_of_instructions);
|
|
blocks_[1] = exit_block;
|
|
enter_block->successor[0] = exit_block;
|
|
exit_block->predecessor.push_back(enter_block);
|
|
}
|
|
return blocks_.begin();
|
|
}
|
|
|
|
/// Split basic block
|
|
BlockList::iterator split(BlockList::iterator target_block_it, size_t instr_idx) {
|
|
BasicBlock *target_block = *target_block_it;
|
|
if (target_block->instr_begin == instr_idx)
|
|
return target_block_it;
|
|
|
|
BasicBlock *new_block = new BasicBlock(instr_idx, target_block->instr_end);
|
|
new_block->successor[0] = target_block->successor[0];
|
|
new_block->successor[1] = target_block->successor[1];
|
|
new_block->predecessor.push_back(target_block);
|
|
target_block->successor[0] = new_block;
|
|
target_block->successor[1] = NULL;
|
|
target_block->instr_end = instr_idx;
|
|
|
|
// replace predecessor of successors
|
|
if (new_block->successor[0]) new_block->successor[0]->ReplacePredecessor(target_block, new_block);
|
|
if (new_block->successor[1]) new_block->successor[1]->ReplacePredecessor(target_block, new_block);
|
|
|
|
return blocks_.insert(detail::next(target_block_it), new_block);
|
|
}
|
|
|
|
public:
|
|
~ControlFlowGraph()
|
|
{
|
|
clear();
|
|
}
|
|
|
|
BlockList::iterator get_block(size_t instr_idx) {
|
|
BlockList::iterator it = std::upper_bound(blocks_.begin(), blocks_.end(), instr_idx, BasicBlock::less());
|
|
return it != blocks_.begin() ? --it : blocks_.end();
|
|
}
|
|
|
|
BlockList::iterator get_exit_block() {
|
|
return detail::prior(blocks_.end());
|
|
}
|
|
|
|
size_t size() { return blocks_.size(); }
|
|
|
|
void clear()
|
|
{
|
|
for (BlockList::iterator it = blocks_.begin(); it != blocks_.end(); ++it) {
|
|
delete *it;
|
|
}
|
|
blocks_.clear();
|
|
depth_first_blocks_.clear();
|
|
}
|
|
|
|
BlockList::iterator begin() { return blocks_.begin(); }
|
|
BlockList::iterator end() { return blocks_.end(); }
|
|
BlockList::const_iterator begin() const { return blocks_.begin(); }
|
|
BlockList::const_iterator end() const { return blocks_.end(); }
|
|
BlockList::iterator dfs_begin() { return depth_first_blocks_.begin(); }
|
|
BlockList::iterator dfs_end() { return depth_first_blocks_.end(); }
|
|
|
|
void DumpDot() const
|
|
{
|
|
JITASM_TRACE("digraph CFG {\n");
|
|
JITASM_TRACE("\tnode[shape=box];\n");
|
|
for (BlockList::const_iterator it = blocks_.begin(); it != blocks_.end(); ++it) {
|
|
BasicBlock *block = *it;
|
|
std::string live_in = "live in:";
|
|
std::string live_out = "live out:";
|
|
for (size_t reg_family = 0; reg_family < 3; ++reg_family) {
|
|
for (size_t i = 0; i < block->lifetime[reg_family].live_in.size_bit(); ++i) {
|
|
if (block->lifetime[reg_family].live_in.get_bit(i)) {
|
|
live_in.append(" ");
|
|
live_in.append(GetRegName(static_cast<RegType>(reg_family + (i < NUM_OF_PHYSICAL_REG ? R_TYPE_GP : R_TYPE_SYMBOLIC_GP)), i));
|
|
}
|
|
}
|
|
for (size_t i = 0; i < block->lifetime[reg_family].live_out.size_bit(); ++i) {
|
|
if (block->lifetime[reg_family].live_out.get_bit(i)) {
|
|
live_out.append(" ");
|
|
live_out.append(GetRegName(static_cast<RegType>(reg_family + (i < NUM_OF_PHYSICAL_REG ? R_TYPE_GP : R_TYPE_SYMBOLIC_GP)), i));
|
|
}
|
|
}
|
|
}
|
|
JITASM_TRACE("\tnode%d[label=\"Block%d\\ninstruction %d - %d\\nloop depth %d\\n%s\\n%s\"];\n", block->instr_begin, block->depth, block->instr_begin, block->instr_end - 1, block->loop_depth, live_in.c_str(), live_out.c_str());
|
|
int constraint = 0; avoid_unused_warn(constraint);
|
|
if (block->successor[0]) JITASM_TRACE("\t\"node%d\" -> \"node%d\" [constraint=%s];\n", block->instr_begin, block->successor[0]->instr_begin, constraint == 0 ? "true" : "false");
|
|
if (block->successor[1]) JITASM_TRACE("\t\"node%d\" -> \"node%d\" [constraint=%s];\n", block->instr_begin, block->successor[1]->instr_begin, constraint == 0 ? "true" : "false");
|
|
//if (block->dfs_parent) JITASM_TRACE("\t\"node%d\" -> \"node%d\" [color=\"#ff0000\"];\n", block->instr_begin, block->dfs_parent->instr_begin);
|
|
//if (block->immediate_dominator) JITASM_TRACE("\t\"node%d\" -> \"node%d\" [constraint=false, color=\"#0000ff\"];\n", block->instr_begin, block->immediate_dominator->instr_begin);
|
|
//for (size_t i = 0; i < block->predecessor.size(); ++i) {
|
|
// JITASM_TRACE("\t\"node%d\" -> \"node%d\" [constraint=false, color=\"#808080\"];\n", block->instr_begin, block->predecessor[i]->instr_begin);
|
|
//}
|
|
}
|
|
JITASM_TRACE("}\n");
|
|
}
|
|
|
|
/// Build control flow graph from instruction list
|
|
void Build(const Frontend& f)
|
|
{
|
|
initialize(f.instrs_.size());
|
|
size_t block_idx = 0;
|
|
for (size_t instr_idx = 0; instr_idx < f.instrs_.size(); ++instr_idx) {
|
|
BasicBlock *cur_block = blocks_[block_idx];
|
|
InstrID instr_id = f.instrs_[instr_idx].GetID();
|
|
if (Frontend::IsJump(instr_id) || instr_id == I_RET || instr_id == I_IRET) {
|
|
// Jump instruction always terminate basic block
|
|
if (instr_idx + 1 < cur_block->instr_end) {
|
|
// Split basic block
|
|
split(blocks_.begin() + block_idx, instr_idx + 1);
|
|
++block_idx;
|
|
}
|
|
else {
|
|
// Already splitted
|
|
++block_idx;
|
|
}
|
|
|
|
// Set successors of current block
|
|
if (instr_id == I_RET || instr_id == I_IRET) {
|
|
if (cur_block->successor[0])
|
|
cur_block->successor[0]->RemovePredecessor(cur_block);
|
|
cur_block->successor[0] = *get_exit_block();
|
|
(*get_exit_block())->predecessor.push_back(cur_block);
|
|
}
|
|
else {
|
|
const size_t jump_to = f.GetJumpTo(f.instrs_[instr_idx]); // jump target instruction index
|
|
BlockList::iterator jump_to_block = get_block(jump_to);
|
|
if (static_cast<size_t>(std::distance(blocks_.begin(), jump_to_block)) < block_idx) ++block_idx; // Adjust block_idx for split
|
|
BasicBlock *jump_target = *split(jump_to_block, jump_to);
|
|
|
|
// Update cur_block if split cur_block
|
|
if (jump_target->instr_begin <= instr_idx && instr_idx < jump_target->instr_end) {
|
|
cur_block = jump_target;
|
|
}
|
|
|
|
if (instr_id == I_JMP) {
|
|
if (cur_block->successor[0])
|
|
cur_block->successor[0]->RemovePredecessor(cur_block);
|
|
cur_block->successor[0] = jump_target;
|
|
jump_target->predecessor.push_back(cur_block);
|
|
}
|
|
else {
|
|
JITASM_ASSERT(instr_id == I_JCC || instr_id == I_LOOP);
|
|
if (cur_block->successor[1])
|
|
cur_block->successor[1]->RemovePredecessor(cur_block);
|
|
cur_block->successor[1] = jump_target;
|
|
jump_target->predecessor.push_back(cur_block);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make depth first orderd list
|
|
MakeDepthFirstBlocks(*get_block(0));
|
|
|
|
// Numbering depth
|
|
for (size_t i = 0; i < depth_first_blocks_.size(); ++i) {
|
|
depth_first_blocks_[i]->depth = i;
|
|
}
|
|
|
|
// Detect loops
|
|
DetectLoops();
|
|
}
|
|
|
|
/// Build dummy control flow graph which has enter and exit blocks.
|
|
void BuildDummy(const Frontend& f)
|
|
{
|
|
BasicBlock *enter_block = *initialize(f.instrs_.size());
|
|
BasicBlock *exit_block = enter_block->successor[0];
|
|
|
|
enter_block->depth = 0;
|
|
depth_first_blocks_.push_back(enter_block);
|
|
if (exit_block) {
|
|
exit_block->depth = 1;
|
|
exit_block->dfs_parent = enter_block;
|
|
exit_block->immediate_dominator = enter_block;
|
|
depth_first_blocks_.push_back(exit_block);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Prepare compile
|
|
/**
|
|
* - Re-number symbolic register ID.
|
|
* - Check if register allocation is needed or not.
|
|
* - Look over physical register use.
|
|
*
|
|
* \param[in,out] instrs Instruction list
|
|
* \param[out] modified_physical_reg Modified physical register mask
|
|
* \param[out] need_reg_alloc Register allocation is needed or not
|
|
* \return There is any compile process if it is true.
|
|
*/
|
|
inline bool PrepareCompile(Frontend::InstrList& instrs, uint32 (&modified_physical_reg)[3], bool (&need_reg_alloc)[3])
|
|
{
|
|
struct RegIDMap {
|
|
int next_id_;
|
|
std::map<int, int> id_map_;
|
|
RegIDMap() : next_id_(NUM_OF_PHYSICAL_REG) {}
|
|
int GetNormalizedID(int id) {
|
|
std::map<int, int>::iterator it = id_map_.find(id);
|
|
if (it != id_map_.end()) {return it->second;}
|
|
int new_id = next_id_++;
|
|
id_map_.insert(std::pair<int, int>(id, new_id));
|
|
return new_id;
|
|
}
|
|
};
|
|
RegIDMap reg_id_map[3]; // GP, MMX, XMM/YMM
|
|
modified_physical_reg[0] = modified_physical_reg[1] = modified_physical_reg[2] = 0;
|
|
need_reg_alloc[0] = need_reg_alloc[1] = need_reg_alloc[2] = false;
|
|
bool compile_process = false;
|
|
|
|
for (Frontend::InstrList::iterator it = instrs.begin(); it != instrs.end(); ++it) {
|
|
const InstrID instr_id = it->GetID();
|
|
if (instr_id == I_COMPILER_DECLARE_REG_ARG || instr_id == I_COMPILER_DECLARE_STACK_ARG || instr_id == I_COMPILER_DECLARE_RESULT_REG || instr_id == I_COMPILER_PROLOG || instr_id == I_COMPILER_EPILOG) {
|
|
compile_process = true;
|
|
}
|
|
|
|
for (size_t i = 0; i < Instr::MAX_OPERAND_COUNT; ++i) {
|
|
detail::Opd& opd = it->GetOpd(i);
|
|
if (opd.IsReg() && !opd.IsFpuReg()) {
|
|
const RegID& reg = opd.GetReg();
|
|
const size_t reg_family = GetRegFamily(reg.type);
|
|
if (reg.IsSymbolic()) {
|
|
opd.reg_.id = reg_id_map[reg_family].GetNormalizedID(reg.id);
|
|
} else {
|
|
if (opd.opdtype_ & O_TYPE_WRITE) {
|
|
// This physical register is modified
|
|
modified_physical_reg[reg_family] |= (1 << reg.id);
|
|
}
|
|
|
|
if ((opd.reg_assignable_ & (1 << reg.id)) == 0) {
|
|
// Specified physical register does not fit the instruction.
|
|
// Let's try to assign optimal physical register by register allocation.
|
|
need_reg_alloc[reg_family] = true;
|
|
}
|
|
}
|
|
} else if (opd.IsMem()) {
|
|
const RegID& base = opd.GetBase();
|
|
if (base.IsSymbolic()) {
|
|
opd.base_.id = reg_id_map[0].GetNormalizedID(base.id);
|
|
}
|
|
|
|
const RegID& index = opd.GetIndex();
|
|
if (index.IsSymbolic()) {
|
|
opd.index_.id = reg_id_map[0].GetNormalizedID(index.id);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (size_t i = 0; i < 3; ++i) {
|
|
if (!need_reg_alloc[i] && reg_id_map[i].next_id_ > NUM_OF_PHYSICAL_REG) {
|
|
need_reg_alloc[i] = true;
|
|
}
|
|
}
|
|
|
|
return compile_process || need_reg_alloc[0] || need_reg_alloc[1] || need_reg_alloc[2];
|
|
}
|
|
|
|
/// Check the instruction if it break register dependence
|
|
inline bool IsBreakDependenceInstr(const Instr& instr)
|
|
{
|
|
// Instructions
|
|
// SUB, SBB, XOR, PXOR, XORPS, XORPD, PANDN, PSUBxx, PCMPxx
|
|
// TODO: Add AVX instructions
|
|
const InstrID id = instr.GetID();
|
|
if (id == I_SUB || id == I_SBB || id == I_XOR || id == I_PXOR || id == I_XORPS || id == I_XORPD || id == I_PANDN ||
|
|
id == I_PSUBB || id == I_PSUBW || id == I_PSUBD || id == I_PSUBQ || id == I_PSUBSB || id == I_PSUBSW || id == I_PSUBUSB || id == I_PSUBUSW ||
|
|
id == I_PCMPEQB || id == I_PCMPEQW || id == I_PCMPEQD || id == I_PCMPEQQ || id == I_PCMPGTB || id == I_PCMPGTW || id == I_PCMPGTD || id == I_PCMPGTQ) {
|
|
// source and destination operands are the same register.
|
|
// 8bit or 16bit register cannot break dependence.
|
|
const detail::Opd& opd0 = instr.GetOpd(0);
|
|
const detail::Opd& opd1 = instr.GetOpd(1);
|
|
const OpdSize opdsize = opd0.GetSize();
|
|
if (opd0 == opd1 && opd0.IsReg() && opdsize != O_SIZE_8 && opdsize != O_SIZE_16) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Live Variable Analysis
|
|
inline void LiveVariableAnalysis(const Frontend& f, ControlFlowGraph& cfg, VariableManager& var_manager)
|
|
{
|
|
std::vector<BasicBlock *> update_target;
|
|
update_target.reserve(cfg.size());
|
|
|
|
for (ControlFlowGraph::BlockList::iterator it = cfg.begin(); it != cfg.end(); ++it) {
|
|
BasicBlock *block = *it;
|
|
// Scanning instructions of basic block and make register lifetime table
|
|
for (size_t i = block->instr_begin; i != block->instr_end; ++i) {
|
|
const size_t instr_offset = i - block->instr_begin;
|
|
const Instr& instr = f.instrs_[i];
|
|
if (instr.GetID() == I_COMPILER_DECLARE_REG_ARG) {
|
|
// Declare function argument on register
|
|
const detail::Opd& opd0 = instr.GetOpd(0);
|
|
const RegID& reg = opd0.GetReg();
|
|
// Avoid passing operand size 8 or 16 to AddUsePoint
|
|
// because they are treated as partial access register and cause miss assignment of register.
|
|
OpdSize opd_size = opd0.GetSize();
|
|
if (opd_size == O_SIZE_8 || opd_size == O_SIZE_16) {
|
|
opd_size = O_SIZE_32;
|
|
}
|
|
const detail::Opd& opd1 = instr.GetOpd(1);
|
|
block->GetLifetime(reg.type).AddUsePoint(instr_offset, reg, opd0.opdtype_, opd_size, opd0.reg_assignable_);
|
|
if (opd1.IsMem()) {
|
|
var_manager.SetSpillSlot(reg.type, reg.id, Addr(opd1.GetBase(), opd1.GetDisp()));
|
|
}
|
|
} else if (instr.GetID() == I_COMPILER_DECLARE_STACK_ARG) {
|
|
// Declare function argument on stack
|
|
// The register variable starts "spill" state by O_TYPE_MEM of AddUsePoint
|
|
const detail::Opd& opd0 = instr.GetOpd(0); // Register variable.
|
|
const RegID& reg = opd0.GetReg();
|
|
// Avoid passing operand size 8 or 16 to AddUsePoint
|
|
// because they are treated as partial access register and cause miss assignment of register.
|
|
OpdSize opd_size = opd0.GetSize();
|
|
if (opd_size == O_SIZE_8 || opd_size == O_SIZE_16) {
|
|
opd_size = O_SIZE_32;
|
|
}
|
|
const detail::Opd& opd1 = instr.GetOpd(1); // Argument
|
|
block->GetLifetime(reg.type).AddUsePoint(instr_offset, reg, static_cast<OpdType>(O_TYPE_MEM | O_TYPE_WRITE), opd_size, opd0.reg_assignable_);
|
|
var_manager.SetSpillSlot(reg.type, reg.id, Addr(opd1.GetBase(), opd1.GetDisp()));
|
|
} else if (instr.GetID() == I_COMPILER_DECLARE_RESULT_REG) {
|
|
// Declare function result on register
|
|
const detail::Opd& opd0 = instr.GetOpd(0);
|
|
const RegID& reg = opd0.GetReg();
|
|
block->GetLifetime(reg.type).AddUsePoint(instr_offset, reg, opd0.opdtype_, opd0.GetSize(), opd0.reg_assignable_);
|
|
} else if (IsBreakDependenceInstr(instr)) {
|
|
// Add only 1 use point if the instruction that break register dependence
|
|
const detail::Opd& opd = instr.GetOpd(0);
|
|
const RegID& reg = opd.GetReg();
|
|
block->GetLifetime(reg.type).AddUsePoint(instr_offset, reg, static_cast<OpdType>(O_TYPE_REG | O_TYPE_WRITE), opd.GetSize(), opd.reg_assignable_);
|
|
var_manager.UpdateVarSize(reg.type, reg.id, opd.GetSize() / 8);
|
|
} else if (instr.GetID() == I_PUSHAD || instr.GetID() == I_POPAD) {
|
|
// Add use point of pushad/popad
|
|
const OpdType type = static_cast<OpdType>(O_TYPE_REG | (instr.GetID() == I_PUSHAD ? O_TYPE_READ : O_TYPE_WRITE));
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, EAX), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, ECX), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, EDX), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, EBX), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, EBP), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, ESI), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, EDI), type, O_SIZE_32, 0xFFFFFFFF);
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_GP, ESP), static_cast<OpdType>(O_TYPE_REG | O_TYPE_READ | O_TYPE_WRITE), O_SIZE_32, 0xFFFFFFFF);
|
|
} else if (instr.GetID() == I_VZEROALL || instr.GetID() == I_VZEROUPPER) {
|
|
// Add use point of vzeroall/vzeroupper
|
|
const OpdType type = static_cast<OpdType>(O_TYPE_REG | (instr.GetID() == I_VZEROALL ? O_TYPE_WRITE : O_TYPE_READ | O_TYPE_WRITE));
|
|
for (int j = 0; j < NUM_OF_PHYSICAL_REG; ++j) {
|
|
block->GetLifetime(R_TYPE_YMM).AddUsePoint(instr_offset, RegID::CreatePhysicalRegID(R_TYPE_YMM, static_cast<PhysicalRegID>(YMM0 + j)), type, O_SIZE_256, 0xFFFFFFFF);
|
|
}
|
|
} else {
|
|
// Add each use point of all operands
|
|
for (size_t j = 0; j < Instr::MAX_OPERAND_COUNT; ++j) {
|
|
const detail::Opd& opd = instr.GetOpd(j);
|
|
if (opd.IsGpReg() || opd.IsMmxReg() || opd.IsXmmReg() || opd.IsYmmReg()) {
|
|
// Register operand
|
|
const RegID& reg = opd.GetReg();
|
|
block->GetLifetime(reg.type).AddUsePoint(instr_offset, reg, opd.opdtype_, opd.GetSize(), opd.reg_assignable_);
|
|
var_manager.UpdateVarSize(reg.type, reg.id, opd.GetSize() / 8);
|
|
} else if (opd.IsMem()) {
|
|
// Memory operand
|
|
const RegID& base = opd.GetBase();
|
|
if (!base.IsInvalid()) {
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, base, static_cast<OpdType>(O_TYPE_REG | O_TYPE_READ), opd.GetAddressSize(), 0xFFFFFFFF);
|
|
var_manager.UpdateVarSize(R_TYPE_GP, base.id, opd.GetAddressSize() / 8);
|
|
}
|
|
const RegID& index = opd.GetIndex();
|
|
if (!index.IsInvalid()) {
|
|
block->GetLifetime(R_TYPE_GP).AddUsePoint(instr_offset, index, static_cast<OpdType>(O_TYPE_REG | O_TYPE_READ), opd.GetAddressSize(), 0xFFFFFFFF);
|
|
var_manager.UpdateVarSize(R_TYPE_GP, index.id, opd.GetAddressSize() / 8);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make GEN and KILL set
|
|
for (size_t reg_family = 0; reg_family < 3; ++reg_family) {
|
|
Lifetime& lifetime = block->lifetime[reg_family];
|
|
const size_t num_of_used_reg = lifetime.use_points.size();
|
|
for (size_t i = 0; i < num_of_used_reg; ++i) {
|
|
if (!lifetime.use_points[i].empty()) {
|
|
OpdType type = lifetime.use_points[i][0].type;
|
|
if (type & O_TYPE_READ) {
|
|
lifetime.gen.set_bit(i, true); // GEN
|
|
} else {
|
|
JITASM_ASSERT(type & O_TYPE_WRITE);
|
|
lifetime.kill.set_bit(i, true); // KILL
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
update_target.push_back(block);
|
|
}
|
|
|
|
while (!update_target.empty()) {
|
|
BasicBlock *block = update_target.back();
|
|
update_target.pop_back();
|
|
for (size_t reg_family = 0; reg_family < 3; ++reg_family) {
|
|
Lifetime& lifetime = block->lifetime[reg_family];
|
|
if (lifetime.dirty_live_out) {
|
|
// live_out is the union of the live_in of the successors
|
|
for (size_t i = 0; i < 2; ++i) {
|
|
if (block->successor[i]) {
|
|
lifetime.live_out.set_union(block->successor[i]->lifetime[reg_family].live_in);
|
|
}
|
|
}
|
|
lifetime.dirty_live_out = false;
|
|
|
|
// live_in = gen OR (live_out - kill)
|
|
BitVector new_live_in = lifetime.live_out;
|
|
new_live_in.set_subtract(lifetime.kill);
|
|
new_live_in.set_union(lifetime.gen);
|
|
|
|
if (!lifetime.live_in.is_equal(new_live_in)) {
|
|
lifetime.live_in.swap(new_live_in);
|
|
|
|
for (size_t i = 0; i < block->predecessor.size(); ++i) {
|
|
block->predecessor[i]->lifetime[reg_family].dirty_live_out = true;
|
|
update_target.push_back(block->predecessor[i]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Linear scan register allocation
|
|
/**
|
|
* \param[in,out] cfg Control flow graph and additional information
|
|
* \param[in] reg_family Register family
|
|
* \param[in] available_reg Available physical register mask
|
|
* \param[out] var_attrs Variable attributes
|
|
* \return Used physical register mask
|
|
*/
|
|
inline uint32 LinearScanRegisterAlloc(ControlFlowGraph& cfg, size_t reg_family, uint32 available_reg, std::vector<VarAttribute>& var_attrs)
|
|
{
|
|
const uint32 available_reg_count = detail::Count1Bits(available_reg);
|
|
|
|
std::vector<int> total_spill_cost;
|
|
for (ControlFlowGraph::BlockList::iterator block = cfg.begin(); block != cfg.end(); ++block) {
|
|
(*block)->lifetime[reg_family].BuildIntervals();
|
|
(*block)->lifetime[reg_family].GetSpillCost((*block)->GetFrequency(), total_spill_cost);
|
|
}
|
|
|
|
uint32 used_reg = 0;
|
|
const Lifetime::Interval *last_interval = NULL;
|
|
size_t last_loop_depth = 0;
|
|
for (ControlFlowGraph::BlockList::iterator block = cfg.dfs_begin(); block != cfg.dfs_end(); ++block) {
|
|
Lifetime& lifetime = (*block)->lifetime[reg_family];
|
|
const size_t loop_depth = (*block)->loop_depth;
|
|
|
|
// Spill identification
|
|
lifetime.SpillIdentification(available_reg_count, total_spill_cost, (*block)->GetFrequency(), last_loop_depth == loop_depth ? last_interval : NULL, var_attrs);
|
|
|
|
// Register assignment
|
|
used_reg |= lifetime.AssignRegister(available_reg, last_interval);
|
|
|
|
#ifdef JITASM_DEBUG_DUMP
|
|
lifetime.DumpIntervals((*block)->depth, true);
|
|
#endif
|
|
if (!lifetime.intervals.empty()) {
|
|
last_interval = &lifetime.intervals.back();
|
|
last_loop_depth = loop_depth;
|
|
}
|
|
}
|
|
|
|
return used_reg;
|
|
}
|
|
|
|
/// General purpose register operator
|
|
struct GpRegOperator
|
|
{
|
|
Frontend *f_;
|
|
const VariableManager *var_manager_;
|
|
|
|
GpRegOperator(Frontend *f, const VariableManager *var_manager) : f_(f), var_manager_(var_manager) {}
|
|
|
|
void Move(PhysicalRegID dst_reg, PhysicalRegID src_reg, size_t /*size*/)
|
|
{
|
|
f_->mov(Reg(dst_reg), Reg(src_reg));
|
|
}
|
|
|
|
void Swap(PhysicalRegID reg1, PhysicalRegID reg2, size_t /*size*/)
|
|
{
|
|
f_->xchg(Reg(reg1), Reg(reg2));
|
|
}
|
|
|
|
void Load(PhysicalRegID dst_reg, int var)
|
|
{
|
|
f_->mov(Reg(dst_reg), f_->ptr[var_manager_->GetSpillSlot(0, var)]);
|
|
}
|
|
|
|
void Store(int var, PhysicalRegID src_reg)
|
|
{
|
|
f_->mov(f_->ptr[var_manager_->GetSpillSlot(0, var)], Reg(src_reg));
|
|
}
|
|
};
|
|
|
|
/// MMX register operator
|
|
struct MmxRegOperator
|
|
{
|
|
Frontend *f_;
|
|
const VariableManager *var_manager_;
|
|
|
|
MmxRegOperator(Frontend *f, const VariableManager *var_manager) : f_(f), var_manager_(var_manager) {}
|
|
|
|
void Move(PhysicalRegID dst_reg, PhysicalRegID src_reg, size_t /*size*/)
|
|
{
|
|
f_->movq(MmxReg(dst_reg), MmxReg(src_reg));
|
|
}
|
|
|
|
void Swap(PhysicalRegID reg1, PhysicalRegID reg2, size_t /*size*/)
|
|
{
|
|
f_->pxor(MmxReg(reg1), MmxReg(reg2));
|
|
f_->pxor(MmxReg(reg2), MmxReg(reg1));
|
|
f_->pxor(MmxReg(reg1), MmxReg(reg2));
|
|
}
|
|
|
|
void Load(PhysicalRegID dst_reg, int var)
|
|
{
|
|
f_->movq(MmxReg(dst_reg), f_->mmword_ptr[var_manager_->GetSpillSlot(1, var)]);
|
|
}
|
|
|
|
void Store(int var, PhysicalRegID src_reg)
|
|
{
|
|
f_->movq(f_->mmword_ptr[var_manager_->GetSpillSlot(1, var)], MmxReg(src_reg));
|
|
}
|
|
};
|
|
|
|
/// XMM/YMM register operator
|
|
struct XmmRegOperator
|
|
{
|
|
Frontend *f_;
|
|
const VariableManager *var_manager_;
|
|
|
|
XmmRegOperator(Frontend *f, const VariableManager *var_manager) : f_(f), var_manager_(var_manager) {}
|
|
|
|
void Move(PhysicalRegID dst_reg, PhysicalRegID src_reg, size_t size)
|
|
{
|
|
if (size == 128 / 8) {
|
|
f_->movaps(XmmReg(dst_reg), XmmReg(src_reg));
|
|
} else if (size == 256 / 8) {
|
|
f_->vmovaps(YmmReg(dst_reg), YmmReg(src_reg));
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
void Swap(PhysicalRegID reg1, PhysicalRegID reg2, size_t size)
|
|
{
|
|
if (size == 128 / 8) {
|
|
f_->xorps(XmmReg(reg1), XmmReg(reg2));
|
|
f_->xorps(XmmReg(reg2), XmmReg(reg1));
|
|
f_->xorps(XmmReg(reg1), XmmReg(reg2));
|
|
} else if (size == 256 / 8) {
|
|
f_->vxorps(YmmReg(reg1), YmmReg(reg1), YmmReg(reg2));
|
|
f_->vxorps(YmmReg(reg2), YmmReg(reg1), YmmReg(reg2));
|
|
f_->vxorps(YmmReg(reg1), YmmReg(reg1), YmmReg(reg2));
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
void Load(PhysicalRegID dst_reg, int var)
|
|
{
|
|
const size_t size = var_manager_->GetVarSize(2, var);
|
|
if (size == 128 / 8) {
|
|
f_->movaps(XmmReg(dst_reg), f_->xmmword_ptr[var_manager_->GetSpillSlot(2, var)]);
|
|
} else if (size == 256 / 8) {
|
|
f_->vmovaps(YmmReg(dst_reg), f_->ymmword_ptr[var_manager_->GetSpillSlot(2, var)]);
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
void Store(int var, PhysicalRegID src_reg)
|
|
{
|
|
const size_t size = var_manager_->GetVarSize(2, var);
|
|
if (size == 128 / 8) {
|
|
f_->movaps(f_->xmmword_ptr[var_manager_->GetSpillSlot(2, var)], XmmReg(src_reg));
|
|
} else if (size == 256 / 8) {
|
|
f_->vmovaps(f_->ymmword_ptr[var_manager_->GetSpillSlot(2, var)], YmmReg(src_reg));
|
|
} else {
|
|
JITASM_ASSERT(0);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Strongly connected components finder
|
|
/**
|
|
* Tarjan's algorithm
|
|
*/
|
|
class SCCFinder {
|
|
private:
|
|
struct Node {
|
|
int index;
|
|
int lowlink;
|
|
Node() : index(-1) {}
|
|
};
|
|
Node nodes_[NUM_OF_PHYSICAL_REG];
|
|
int *successors_;
|
|
int index;
|
|
FixedArray<int, NUM_OF_PHYSICAL_REG> scc_;
|
|
|
|
/// Is v in scc_?
|
|
bool IsInsideSCC(int v) const
|
|
{
|
|
for (size_t i = 0; i < scc_.size(); ++i) {
|
|
if (scc_[i] == v) {return true;}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template<class Fn> void Find(int v, Fn& fn)
|
|
{
|
|
nodes_[v].index = index;
|
|
nodes_[v].lowlink = index;
|
|
++index;
|
|
scc_.push_back(v);
|
|
const int w = successors_[v];
|
|
if (w != -1) {
|
|
if (nodes_[w].index == -1) {
|
|
// successor w has not been visited yet
|
|
Find(w, fn);
|
|
if (nodes_[w].lowlink < nodes_[v].lowlink) {
|
|
nodes_[v].lowlink = nodes_[w].lowlink;
|
|
}
|
|
} else if (IsInsideSCC(w)) {
|
|
// successor w is in scc_
|
|
if (nodes_[w].index < nodes_[v].lowlink) {
|
|
nodes_[v].lowlink = nodes_[w].index;
|
|
}
|
|
}
|
|
}
|
|
if (nodes_[v].lowlink == nodes_[v].index && !scc_.empty()) {
|
|
// v is the root of scc_
|
|
size_t i = 0;
|
|
while (scc_[i] != v) {++i;}
|
|
fn(&scc_[i], scc_.size() - i);
|
|
while (i < scc_.size()) {scc_.pop_back();}
|
|
}
|
|
}
|
|
|
|
public:
|
|
SCCFinder(int *successors) : successors_(successors), index(0) {}
|
|
|
|
template<class Fn> void operator()(Fn fn)
|
|
{
|
|
for (int v = 0; v < NUM_OF_PHYSICAL_REG; ++v) {
|
|
if (successors_[v] != -1 && nodes_[v].index == -1) {
|
|
Find(v, fn);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
struct Operations {
|
|
int move[NUM_OF_PHYSICAL_REG];
|
|
int load[NUM_OF_PHYSICAL_REG];
|
|
int store[NUM_OF_PHYSICAL_REG];
|
|
uint8 size[NUM_OF_PHYSICAL_REG];
|
|
std::pair<const Lifetime::Interval *, const Lifetime::Interval *> interval;
|
|
const std::vector<VarAttribute> *var_attrs;
|
|
|
|
Operations(const Lifetime::Interval *first, const Lifetime::Interval *second, const std::vector<VarAttribute> *vattrs) : interval(first, second), var_attrs(vattrs) {
|
|
for (size_t i = 0; i < NUM_OF_PHYSICAL_REG; ++i) {move[i] = load[i] = store[i] = -1;}
|
|
}
|
|
|
|
void operator()(size_t var) {
|
|
if (interval.second->liveness.get_bit(var)) {
|
|
const bool first_spill = interval.first->spill.get_bit(var);
|
|
const bool second_spill = interval.second->spill.get_bit(var);
|
|
if (!first_spill) {
|
|
const int first_reg = interval.first->assignment_table[var];
|
|
if (!second_spill) {
|
|
// register -> register
|
|
move[first_reg] = interval.second->assignment_table[var];
|
|
size[first_reg] = var_attrs->at(var).size;
|
|
} else {
|
|
// register -> stack
|
|
store[first_reg] = static_cast<int>(var);
|
|
}
|
|
} else {
|
|
if (!second_spill) {
|
|
// stack -> register
|
|
load[interval.second->assignment_table[var]] = static_cast<int>(var);
|
|
} else {
|
|
// stack -> stack
|
|
// do nothing
|
|
}
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
template<class RegOp>
|
|
struct MoveGenerator {
|
|
int *moves_;
|
|
uint8 *sizes_;
|
|
RegOp *reg_operator_;
|
|
MoveGenerator(int *moves, uint8 *sizes, RegOp *reg_operator) : moves_(moves), sizes_(sizes), reg_operator_(reg_operator) {}
|
|
void operator()(const int *scc, size_t count) {
|
|
if (count > 1) {
|
|
for (size_t i = 0; i < count - 1; ++i) {
|
|
const int r = scc[i];
|
|
JITASM_ASSERT(r != moves_[r] && moves_[r] != -1);
|
|
reg_operator_->Swap(static_cast<PhysicalRegID>(moves_[r]), static_cast<PhysicalRegID>(r), sizes_[r]);
|
|
JITASM_TRACE("Swap%d %d <-> %d\n", sizes_[r] * 8, moves_[r], r);
|
|
}
|
|
} else if (moves_[scc[0]] != scc[0] && moves_[scc[0]] != -1) {
|
|
const int r = scc[0];
|
|
reg_operator_->Move(static_cast<PhysicalRegID>(moves_[r]), static_cast<PhysicalRegID>(r), sizes_[r]);
|
|
JITASM_TRACE("Move%d %d -> %d\n", sizes_[r] * 8, r, moves_[r]);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Generate inter-interval instructions
|
|
/**
|
|
* - Move register
|
|
* - Load from stack slot
|
|
* - Store to stack slot
|
|
*/
|
|
template<class RegOp>
|
|
inline void GenerateInterIntervalInstr(const Lifetime::Interval& first_interval, const Lifetime::Interval& second_interval, const std::vector<VarAttribute>& var_attrs, RegOp reg_operator)
|
|
{
|
|
#ifdef JITASM_DEBUG_DUMP
|
|
first_interval.Dump(true);
|
|
#endif
|
|
|
|
Operations ops(&first_interval, &second_interval, &var_attrs);
|
|
first_interval.liveness.query_bit_indexes(ops);
|
|
|
|
// Store instructions
|
|
for (size_t r = 0; r < NUM_OF_PHYSICAL_REG; ++r) {
|
|
if (ops.store[r] != -1) {
|
|
reg_operator.Store(ops.store[r], static_cast<PhysicalRegID>(r));
|
|
JITASM_TRACE("Store %d (physical reg %d)\n", ops.store[r], r);
|
|
}
|
|
}
|
|
|
|
// Move instructions
|
|
SCCFinder scc_finder(ops.move);
|
|
scc_finder(MoveGenerator<RegOp>(ops.move, ops.size, ®_operator));
|
|
|
|
// Load instructions
|
|
for (size_t r = 0; r < NUM_OF_PHYSICAL_REG; ++r) {
|
|
if (ops.load[r] != -1) {
|
|
reg_operator.Load(static_cast<PhysicalRegID>(r), ops.load[r]);
|
|
JITASM_TRACE("Load %d (physical reg %d)\n", ops.load[r], r);
|
|
}
|
|
}
|
|
|
|
#ifdef JITASM_DEBUG_DUMP
|
|
second_interval.Dump(true);
|
|
#endif
|
|
}
|
|
|
|
/// Generate inter-block instructions
|
|
inline void GenerateInterBlockInstr(const BasicBlock *first_block, const BasicBlock *second_block, Frontend& f, const VariableManager& var_manager)
|
|
{
|
|
if (!first_block->lifetime[0].intervals.empty() && !second_block->lifetime[0].intervals.empty()) {
|
|
JITASM_TRACE("---- General purpose register ----\n");
|
|
GenerateInterIntervalInstr(first_block->lifetime[0].intervals.back(), second_block->lifetime[0].intervals.front(), var_manager.GetAttributes(0), GpRegOperator(&f, &var_manager));
|
|
}
|
|
if (!first_block->lifetime[1].intervals.empty() && !second_block->lifetime[1].intervals.empty()) {
|
|
JITASM_TRACE("---- MMX register ----\n");
|
|
GenerateInterIntervalInstr(first_block->lifetime[1].intervals.back(), second_block->lifetime[1].intervals.front(), var_manager.GetAttributes(1), MmxRegOperator(&f, &var_manager));
|
|
}
|
|
if (!first_block->lifetime[2].intervals.empty() && !second_block->lifetime[2].intervals.empty()) {
|
|
JITASM_TRACE("---- XMM/YMM register ----\n");
|
|
GenerateInterIntervalInstr(first_block->lifetime[2].intervals.back(), second_block->lifetime[2].intervals.front(), var_manager.GetAttributes(2), XmmRegOperator(&f, &var_manager));
|
|
}
|
|
}
|
|
|
|
/// Generate prolog instructions
|
|
inline void GenerateProlog(Frontend& f, const uint32 (&preserved_reg)[3], const Addr& preserved_reg_stack)
|
|
{
|
|
avoid_unused_warn(preserved_reg_stack);
|
|
|
|
f.push(f.zbp);
|
|
f.mov(f.zbp, f.zsp);
|
|
|
|
size_t stack_size = f.stack_manager_.GetSize();
|
|
|
|
// Save general-purpose registers
|
|
size_t num_of_preserved_gp_reg = 0;
|
|
for (uint32 reg_mask = preserved_reg[0]; reg_mask != 0; ++num_of_preserved_gp_reg) {
|
|
uint32 reg_id = detail::bit_scan_forward(reg_mask);
|
|
f.push(Reg(static_cast<PhysicalRegID>(reg_id)));
|
|
reg_mask &= ~(1 << reg_id);
|
|
}
|
|
|
|
#ifdef JITASM64
|
|
// Stack base
|
|
if (stack_size > 0) {
|
|
if (num_of_preserved_gp_reg & 1) {
|
|
// Copy with alignment
|
|
f.lea(f.rbx, f.ptr[f.rsp - 8]);
|
|
stack_size += 8; // padding for keep alignment
|
|
} else {
|
|
f.mov(f.rbx, f.rsp);
|
|
}
|
|
}
|
|
#else
|
|
if (stack_size > 0) {
|
|
// Align stack pointer
|
|
f.and_(f.esp, 0xFFFFFFF0);
|
|
|
|
// Stack base
|
|
f.mov(f.ebx, f.esp);
|
|
}
|
|
#endif
|
|
|
|
// Move stack pointer
|
|
if (stack_size > 0) {
|
|
f.sub(f.zsp, static_cast<uint32>(stack_size));
|
|
}
|
|
|
|
#ifdef JITASM64
|
|
// Save xmm registers
|
|
uint32 xmm_reg_mask = preserved_reg[2];
|
|
for (size_t i = 0; xmm_reg_mask != 0; ++i) {
|
|
uint32 reg_id = detail::bit_scan_forward(xmm_reg_mask);
|
|
f.movaps(f.xmmword_ptr[preserved_reg_stack + 16 * i], XmmReg(static_cast<PhysicalRegID>(reg_id)));
|
|
xmm_reg_mask &= ~(1 << reg_id);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/// Generate epilog instructions
|
|
inline void GenerateEpilog(Frontend& f, const uint32 (&preserved_reg)[3], const Addr& preserved_reg_stack)
|
|
{
|
|
avoid_unused_warn(preserved_reg_stack);
|
|
|
|
size_t stack_size = f.stack_manager_.GetSize();
|
|
const size_t num_of_preserved_gp_reg = detail::Count1Bits(preserved_reg[0]);
|
|
|
|
#ifdef JITASM64
|
|
// Restore xmm registers
|
|
// Push the register id and index by saved order
|
|
FixedArray<uint32, 16> regs;
|
|
for (uint32 reg_mask = preserved_reg[2]; reg_mask != 0; ) {
|
|
uint32 reg_id = detail::bit_scan_forward(reg_mask);
|
|
regs.push_back(reg_id);
|
|
reg_mask &= ~(1 << reg_id);
|
|
}
|
|
|
|
// Insert restore instruction by inverse order
|
|
while (!regs.empty()) {
|
|
f.movaps(XmmReg(static_cast<PhysicalRegID>(regs.back())), f.xmmword_ptr[preserved_reg_stack + 16 * (regs.size() - 1)]);
|
|
regs.pop_back();
|
|
}
|
|
|
|
// Move stack pointer
|
|
if (stack_size > 0) {
|
|
if (num_of_preserved_gp_reg & 1) {
|
|
stack_size += 8; // padding for keep alignment
|
|
}
|
|
f.add(f.zsp, static_cast<uint32>(stack_size));
|
|
}
|
|
#else
|
|
// Move stack pointer
|
|
if (stack_size > 0) {
|
|
f.lea(f.zsp, f.ptr[f.zbp - num_of_preserved_gp_reg * 4]);
|
|
}
|
|
#endif
|
|
|
|
// Restore general-purpose registers
|
|
for (uint32 reg_mask = preserved_reg[0]; reg_mask != 0; ) {
|
|
uint32 reg_id = detail::bit_scan_reverse(reg_mask);
|
|
f.pop(Reg(static_cast<PhysicalRegID>(reg_id)));
|
|
reg_mask &= ~(1 << reg_id);
|
|
}
|
|
|
|
f.pop(f.zbp);
|
|
f.ret();
|
|
}
|
|
|
|
struct OrderedLabel {
|
|
size_t id;
|
|
size_t instr_idx;
|
|
OrderedLabel(size_t id_, size_t instr_idx_) : id(id_), instr_idx(instr_idx_) {}
|
|
bool operator<(const OrderedLabel& rhs) const {return instr_idx < rhs.instr_idx;}
|
|
};
|
|
|
|
/// Rewrite instructions
|
|
/**
|
|
* - Replace symbolic register to physical register
|
|
* - Generate instructions for register move and spill
|
|
* - Generate function prolog and epilog
|
|
*/
|
|
inline void RewriteInstructions(Frontend& f, const ControlFlowGraph& cfg, const VariableManager& var_manager, const uint32 (&preserved_reg)[3], const Addr& preserved_reg_stack)
|
|
{
|
|
// Prepare instruction number ordered labels for adjusting label position
|
|
std::vector<OrderedLabel> orderd_labels; // instruction number order
|
|
orderd_labels.reserve(f.labels_.size());
|
|
for (size_t i = 0; i < f.labels_.size(); ++i) {
|
|
orderd_labels.push_back(OrderedLabel(i, f.labels_[i].instr_number));
|
|
}
|
|
std::sort(orderd_labels.begin(), orderd_labels.end());
|
|
std::vector<OrderedLabel>::iterator cur_label = orderd_labels.begin();
|
|
|
|
// Move original instruction list
|
|
// Now the instruction list in Frontend is empty!
|
|
Frontend::InstrList org_instrs;
|
|
org_instrs.swap(f.instrs_);
|
|
f.instrs_.reserve(org_instrs.size());
|
|
|
|
for (ControlFlowGraph::BlockList::const_iterator it = cfg.begin(); it != cfg.end(); ++it) {
|
|
const BasicBlock *block = *it;
|
|
JITASM_TRACE("\n==== Block%d ====\n", block->depth);
|
|
if (block->depth == (size_t)-1) {
|
|
// Eliminate unreachable code block
|
|
JITASM_TRACE("Unreachable block!\n");
|
|
|
|
// Invalidate labels in this block
|
|
while (cur_label != orderd_labels.end() && cur_label->instr_idx < block->instr_end) {
|
|
f.labels_[cur_label->id].instr_number = (size_t)-1;
|
|
++cur_label;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
// Initialize interval_range
|
|
detail::ConstRange< std::vector<Lifetime::Interval> > interval_range[3];
|
|
for (size_t reg_family = 0; reg_family < 3; ++reg_family) {
|
|
interval_range[reg_family].first = block->lifetime[reg_family].intervals.begin();
|
|
interval_range[reg_family].second = block->lifetime[reg_family].intervals.end();
|
|
}
|
|
|
|
const size_t instr_size = block->instr_end - block->instr_begin;
|
|
for (size_t instr_offset = 0; instr_offset < instr_size; ++instr_offset) {
|
|
const size_t org_instr_index = block->instr_begin + instr_offset;
|
|
|
|
// Step each intervals and insert inter-interval instructions
|
|
if (interval_range[0].size() > 1 && detail::next(interval_range[0].first)->instr_idx_offset == instr_offset) {
|
|
JITASM_TRACE("---- General purpose register ----\n");
|
|
const Lifetime::Interval& first_interval = *interval_range[0].first;
|
|
GenerateInterIntervalInstr(first_interval, *++interval_range[0].first, var_manager.GetAttributes(0), GpRegOperator(&f, &var_manager));
|
|
}
|
|
if (interval_range[1].size() > 1 && detail::next(interval_range[1].first)->instr_idx_offset == instr_offset) {
|
|
JITASM_TRACE("---- MMX register ----\n");
|
|
const Lifetime::Interval& first_interval = *interval_range[1].first;
|
|
GenerateInterIntervalInstr(first_interval, *++interval_range[1].first, var_manager.GetAttributes(1), MmxRegOperator(&f, &var_manager));
|
|
}
|
|
if (interval_range[2].size() > 1 && detail::next(interval_range[2].first)->instr_idx_offset == instr_offset) {
|
|
JITASM_TRACE("---- XMM/YMM register ----\n");
|
|
const Lifetime::Interval& first_interval = *interval_range[2].first;
|
|
GenerateInterIntervalInstr(first_interval, *++interval_range[2].first, var_manager.GetAttributes(2), XmmRegOperator(&f, &var_manager));
|
|
}
|
|
|
|
const size_t cur_instr_index = f.instrs_.size();
|
|
const InstrID instr_id = org_instrs[org_instr_index].GetID();
|
|
if (instr_id == I_COMPILER_DECLARE_REG_ARG || instr_id == I_COMPILER_DECLARE_STACK_ARG || instr_id == I_COMPILER_DECLARE_RESULT_REG) {
|
|
// No actual machine code
|
|
} else if (instr_id == I_COMPILER_PROLOG) {
|
|
// Generate function prolog
|
|
GenerateProlog(f, preserved_reg, preserved_reg_stack);
|
|
} else if (instr_id == I_COMPILER_EPILOG) {
|
|
// Generate function epilog
|
|
GenerateEpilog(f, preserved_reg, preserved_reg_stack);
|
|
} else {
|
|
// Copy instruction
|
|
f.instrs_.push_back(org_instrs[org_instr_index]);
|
|
Instr &instr = f.instrs_.back();
|
|
|
|
// Replace symbolic register to physical register
|
|
for (size_t i = 0; i < Instr::MAX_OPERAND_COUNT; ++i) {
|
|
detail::Opd& opd = instr.GetOpd(i);
|
|
if (opd.IsReg()) {
|
|
if (opd.reg_.IsSymbolic()) {
|
|
opd.reg_.type = static_cast<RegType>(opd.reg_.type - R_TYPE_SYMBOLIC_GP);
|
|
opd.reg_.id = interval_range[GetRegFamily(opd.reg_.type)].first->assignment_table[opd.reg_.id];
|
|
}
|
|
} else if (opd.IsMem()) {
|
|
if (opd.base_.IsSymbolic()) {
|
|
opd.base_.type = R_TYPE_GP;
|
|
opd.base_.id = interval_range[0].first->assignment_table[opd.base_.id];
|
|
}
|
|
if (opd.index_.IsSymbolic()) {
|
|
opd.index_.type = R_TYPE_GP;
|
|
opd.index_.id = interval_range[0].first->assignment_table[opd.index_.id];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Adjust label position
|
|
while (cur_label != orderd_labels.end() && cur_label->instr_idx == org_instr_index) {
|
|
f.labels_[cur_label->id].instr_number += cur_instr_index - org_instr_index;
|
|
++cur_label;
|
|
}
|
|
}
|
|
|
|
// Generate inter-block instructions
|
|
JITASM_ASSERT(!(!block->successor[0] && block->successor[1]));
|
|
if (block->successor[0] && !block->successor[1]) {
|
|
// 1 successor
|
|
|
|
// Remove last instruction if it is jump
|
|
Instr jump_instr(I_NOP, 0, 0);
|
|
if (Frontend::IsJump(f.instrs_.back().GetID())) {
|
|
jump_instr = f.instrs_.back();
|
|
f.instrs_.pop_back();
|
|
}
|
|
|
|
JITASM_TRACE("==== Edge to Block%d\n", block->successor[0]->depth);
|
|
GenerateInterBlockInstr(block, block->successor[0], f, var_manager);
|
|
|
|
// Add last instruction if it is jump
|
|
if (Frontend::IsJump(jump_instr.GetID())) {
|
|
f.instrs_.push_back(jump_instr);
|
|
}
|
|
} else if (block->successor[0] && block->successor[1]) {
|
|
// 2 successors
|
|
JITASM_ASSERT(Frontend::IsJump(f.instrs_.back().GetID()) && f.instrs_.back().GetOpd(0).IsImm()); // the last instruction must be jump
|
|
const size_t jump_instr_idx = f.instrs_.size() - 1;
|
|
const size_t label_successor1 = static_cast<size_t>(f.instrs_.back().GetOpd(0).GetImm());
|
|
|
|
// Generate inter-block instructions between current block and successor 1 separately
|
|
Frontend::InstrList temp_instrs;
|
|
temp_instrs.swap(f.instrs_);
|
|
JITASM_TRACE("==== Edge to Block%d\n", block->successor[1]->depth);
|
|
GenerateInterBlockInstr(block, block->successor[1], f, var_manager);
|
|
temp_instrs.swap(f.instrs_);
|
|
|
|
// Insert inter-block instructions between current block and successor 0
|
|
JITASM_TRACE("==== Edge to Block%d\n", block->successor[0]->depth);
|
|
GenerateInterBlockInstr(block, block->successor[0], f, var_manager);
|
|
|
|
if (!temp_instrs.empty()) {
|
|
// Insert inter-block instructions between current block and successor 1 and change jump flow
|
|
|
|
// Jump to successor0
|
|
const size_t label_successor0 = f.NewLabelID("");
|
|
f.AppendJmp(label_successor0);
|
|
|
|
// Change conditional jump to successor1_edge instead of successor1
|
|
const size_t label_successor1_edge = f.NewLabelID("");
|
|
f.L(label_successor1_edge);
|
|
Frontend::ChangeLabelID(f.instrs_[jump_instr_idx], label_successor1_edge);
|
|
|
|
// Insert instructions
|
|
f.instrs_.insert(f.instrs_.end(), temp_instrs.begin(), temp_instrs.end());
|
|
f.AppendJmp(label_successor1);
|
|
|
|
// Label of successor0 block
|
|
f.L(label_successor0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Compile
|
|
inline void Compile(Frontend& f)
|
|
{
|
|
#ifdef JITASM64
|
|
// Available registers : rax, rcx, rdx, rsi, rdi, r8 ~ r15, mm0 ~ mm7, xmm0/ymm0 ~ xmm15/ymm15
|
|
const uint32 available_reg[3] = {0xFFC7, 0xFF, 0xFFFF};
|
|
|
|
// Preserved registers : rbx, rsi, rdi, r12 ~ r15, xmm6 ~ xmm15
|
|
uint32 preserved_reg[3] = {0xF0C8, 0, 0xFFC0};
|
|
#else
|
|
// Available registers : eax, ecx, edx, esi, edi, mm0 ~ mm7, xmm0/ymm0 ~ xmm7/ymm7
|
|
const uint32 available_reg[3] = {0xC7, 0xFF, 0xFF};
|
|
|
|
// Preserved registers : ebx, esi, edi
|
|
uint32 preserved_reg[3] = {0xC8, 0, 0};
|
|
#endif
|
|
|
|
uint32 used_physical_reg[3];
|
|
bool need_reg_alloc[3];
|
|
if (!PrepareCompile(f.instrs_, used_physical_reg, need_reg_alloc)) {
|
|
// No compile process
|
|
return;
|
|
}
|
|
|
|
VariableManager var_manager;
|
|
ControlFlowGraph cfg;
|
|
|
|
if (need_reg_alloc[0] || need_reg_alloc[1] || need_reg_alloc[2]) {
|
|
// Register allocation process
|
|
|
|
// Build CFG including loop detection
|
|
cfg.Build(f);
|
|
|
|
// Live variable analysis
|
|
LiveVariableAnalysis(f, cfg, var_manager);
|
|
|
|
// Linear scan register allocation
|
|
for (size_t reg_family = 0; reg_family < 3; ++reg_family) {
|
|
if (need_reg_alloc[reg_family]) {
|
|
used_physical_reg[reg_family] = LinearScanRegisterAlloc(cfg, reg_family, available_reg[reg_family], var_manager.GetAttributes(reg_family));
|
|
}
|
|
}
|
|
} else {
|
|
// No register allocation
|
|
// Build dummy CFG
|
|
cfg.BuildDummy(f);
|
|
}
|
|
|
|
#ifdef JITASM_DEBUG_DUMP
|
|
cfg.DumpDot();
|
|
#endif
|
|
|
|
// Identify saving registers
|
|
preserved_reg[0] &= used_physical_reg[0];
|
|
preserved_reg[1] &= used_physical_reg[1];
|
|
preserved_reg[2] &= used_physical_reg[2];
|
|
|
|
// Reserve stack for saving xmm register
|
|
Addr preserved_reg_stack(RegID::Invalid(), 0);
|
|
if (preserved_reg[2] != 0) {
|
|
// For saving xmm registers
|
|
preserved_reg_stack = f.stack_manager_.Alloc(detail::Count1Bits(preserved_reg[2]) * 16, 16);
|
|
}
|
|
|
|
// Allocate stack for spill variable
|
|
var_manager.AllocSpillSlots(f.stack_manager_);
|
|
|
|
// ebx(rbx) does not include in used_physical_reg
|
|
// because ebx(rbx) is going to be modified in prolog.
|
|
if (f.stack_manager_.GetSize() > 0) {
|
|
preserved_reg[0] |= (1 << EBX);
|
|
}
|
|
|
|
RewriteInstructions(f, cfg, var_manager, preserved_reg, preserved_reg_stack);
|
|
}
|
|
|
|
} // namespace compiler
|
|
|
|
namespace detail
|
|
{
|
|
struct CondExpr {
|
|
virtual void operator()(Frontend& f, size_t beg, size_t end) const = 0;
|
|
virtual ~CondExpr() {}
|
|
};
|
|
|
|
// &&
|
|
struct CondExpr_ExprAnd : CondExpr {
|
|
const CondExpr& lhs_;
|
|
const CondExpr& rhs_;
|
|
CondExpr_ExprAnd(const CondExpr& lhs, const CondExpr& rhs) : lhs_(lhs), rhs_(rhs) {}
|
|
void operator()(Frontend& f, size_t beg, size_t end) const {
|
|
size_t label = f.NewLabelID("");
|
|
lhs_(f, label, end);
|
|
f.L(label);
|
|
rhs_(f, beg, end);
|
|
}
|
|
CondExpr_ExprAnd& operator=(const CondExpr_ExprAnd&);
|
|
};
|
|
|
|
// ||
|
|
struct CondExpr_ExprOr : CondExpr {
|
|
const CondExpr& lhs_;
|
|
const CondExpr& rhs_;
|
|
CondExpr_ExprOr(const CondExpr& lhs, const CondExpr& rhs) : lhs_(lhs), rhs_(rhs) {}
|
|
void operator()(Frontend& f, size_t beg, size_t end) const {
|
|
size_t label = f.NewLabelID("");
|
|
lhs_(f, beg, label);
|
|
f.L(label);
|
|
rhs_(f, beg, end);
|
|
}
|
|
CondExpr_ExprOr& operator=(const CondExpr_ExprOr&);
|
|
};
|
|
|
|
// cmp
|
|
template<class L, class R, JumpCondition Jcc>
|
|
struct CondExpr_Cmp : CondExpr {
|
|
L lhs_;
|
|
R rhs_;
|
|
CondExpr_Cmp(const L& lhs, const R& rhs) : lhs_(lhs), rhs_(rhs) {}
|
|
void operator()(Frontend& f, size_t beg, size_t end) const {
|
|
f.cmp(lhs_, rhs_);
|
|
f.AppendJcc(Jcc, beg);
|
|
f.AppendJmp(end);
|
|
}
|
|
};
|
|
|
|
// or
|
|
template<class L, class R, JumpCondition Jcc>
|
|
struct CondExpr_Or : CondExpr {
|
|
L lhs_;
|
|
R rhs_;
|
|
CondExpr_Or(const L& lhs, const R& rhs) : lhs_(lhs), rhs_(rhs) {}
|
|
void operator()(Frontend& f, size_t beg, size_t end) const {
|
|
f.or_(lhs_, rhs_);
|
|
f.AppendJcc(Jcc, beg);
|
|
f.AppendJmp(end);
|
|
}
|
|
};
|
|
}
|
|
|
|
// &&
|
|
inline detail::CondExpr_ExprAnd operator&&(const detail::CondExpr& lhs, const detail::CondExpr& rhs) {return detail::CondExpr_ExprAnd(lhs, rhs);}
|
|
// ||
|
|
inline detail::CondExpr_ExprOr operator||(const detail::CondExpr& lhs, const detail::CondExpr& rhs) {return detail::CondExpr_ExprOr(lhs, rhs);}
|
|
|
|
// !
|
|
inline detail::CondExpr_Or<Reg8, Reg8, JCC_E> operator!(const Reg8& lhs) {return detail::CondExpr_Or<Reg8, Reg8, JCC_E>(lhs, lhs);}
|
|
inline detail::CondExpr_Or<Reg16, Reg16, JCC_E> operator!(const Reg16& lhs) {return detail::CondExpr_Or<Reg16, Reg16, JCC_E>(lhs, lhs);}
|
|
inline detail::CondExpr_Or<Reg32, Reg32, JCC_E> operator!(const Reg32& lhs) {return detail::CondExpr_Or<Reg32, Reg32, JCC_E>(lhs, lhs);}
|
|
#ifdef JITASM64
|
|
inline detail::CondExpr_Or<Reg64, Reg64, JCC_E> operator!(const Reg64& lhs) {return detail::CondExpr_Or<Reg64, Reg64, JCC_E>(lhs, lhs);}
|
|
#endif
|
|
inline detail::CondExpr_Cmp<Mem8, Imm8, JCC_E> operator!(const Mem8& lhs) {return detail::CondExpr_Cmp<Mem8, Imm8, JCC_E>(lhs, 0);}
|
|
inline detail::CondExpr_Cmp<Mem16, Imm16, JCC_E> operator!(const Mem16& lhs) {return detail::CondExpr_Cmp<Mem16, Imm16, JCC_E>(lhs, 0);}
|
|
inline detail::CondExpr_Cmp<Mem32, Imm32, JCC_E> operator!(const Mem32& lhs) {return detail::CondExpr_Cmp<Mem32, Imm32, JCC_E>(lhs, 0);}
|
|
#ifdef JITASM64
|
|
inline detail::CondExpr_Cmp<Mem64, Imm32, JCC_E> operator!(const Mem64& lhs) {return detail::CondExpr_Cmp<Mem64, Imm32, JCC_E>(lhs, 0);}
|
|
#endif
|
|
|
|
// <
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_B> operator<(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_B>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_B> operator<(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_B>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_B> operator<(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_B>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_B> operator<(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_B>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_B> operator<(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_B>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_B> operator<(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_B>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_B> operator<(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_B>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_B> operator<(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_B>(lhs, rhs);}
|
|
#endif
|
|
|
|
// >
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_A> operator>(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_A>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_A> operator>(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_A>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_A> operator>(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_A>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_A> operator>(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_A>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_A> operator>(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_A>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_A> operator>(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_A>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_A> operator>(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_A>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_A> operator>(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_A>(lhs, rhs);}
|
|
#endif
|
|
|
|
// <=
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_BE> operator<=(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_BE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_BE> operator<=(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_BE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_BE> operator<=(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_BE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_BE> operator<=(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_BE>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_BE> operator<=(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_BE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_BE> operator<=(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_BE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_BE> operator<=(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_BE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_BE> operator<=(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_BE>(lhs, rhs);}
|
|
#endif
|
|
|
|
// >=
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_AE> operator>=(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_AE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_AE> operator>=(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_AE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_AE> operator>=(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_AE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_AE> operator>=(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_AE>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_AE> operator>=(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_AE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_AE> operator>=(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_AE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_AE> operator>=(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_AE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_AE> operator>=(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_AE>(lhs, rhs);}
|
|
#endif
|
|
|
|
// ==
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_E> operator==(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_E>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_E> operator==(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_E>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_E> operator==(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_E>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_E> operator==(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_E>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_E> operator==(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_E>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_E> operator==(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_E>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_E> operator==(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_E>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_E> operator==(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_E>(lhs, rhs);}
|
|
#endif
|
|
|
|
// !=
|
|
template<class R> detail::CondExpr_Cmp<Reg8, R, JCC_NE> operator!=(const Reg8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg8, R, JCC_NE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg16, R, JCC_NE> operator!=(const Reg16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg16, R, JCC_NE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Reg32, R, JCC_NE> operator!=(const Reg32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg32, R, JCC_NE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Reg64, R, JCC_NE> operator!=(const Reg64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Reg64, R, JCC_NE>(lhs, rhs);}
|
|
#endif
|
|
template<class R> detail::CondExpr_Cmp<Mem8, R, JCC_NE> operator!=(const Mem8& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem8, R, JCC_NE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem16, R, JCC_NE> operator!=(const Mem16& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem16, R, JCC_NE>(lhs, rhs);}
|
|
template<class R> detail::CondExpr_Cmp<Mem32, R, JCC_NE> operator!=(const Mem32& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem32, R, JCC_NE>(lhs, rhs);}
|
|
#ifdef JITASM64
|
|
template<class R> detail::CondExpr_Cmp<Mem64, R, JCC_NE> operator!=(const Mem64& lhs, const R& rhs) {return detail::CondExpr_Cmp<Mem64, R, JCC_NE>(lhs, rhs);}
|
|
#endif
|
|
|
|
namespace detail {
|
|
// Flags for ArgumentTraits
|
|
enum {
|
|
ARG_IN_REG = (1<<0), ///< Argument is stored in general purpose register.
|
|
ARG_IN_STACK = (1<<1), ///< Argument is stored on stack.
|
|
ARG_IN_MMX = (1<<2), ///< Argument is stored in mmx register.
|
|
ARG_IN_XMM_SP = (1<<3), ///< Argument is stored in xmm register as single precision.
|
|
ARG_IN_XMM_DP = (1<<4), ///< Argument is stored in xmm register as double precision.
|
|
ARG_IN_XMM_INT = (1<<5), ///< Argument is stored in xmm register as integer.
|
|
ARG_TYPE_VALUE = (1<<6), ///< Argument is value which is passed.
|
|
ARG_TYPE_PTR = (1<<7) ///< Argument is pointer which is passed to.
|
|
};
|
|
|
|
/// cdecl argument type traits
|
|
template<int N, class T, int Size = sizeof(T)>
|
|
struct ArgTraits_cdecl {
|
|
enum {
|
|
stack_size = (Size + 4 - 1) / 4 * 4,
|
|
flag = ARG_IN_STACK | ARG_TYPE_VALUE,
|
|
reg_id = INVALID
|
|
};
|
|
};
|
|
|
|
#if JITASM_MMINTRIN
|
|
// specialization for __m64
|
|
template<int N> struct ArgTraits_cdecl<N, __m64, 8> {enum {stack_size = 0, flag = ARG_IN_MMX | ARG_TYPE_VALUE, reg_id = MM0};};
|
|
#endif
|
|
|
|
#if JITASM_XMMINTRIN
|
|
// specialization for __m128
|
|
template<int N> struct ArgTraits_cdecl<N, __m128, 16> {enum {stack_size = 0, flag = ARG_IN_XMM_SP | ARG_TYPE_VALUE, reg_id = XMM0};};
|
|
#endif
|
|
|
|
#if JITASM_EMMINTRIN
|
|
// specialization for __m128d
|
|
template<int N> struct ArgTraits_cdecl<N, __m128d, 16> {enum {stack_size = 0, flag = ARG_IN_XMM_DP | ARG_TYPE_VALUE, reg_id = XMM0};};
|
|
|
|
// specialization for __m128i
|
|
template<int N> struct ArgTraits_cdecl<N, __m128i, 16> {enum {stack_size = 0, flag = ARG_IN_XMM_INT | ARG_TYPE_VALUE, reg_id = XMM0};};
|
|
#endif
|
|
|
|
|
|
/// Microsoft x64 fastcall argument type traits
|
|
template<int N, class T, int Size = sizeof(T)>
|
|
struct ArgTraits_win64 {
|
|
enum {
|
|
stack_size = 8,
|
|
flag = ARG_IN_STACK | (Size == 1 || Size == 2 || Size == 4 || Size == 8 ? ARG_TYPE_VALUE : ARG_TYPE_PTR),
|
|
reg_id = INVALID
|
|
};
|
|
};
|
|
|
|
/**
|
|
* Base class for argument which is stored in general purpose register.
|
|
*/
|
|
template<int RegID, int Flag> struct ArgTraits_win64_reg {
|
|
enum {
|
|
stack_size = 8,
|
|
flag = Flag,
|
|
reg_id = RegID
|
|
};
|
|
};
|
|
|
|
// specialization for argument pointer stored in register
|
|
template<class T, int Size> struct ArgTraits_win64<0, T, Size> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_PTR> {};
|
|
template<class T, int Size> struct ArgTraits_win64<1, T, Size> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_PTR> {};
|
|
template<class T, int Size> struct ArgTraits_win64<2, T, Size> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_PTR> {};
|
|
template<class T, int Size> struct ArgTraits_win64<3, T, Size> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_PTR> {};
|
|
|
|
// specialization for 1 byte type
|
|
template<class T> struct ArgTraits_win64<0, T, 1> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<1, T, 1> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<2, T, 1> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<3, T, 1> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
|
|
// specialization for 2 bytes type
|
|
template<class T> struct ArgTraits_win64<0, T, 2> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<1, T, 2> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<2, T, 2> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<3, T, 2> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
|
|
// specialization for 4 bytes type
|
|
template<class T> struct ArgTraits_win64<0, T, 4> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<1, T, 4> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<2, T, 4> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<3, T, 4> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
|
|
// specialization for 8 bytes type
|
|
template<class T> struct ArgTraits_win64<0, T, 8> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<1, T, 8> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<2, T, 8> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<class T> struct ArgTraits_win64<3, T, 8> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
|
|
#if JITASM_MMINTRIN
|
|
// specialization for __m64
|
|
template<> struct ArgTraits_win64<0, __m64, 8> : ArgTraits_win64_reg<RCX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<1, __m64, 8> : ArgTraits_win64_reg<RDX, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<2, __m64, 8> : ArgTraits_win64_reg<R8, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<3, __m64, 8> : ArgTraits_win64_reg<R9, ARG_IN_REG | ARG_TYPE_VALUE> {};
|
|
#endif
|
|
|
|
// specialization for float
|
|
template<> struct ArgTraits_win64<0, float, 4> : ArgTraits_win64_reg<XMM0, ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<1, float, 4> : ArgTraits_win64_reg<XMM1, ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<2, float, 4> : ArgTraits_win64_reg<XMM2, ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<3, float, 4> : ArgTraits_win64_reg<XMM3, ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
|
|
// specialization for double
|
|
template<> struct ArgTraits_win64<0, double, 8> : ArgTraits_win64_reg<XMM0, ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<1, double, 8> : ArgTraits_win64_reg<XMM1, ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<2, double, 8> : ArgTraits_win64_reg<XMM2, ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
template<> struct ArgTraits_win64<3, double, 8> : ArgTraits_win64_reg<XMM3, ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
|
|
|
|
/// System V ABI AMD64 argument type traits
|
|
template<int N, class T, int Size = sizeof(T)>
|
|
struct ArgTraits_linux64 {
|
|
enum {
|
|
stack_size = (Size + 8 - 1) / 8 * 8,
|
|
flag = ARG_IN_STACK | ARG_TYPE_VALUE,
|
|
reg_id = INVALID
|
|
};
|
|
};
|
|
|
|
// INTEGER class
|
|
struct ArgTraits_linux64_integer {
|
|
enum {
|
|
stack_size = 0,
|
|
flag = ARG_IN_REG | ARG_TYPE_VALUE,
|
|
reg_id = RDI
|
|
};
|
|
};
|
|
|
|
template<int N> struct ArgTraits_linux64<N, bool, sizeof(bool) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, char, sizeof(char) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, unsigned char, sizeof(unsigned char) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, short, sizeof(short) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, unsigned short, sizeof(unsigned short) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, int, sizeof(int) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, unsigned int, sizeof(unsigned int) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, long, sizeof(long) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, unsigned long, sizeof(unsigned long) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, long long, sizeof(long long) > : ArgTraits_linux64_integer {};
|
|
template<int N> struct ArgTraits_linux64<N, unsigned long long, sizeof(unsigned long long)> : ArgTraits_linux64_integer {};
|
|
template<int N, class T> struct ArgTraits_linux64<N, T *, 8 > : ArgTraits_linux64_integer {};
|
|
|
|
// SSE class
|
|
template<int Flag> struct ArgTraits_linux64_sse {
|
|
enum {
|
|
stack_size = 0,
|
|
flag = Flag,
|
|
reg_id = XMM0
|
|
};
|
|
};
|
|
|
|
template<int N> struct ArgTraits_linux64<N, float, sizeof(float)> : ArgTraits_linux64_sse<ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
template<int N> struct ArgTraits_linux64<N, double, sizeof(double)> : ArgTraits_linux64_sse<ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
#if JITASM_MMINTRIN
|
|
template<int N> struct ArgTraits_linux64<N, __m64, sizeof(__m64)> : ArgTraits_linux64_sse<ARG_IN_XMM_INT | ARG_TYPE_VALUE> {};
|
|
#endif
|
|
#if JITASM_XMMINTRIN
|
|
template<int N> struct ArgTraits_linux64<N, __m128, sizeof(__m128)> : ArgTraits_linux64_sse<ARG_IN_XMM_SP | ARG_TYPE_VALUE> {};
|
|
#endif
|
|
#if JITASM_EMMINTRIN
|
|
template<int N> struct ArgTraits_linux64<N, __m128d, sizeof(__m128d)> : ArgTraits_linux64_sse<ARG_IN_XMM_DP | ARG_TYPE_VALUE> {};
|
|
template<int N> struct ArgTraits_linux64<N, __m128i, sizeof(__m128i)> : ArgTraits_linux64_sse<ARG_IN_XMM_INT | ARG_TYPE_VALUE> {};
|
|
#endif
|
|
|
|
|
|
/// Special argument type
|
|
struct ArgNone {};
|
|
|
|
/// Argument information
|
|
struct ArgInfo
|
|
{
|
|
Addr addr;
|
|
PhysicalRegID reg_id;
|
|
uint32 flag;
|
|
uint32 index_gp;
|
|
uint32 index_mmx;
|
|
uint32 index_xmm;
|
|
|
|
ArgInfo(const Addr& addr_, PhysicalRegID reg_id_, uint32 flg, uint32 idx_gp = 0, uint32 idx_mmx = 0, uint32 idx_xmm_ = 0) : addr(addr_), reg_id(reg_id_), flag(flg), index_gp(idx_gp), index_mmx(idx_mmx), index_xmm(idx_xmm_) {}
|
|
|
|
template<class CurArgTraits, class NextArgTraits> ArgInfo Next() const {
|
|
ArgInfo next_arg_info(addr + CurArgTraits::stack_size, static_cast<PhysicalRegID>(NextArgTraits::reg_id), NextArgTraits::flag, index_gp, index_mmx, index_xmm);
|
|
if (CurArgTraits::flag & ARG_IN_REG) next_arg_info.index_gp++;
|
|
if (CurArgTraits::flag & ARG_IN_MMX) next_arg_info.index_mmx++;
|
|
if (CurArgTraits::flag & (ARG_IN_XMM_SP | ARG_IN_XMM_DP | ARG_IN_XMM_INT)) next_arg_info.index_xmm++;
|
|
|
|
#ifdef JITASM64
|
|
#ifdef JITASM_WIN
|
|
// for Win64
|
|
#else
|
|
// for x64 Linux
|
|
if (NextArgTraits::flag & ARG_IN_REG) {
|
|
const PhysicalRegID gp_regs[] = {RDI, RSI, RDX, RCX, R8, R9};
|
|
next_arg_info.reg_id = next_arg_info.index_gp < 6 ? gp_regs[next_arg_info.index_gp] : INVALID;
|
|
}
|
|
if (CurArgTraits::flag & ARG_IN_REG) {
|
|
if (reg_id == INVALID) {
|
|
// This register argument is passed on stack
|
|
next_arg_info.addr = next_arg_info.addr + 8;
|
|
}
|
|
}
|
|
|
|
// __m128/__m128d/__m128i
|
|
if (NextArgTraits::flag & (ARG_IN_XMM_SP | ARG_IN_XMM_DP | ARG_IN_XMM_INT)) {
|
|
if (next_arg_info.index_xmm < 8) {
|
|
next_arg_info.reg_id = static_cast<PhysicalRegID>(next_arg_info.reg_id + next_arg_info.index_xmm);
|
|
} else {
|
|
next_arg_info.reg_id = INVALID;
|
|
}
|
|
}
|
|
if (CurArgTraits::flag & (ARG_IN_XMM_SP | ARG_IN_XMM_DP | ARG_IN_XMM_INT)) {
|
|
if (reg_id == INVALID) {
|
|
// This __m128/__m128d/__m128i argument is passed on stack
|
|
next_arg_info.addr = next_arg_info.addr + 16;
|
|
}
|
|
}
|
|
#endif
|
|
#else
|
|
// for x86 Win/Linux
|
|
|
|
// __m64
|
|
if (NextArgTraits::flag & ARG_IN_MMX) {
|
|
if (next_arg_info.index_mmx < 3) {
|
|
next_arg_info.reg_id = static_cast<PhysicalRegID>(next_arg_info.reg_id + next_arg_info.index_mmx);
|
|
} else {
|
|
next_arg_info.reg_id = INVALID;
|
|
}
|
|
}
|
|
if (CurArgTraits::flag & ARG_IN_MMX) {
|
|
if (reg_id == INVALID) {
|
|
// This __m64 argument is passed on stack
|
|
next_arg_info.addr = next_arg_info.addr + 8;
|
|
}
|
|
}
|
|
|
|
// __m128/__m128d/__m128i
|
|
if (NextArgTraits::flag & (ARG_IN_XMM_SP | ARG_IN_XMM_DP | ARG_IN_XMM_INT)) {
|
|
if (next_arg_info.index_xmm < 3) {
|
|
next_arg_info.reg_id = static_cast<PhysicalRegID>(next_arg_info.reg_id + next_arg_info.index_xmm);
|
|
} else {
|
|
next_arg_info.reg_id = INVALID;
|
|
}
|
|
}
|
|
if (CurArgTraits::flag & (ARG_IN_XMM_SP | ARG_IN_XMM_DP | ARG_IN_XMM_INT)) {
|
|
if (reg_id == INVALID) {
|
|
// This __m128/__m128d/__m128i argument is passed on stack
|
|
next_arg_info.addr = next_arg_info.addr + 16;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return next_arg_info;
|
|
}
|
|
};
|
|
|
|
/// Result type traits
|
|
template<class T>
|
|
struct ResultTraits {
|
|
enum { size = sizeof(T) };
|
|
typedef OpdT<sizeof(T) * 8> OpdR;
|
|
typedef AddressingPtr<OpdR> ResultPtr;
|
|
};
|
|
|
|
// specialization for void
|
|
template<>
|
|
struct ResultTraits<void> {
|
|
enum { size = 0 };
|
|
struct OpdR {};
|
|
struct ResultPtr {};
|
|
};
|
|
|
|
/// Result store destination
|
|
struct ResultDest {
|
|
Reg ptr;
|
|
ResultDest(Frontend& f, const ArgInfo& dest)
|
|
{
|
|
if (dest.reg_id != INVALID) {
|
|
// result pointer on register
|
|
f.DeclareRegArg(ptr, Reg(dest.reg_id), !dest.addr.reg_.IsInvalid() ? f.ptr[dest.addr] : Opd());
|
|
} else if (!dest.addr.reg_.IsInvalid()) {
|
|
// result pointer on stack
|
|
f.DeclareStackArg(ptr, f.ptr[dest.addr]);
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Function result
|
|
template<class T, int Size = ResultTraits<T>::size>
|
|
struct ResultT {
|
|
enum { ArgR = 1 /* First (hidden) argument is pointer for copying result. */};
|
|
typedef typename ResultTraits<T>::OpdR OpdR;
|
|
OpdR val_;
|
|
ResultT() {}
|
|
ResultT(const MemT<OpdR>& val) : val_(val) {}
|
|
void StoreResult(Frontend& f, const ResultDest& dst)
|
|
{
|
|
if (val_.IsMem()) {
|
|
f.lea(f.zsi, static_cast<MemT<OpdR>&>(val_));
|
|
f.mov(f.zcx, Size);
|
|
f.rep_movsb(dst.ptr, f.zsi, f.zcx);
|
|
f.DeclareResultReg(dst.ptr);
|
|
}
|
|
}
|
|
};
|
|
|
|
// specialization for void
|
|
template<>
|
|
struct ResultT<void, 0> {
|
|
enum { ArgR = 0 };
|
|
ResultT();
|
|
};
|
|
|
|
// specialization for 1byte type
|
|
template<class T>
|
|
struct ResultT<T, 1> {
|
|
enum { ArgR = 0 };
|
|
Opd8 val_;
|
|
ResultT() {}
|
|
ResultT(const Opd8& val) : val_(val) {}
|
|
ResultT(uint8 imm) : val_(Imm8(imm)) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsGpReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != AL) {
|
|
f.mov(f.al, static_cast<Reg8&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.mov(f.al, static_cast<Mem8&>(val_));
|
|
} else if (val_.IsImm()) {
|
|
f.mov(f.al, static_cast<Imm8&>(val_));
|
|
}
|
|
}
|
|
};
|
|
|
|
// specialization for 2bytes type
|
|
template<class T>
|
|
struct ResultT<T, 2> {
|
|
enum { ArgR = 0 };
|
|
Opd16 val_;
|
|
ResultT() {}
|
|
ResultT(const Opd16& val) : val_(val) {}
|
|
ResultT(uint16 imm) : val_(Imm16(imm)) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsGpReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != AX) {
|
|
f.mov(f.ax, static_cast<Reg16&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.mov(f.ax, static_cast<Mem16&>(val_));
|
|
} else if (val_.IsImm()) {
|
|
f.mov(f.ax, static_cast<Imm16&>(val_));
|
|
}
|
|
}
|
|
};
|
|
|
|
// specialization for 4bytes type
|
|
template<class T>
|
|
struct ResultT<T, 4> {
|
|
enum { ArgR = 0 };
|
|
Opd32 val_;
|
|
ResultT() {}
|
|
ResultT(const Opd32& val) : val_(val) {}
|
|
ResultT(uint32 imm) : val_(Imm32(imm)) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsGpReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != EAX) {
|
|
f.mov(f.eax, static_cast<Reg32&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.mov(f.eax, static_cast<Mem32&>(val_));
|
|
} else if (val_.IsImm()) {
|
|
f.mov(f.eax, static_cast<Imm32&>(val_));
|
|
}
|
|
}
|
|
};
|
|
|
|
// specialization for 8bytes type
|
|
template<class T>
|
|
struct ResultT<T, 8> {
|
|
enum { ArgR = 0 };
|
|
Opd64 val_;
|
|
ResultT() {}
|
|
ResultT(const Opd64& val) : val_(val) {}
|
|
ResultT(uint64 imm) : val_(Imm64(imm)) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
#ifdef JITASM64
|
|
if (val_.IsGpReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != RAX) {
|
|
f.mov(f.rax, static_cast<Reg64&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.mov(f.rax, static_cast<Mem64&>(val_));
|
|
} else if (val_.IsImm()) {
|
|
f.mov(f.rax, static_cast<Imm64&>(val_));
|
|
} else if (val_.IsMmxReg()) {
|
|
f.movq(f.rax, static_cast<MmxReg&>(val_));
|
|
}
|
|
#else
|
|
if (val_.IsMem()) {
|
|
// from memory
|
|
Mem32 lo(val_.GetAddressSize(), val_.GetBase(), val_.GetIndex(), val_.GetScale(), val_.GetDisp());
|
|
Mem32 hi(val_.GetAddressSize(), val_.GetBase(), val_.GetIndex(), val_.GetScale(), val_.GetDisp() + 4);
|
|
f.mov(f.eax, lo);
|
|
f.mov(f.edx, hi);
|
|
} else if (val_.IsImm()) {
|
|
// from immediate
|
|
f.mov(f.eax, static_cast<sint32>(val_.GetImm()));
|
|
f.mov(f.edx, static_cast<sint32>(val_.GetImm() >> 32));
|
|
}
|
|
#endif
|
|
}
|
|
};
|
|
|
|
// specialization for float
|
|
template<>
|
|
struct ResultT<float, 4> {
|
|
enum { ArgR = 0 };
|
|
detail::Opd val_;
|
|
ResultT() {}
|
|
ResultT(const FpuReg& fpu) : val_(fpu) {}
|
|
ResultT(const Mem32& mem) : val_(mem) {}
|
|
ResultT(const XmmReg& xmm) : val_(xmm) {}
|
|
ResultT(const float imm) : val_(Imm32(*(uint32*)&imm)) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
#ifdef JITASM64
|
|
if (val_.IsFpuReg()) {
|
|
// from FPU register
|
|
f.fstp(f.real4_ptr[f.rsp - 4]);
|
|
f.movss(f.xmm0, f.dword_ptr[f.rsp - 4]);
|
|
} else if (val_.IsMem() && val_.GetSize() == O_SIZE_32) {
|
|
// from memory
|
|
f.movss(f.xmm0, static_cast<Mem32&>(val_));
|
|
} else if (val_.IsXmmReg()) {
|
|
// from XMM register
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != XMM0) {
|
|
f.movss(f.xmm0, static_cast<XmmReg&>(val_));
|
|
}
|
|
} else if (val_.IsImm()) {
|
|
// from float immediate
|
|
f.mov(f.dword_ptr[f.rsp - 4], static_cast<Imm32&>(val_));
|
|
f.movss(f.xmm0, f.dword_ptr[f.rsp - 4]);
|
|
}
|
|
#else
|
|
if (val_.IsFpuReg()) {
|
|
// from FPU register
|
|
if (val_.GetReg().id != ST0) {
|
|
f.fld(static_cast<FpuReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem() && val_.GetSize() == O_SIZE_32) {
|
|
// from memory
|
|
f.fld(static_cast<Mem32&>(val_));
|
|
} else if (val_.IsXmmReg()) {
|
|
// from XMM register
|
|
f.movss(f.dword_ptr[f.esp - 4], static_cast<XmmReg&>(val_));
|
|
f.fld(f.real4_ptr[f.esp - 4]);
|
|
} else if (val_.IsImm()) {
|
|
// from float immediate
|
|
f.mov(f.dword_ptr[f.esp - 4], static_cast<Imm32&>(val_));
|
|
f.fld(f.real4_ptr[f.esp - 4]);
|
|
}
|
|
#endif
|
|
}
|
|
};
|
|
|
|
// specialization for double
|
|
template<>
|
|
struct ResultT<double, 8> {
|
|
enum { ArgR = 0 };
|
|
detail::Opd val_;
|
|
double imm_;
|
|
ResultT() {}
|
|
ResultT(const FpuReg& fpu) : val_(fpu) {}
|
|
ResultT(const Mem64& mem) : val_(mem) {}
|
|
ResultT(const XmmReg& xmm) : val_(xmm) {}
|
|
ResultT(const double imm) : val_(Imm32(0)), imm_(imm) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
#ifdef JITASM64
|
|
if (val_.IsFpuReg()) {
|
|
// from FPU register
|
|
f.fstp(f.real8_ptr[f.rsp - 8]);
|
|
f.movsd(f.xmm0, f.qword_ptr[f.rsp - 8]);
|
|
} else if (val_.IsMem() && val_.GetSize() == O_SIZE_64) {
|
|
// from memory
|
|
f.movsd(f.xmm0, static_cast<Mem64&>(val_));
|
|
} else if (val_.IsXmmReg()) {
|
|
// from XMM register
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != XMM0) {
|
|
f.movsd(f.xmm0, static_cast<XmmReg&>(val_));
|
|
}
|
|
} else if (val_.IsImm()) {
|
|
// from float immediate
|
|
f.mov(f.dword_ptr[f.rsp - 8], *reinterpret_cast<uint32*>(&imm_));
|
|
f.mov(f.dword_ptr[f.rsp - 4], *(reinterpret_cast<uint32*>(&imm_) + 1));
|
|
f.movsd(f.xmm0, f.qword_ptr[f.rsp - 8]);
|
|
}
|
|
#else
|
|
if (val_.IsFpuReg()) {
|
|
// from FPU register
|
|
if (val_.GetReg().id != ST0) {
|
|
f.fld(static_cast<FpuReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem() && val_.GetSize() == O_SIZE_64) {
|
|
// from memory
|
|
f.fld(static_cast<Mem64&>(val_));
|
|
} else if (val_.IsXmmReg()) {
|
|
// from XMM register
|
|
f.movsd(f.qword_ptr[f.esp - 8], static_cast<XmmReg&>(val_));
|
|
f.fld(f.real8_ptr[f.esp - 8]);
|
|
} else if (val_.IsImm()) { // val_ is immediate 0
|
|
// from double immediate
|
|
f.mov(f.dword_ptr[f.esp - 8], *reinterpret_cast<uint32*>(&imm_));
|
|
f.mov(f.dword_ptr[f.esp - 4], *(reinterpret_cast<uint32*>(&imm_) + 1));
|
|
f.fld(f.real8_ptr[f.esp - 8]);
|
|
}
|
|
#endif
|
|
}
|
|
};
|
|
|
|
|
|
#if JITASM_MMINTRIN
|
|
// specialization for __m64
|
|
template<>
|
|
struct ResultT<__m64, 8> {
|
|
enum { ArgR = 0 };
|
|
Opd64 val_;
|
|
ResultT() {}
|
|
ResultT(const MmxReg& mm) : val_(mm) {}
|
|
ResultT(const Mem64& mem) : val_(mem) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
#if defined(JITASM64) && !defined(JITASM_WIN)
|
|
if (val_.IsMmxReg()) {
|
|
f.movq2dq(f.xmm0, static_cast<const MmxReg&>(val_));
|
|
} else if (val_.IsMem()) {
|
|
f.movq(f.xmm0, static_cast<const Mem64&>(val_));
|
|
}
|
|
#else
|
|
if (val_.IsMmxReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != MM0) {
|
|
f.movq(f.mm0, static_cast<const MmxReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.movq(f.mm0, static_cast<const Mem64&>(val_));
|
|
}
|
|
#endif
|
|
}
|
|
};
|
|
#endif // JITASM_MMINTRIN
|
|
|
|
#if JITASM_XMMINTRIN
|
|
// specialization for __m128
|
|
template<>
|
|
struct ResultT<__m128, 16> {
|
|
enum { ArgR = 0 };
|
|
Opd128 val_;
|
|
ResultT() {}
|
|
ResultT(const XmmReg& xmm) : val_(xmm) {}
|
|
ResultT(const Mem128& mem) : val_(mem) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsXmmReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != XMM0) {
|
|
f.movaps(f.xmm0, static_cast<const XmmReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.movaps(f.xmm0, static_cast<const Mem128&>(val_));
|
|
}
|
|
}
|
|
};
|
|
#endif // JITASM_XMMINTRIN
|
|
|
|
#if JITASM_EMMINTRIN
|
|
// specialization for __m128d
|
|
template<>
|
|
struct ResultT<__m128d, 16> {
|
|
enum { ArgR = 0 };
|
|
Opd128 val_;
|
|
ResultT() {}
|
|
ResultT(const XmmReg& xmm) : val_(xmm) {}
|
|
ResultT(const Mem128& mem) : val_(mem) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsXmmReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != XMM0) {
|
|
f.movapd(f.xmm0, static_cast<const XmmReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.movapd(f.xmm0, static_cast<const Mem128&>(val_));
|
|
}
|
|
}
|
|
};
|
|
|
|
// specialization for __m128i
|
|
template<>
|
|
struct ResultT<__m128i, 16> {
|
|
enum { ArgR = 0 };
|
|
Opd128 val_;
|
|
ResultT() {}
|
|
ResultT(const XmmReg& xmm) : val_(xmm) {}
|
|
ResultT(const Mem128& mem) : val_(mem) {}
|
|
void StoreResult(Frontend& f, const ResultDest& /*dst*/)
|
|
{
|
|
if (val_.IsXmmReg()) {
|
|
if (val_.GetReg().IsSymbolic()) {
|
|
f.DeclareResultReg(val_);
|
|
} else if (val_.GetReg().id != XMM0) {
|
|
f.movdqa(f.xmm0, static_cast<const XmmReg&>(val_));
|
|
}
|
|
} else if (val_.IsMem()) {
|
|
f.movdqa(f.xmm0, static_cast<const Mem128&>(val_));
|
|
}
|
|
}
|
|
};
|
|
#endif // JITASM_EMMINTRIN
|
|
|
|
|
|
namespace calling_convention_cdecl
|
|
{
|
|
#ifdef JITASM64
|
|
#ifdef JITASM_WIN
|
|
template<int N, class T, int Size = sizeof(T)> struct ArgTraits : ArgTraits_win64<N, T, Size> {};
|
|
#else
|
|
template<int N, class T, int Size = sizeof(T)> struct ArgTraits : ArgTraits_linux64<N, T, Size> {};
|
|
#endif
|
|
#else
|
|
template<int N, class T, int Size = sizeof(T)> struct ArgTraits : ArgTraits_cdecl<N, T, Size> {};
|
|
#endif
|
|
|
|
template<class R>
|
|
ArgInfo ResultInfo()
|
|
{
|
|
if (ResultT<R>::ArgR) {
|
|
#ifdef JITASM64
|
|
return ArgInfo(Addr(RegID::CreatePhysicalRegID(R_TYPE_GP, RBP), SIZE_OF_GP_REG * 2), RCX, ARG_IN_REG | ARG_TYPE_PTR);
|
|
#else
|
|
return ArgInfo(Addr(RegID::CreatePhysicalRegID(R_TYPE_GP, EBP), SIZE_OF_GP_REG * 2), INVALID, ARG_IN_STACK | ARG_TYPE_PTR);
|
|
#endif
|
|
} else {
|
|
return ArgInfo(Addr(RegID::Invalid(), 0), INVALID, 0);
|
|
}
|
|
}
|
|
|
|
template<class R, class A1>
|
|
ArgInfo ArgInfo1() { return ArgInfo(Addr(RegID::CreatePhysicalRegID(R_TYPE_GP, EBP), SIZE_OF_GP_REG * (2 + ResultT<R>::ArgR)), static_cast<PhysicalRegID>(ArgTraits<ResultT<R>::ArgR + 0, A1>::reg_id), ArgTraits<ResultT<R>::ArgR + 0, A1>::flag); }
|
|
template<class R, class A1, class A2>
|
|
ArgInfo ArgInfo2() { return ArgInfo(ArgInfo1<R, A1>()).Next< ArgTraits<ResultT<R>::ArgR + 0, A1>, ArgTraits<ResultT<R>::ArgR + 1, A2> >(); }
|
|
template<class R, class A1, class A2, class A3>
|
|
ArgInfo ArgInfo3() { return ArgInfo(ArgInfo2<R, A1, A2>()).Next< ArgTraits<ResultT<R>::ArgR + 1, A2>, ArgTraits<ResultT<R>::ArgR + 2, A3> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4>
|
|
ArgInfo ArgInfo4() { return ArgInfo(ArgInfo3<R, A1, A2, A3>()).Next< ArgTraits<ResultT<R>::ArgR + 2, A3>, ArgTraits<ResultT<R>::ArgR + 3, A4> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5>
|
|
ArgInfo ArgInfo5() { return ArgInfo(ArgInfo4<R, A1, A2, A3, A4>()).Next< ArgTraits<ResultT<R>::ArgR + 3, A4>, ArgTraits<ResultT<R>::ArgR + 4, A5> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5, class A6>
|
|
ArgInfo ArgInfo6() { return ArgInfo(ArgInfo5<R, A1, A2, A3, A4, A5>()).Next< ArgTraits<ResultT<R>::ArgR + 4, A5>, ArgTraits<ResultT<R>::ArgR + 5, A6> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5, class A6, class A7>
|
|
ArgInfo ArgInfo7() { return ArgInfo(ArgInfo6<R, A1, A2, A3, A4, A5, A6>()).Next< ArgTraits<ResultT<R>::ArgR + 5, A6>, ArgTraits<ResultT<R>::ArgR + 6, A7> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8>
|
|
ArgInfo ArgInfo8() { return ArgInfo(ArgInfo7<R, A1, A2, A3, A4, A5, A6, A7>()).Next< ArgTraits<ResultT<R>::ArgR + 6, A7>, ArgTraits<ResultT<R>::ArgR + 7, A8> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9>
|
|
ArgInfo ArgInfo9() { return ArgInfo(ArgInfo8<R, A1, A2, A3, A4, A5, A6, A7, A8>()).Next< ArgTraits<ResultT<R>::ArgR + 7, A8>, ArgTraits<ResultT<R>::ArgR + 8, A9> >(); }
|
|
template<class R, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10>
|
|
ArgInfo ArgInfo10() { return ArgInfo(ArgInfo9<R, A1, A2, A3, A4, A5, A6, A7, A8, A9>()).Next< ArgTraits<ResultT<R>::ArgR + 8, A9>, ArgTraits<ResultT<R>::ArgR + 9, A10> >(); }
|
|
|
|
/// Function argument
|
|
template<class T, size_t Size = sizeof(T)>
|
|
struct Arg
|
|
{
|
|
Addr addr_;
|
|
#ifdef JITASM64
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : addr_(Reg()) {
|
|
if (arg_info.reg_id != INVALID) {
|
|
f.DeclareRegArg(Reg(addr_.reg_), Reg(arg_info.reg_id), f.ptr[arg_info.addr]);
|
|
} else {
|
|
f.DeclareStackArg(Reg(addr_.reg_), f.ptr[arg_info.addr]);
|
|
}
|
|
}
|
|
#else
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : addr_(arg_info.addr) {}
|
|
#endif
|
|
operator Addr () {return addr_;}
|
|
};
|
|
|
|
// specialization for 1byte type
|
|
template<class T>
|
|
struct Arg<T, 1>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->movzx(Reg64(arg_info_.reg_id), Reg8(arg_info_.reg_id));
|
|
f_->mov(f_->qword_ptr[arg_info_.addr], Reg64(arg_info_.reg_id));
|
|
}
|
|
#endif
|
|
return arg_info_.addr;
|
|
}
|
|
operator Reg8 () {
|
|
Reg8 reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->byte_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, Reg8(arg_info_.reg_id), f_->byte_ptr[arg_info_.addr]); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
// specialization for 2byte type
|
|
template<class T>
|
|
struct Arg<T, 2>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->movzx(Reg64(arg_info_.reg_id), Reg16(arg_info_.reg_id));
|
|
f_->mov(f_->qword_ptr[arg_info_.addr], Reg64(arg_info_.reg_id));
|
|
}
|
|
#endif
|
|
return arg_info_.addr;
|
|
}
|
|
operator Reg16 () {
|
|
Reg16 reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->word_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, Reg16(arg_info_.reg_id), f_->word_ptr[arg_info_.addr]); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
// specialization for 4byte type
|
|
template<class T>
|
|
struct Arg<T, 4>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->mov(f_->qword_ptr[arg_info_.addr], Reg64(arg_info_.reg_id));
|
|
}
|
|
#endif
|
|
return arg_info_.addr;
|
|
}
|
|
operator Reg32 () {
|
|
Reg32 reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->dword_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, Reg32(arg_info_.reg_id), f_->dword_ptr[arg_info_.addr]); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
#ifdef JITASM64
|
|
// specialization for 8byte type
|
|
template<class T>
|
|
struct Arg<T, 8>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->mov(f_->qword_ptr[arg_info_.addr], Reg64(arg_info_.reg_id));
|
|
}
|
|
return arg_info_.addr;
|
|
}
|
|
operator Reg64 () {
|
|
Reg64 reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->qword_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, Reg64(arg_info_.reg_id), f_->qword_ptr[arg_info_.addr]); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
#endif
|
|
|
|
// specialization for float
|
|
template<>
|
|
struct Arg<float, 4>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->movss(f_->dword_ptr[arg_info_.addr], XmmReg(arg_info_.reg_id));
|
|
}
|
|
#endif
|
|
return arg_info_.addr;
|
|
}
|
|
operator XmmReg () {
|
|
XmmReg reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->dword_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, XmmReg(arg_info_.reg_id)); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
// specialization for double
|
|
template<>
|
|
struct Arg<double, 8>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when x64 argument on register
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->movsd(f_->qword_ptr[arg_info_.addr], XmmReg(arg_info_.reg_id));
|
|
}
|
|
#endif
|
|
return arg_info_.addr;
|
|
}
|
|
operator XmmReg () {
|
|
XmmReg reg;
|
|
if (arg_info_.reg_id == INVALID) {
|
|
f_->DeclareStackArg(reg, f_->qword_ptr[arg_info_.addr]); // argument on stack
|
|
} else {
|
|
f_->DeclareRegArg(reg, XmmReg(arg_info_.reg_id)); // argument on register
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
|
|
#if JITASM_MMINTRIN
|
|
// specialization for __m64
|
|
template<>
|
|
struct Arg<__m64, 8>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
if (arg_info_.reg_id != INVALID) {
|
|
// Passed by mmx register
|
|
if (arg_info_.flag & ARG_IN_REG) {
|
|
// Win64
|
|
#ifdef JITASM64
|
|
// Dump to shadow space when Win64 argument on register
|
|
Reg64 arg;
|
|
f_->DeclareRegArg(arg, Reg64(arg_info_.reg_id));
|
|
f_->mov(f_->qword_ptr[arg_info_.addr], arg);
|
|
#endif
|
|
return arg_info_.addr;
|
|
} else if (arg_info_.flag & ARG_IN_XMM_INT) {
|
|
// x64 Linux
|
|
XmmReg arg;
|
|
f_->DeclareRegArg(arg, XmmReg(arg_info_.reg_id));
|
|
Addr addr = f_->stack_manager_.Alloc(8, 8);
|
|
f_->movq(f_->qword_ptr[addr], arg);
|
|
return addr;
|
|
} else {
|
|
MmxReg arg;
|
|
f_->DeclareRegArg(arg, MmxReg(arg_info_.reg_id));
|
|
Addr addr = f_->stack_manager_.Alloc(8, 8);
|
|
f_->movq(f_->qword_ptr[addr], arg);
|
|
return addr;
|
|
}
|
|
} else {
|
|
// Passed by stack
|
|
return arg_info_.addr;
|
|
}
|
|
}
|
|
operator MmxReg () {
|
|
MmxReg reg;
|
|
if (arg_info_.reg_id != INVALID) {
|
|
// Passed by register
|
|
if (arg_info_.flag & ARG_IN_REG) {
|
|
// Win64
|
|
#ifdef JITASM64
|
|
Reg64 arg;
|
|
f_->DeclareRegArg(arg, Reg64(arg_info_.reg_id));
|
|
f_->movq(reg, arg);
|
|
#endif
|
|
} else if(arg_info_.flag & ARG_IN_XMM_INT) {
|
|
// x64 Linux
|
|
XmmReg arg;
|
|
f_->DeclareRegArg(arg, XmmReg(arg_info_.reg_id));
|
|
f_->movdq2q(reg, arg);
|
|
} else {
|
|
f_->DeclareRegArg(reg, MmxReg(arg_info_.reg_id));
|
|
}
|
|
} else {
|
|
// Passed by stack
|
|
f_->DeclareStackArg(reg, f_->qword_ptr[arg_info_.addr]);
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
#endif // JITASM_MMINTRIN
|
|
|
|
#if JITASM_XMMINTRIN
|
|
// specialization for __m128
|
|
template<>
|
|
struct Arg<__m128, 16>
|
|
{
|
|
Frontend *f_;
|
|
ArgInfo arg_info_;
|
|
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : f_(&f), arg_info_(arg_info) {}
|
|
operator Addr () {
|
|
if (arg_info_.flag & ARG_TYPE_PTR) {
|
|
Reg ptr;
|
|
if (arg_info_.reg_id != INVALID) {
|
|
f_->DeclareRegArg(ptr, Reg(arg_info_.reg_id)); // argument on register
|
|
} else {
|
|
f_->mov(ptr, f_->ptr[arg_info_.addr]);
|
|
}
|
|
return ptr;
|
|
} else if (arg_info_.reg_id != INVALID) {
|
|
Addr addr = f_->stack_manager_.Alloc(16, 16);
|
|
f_->movdqa(f_->xmmword_ptr[addr], XmmReg(arg_info_.reg_id));
|
|
return addr;
|
|
} else {
|
|
return arg_info_.addr;
|
|
}
|
|
}
|
|
operator XmmReg () {
|
|
XmmReg reg;
|
|
if (arg_info_.flag & ARG_TYPE_PTR) {
|
|
// Passed by pointer
|
|
if (arg_info_.reg_id != INVALID) {
|
|
// argument pointer on register
|
|
f_->movdqa(reg, f_->xmmword_ptr[Reg(arg_info_.reg_id)]);
|
|
} else {
|
|
// argument pointer on stack
|
|
Reg ptr;
|
|
f_->mov(ptr, f_->ptr[arg_info_.addr]);
|
|
f_->movdqa(reg, f_->xmmword_ptr[ptr]);
|
|
}
|
|
} else if (arg_info_.reg_id != INVALID) {
|
|
// Passed by xmm register
|
|
f_->DeclareRegArg(reg, XmmReg(arg_info_.reg_id));
|
|
} else {
|
|
// Passed by stack
|
|
f_->DeclareStackArg(reg, f_->xmmword_ptr[arg_info_.addr]);
|
|
}
|
|
return reg;
|
|
}
|
|
};
|
|
#endif // JITASM_XMMINTRIN
|
|
|
|
#if JITASM_EMMINTRIN
|
|
// specialization for __m128d, __m128i
|
|
template<> struct Arg<__m128d, 16> : Arg<__m128, 16> {
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : Arg<__m128, 16>(f, arg_info) {}
|
|
};
|
|
|
|
template<> struct Arg<__m128i, 16> : Arg<__m128, 16> {
|
|
Arg(Frontend& f, const ArgInfo& arg_info) : Arg<__m128, 16>(f, arg_info) {}
|
|
};
|
|
#endif // JITASM_EMMINTRIN
|
|
} // namespace calling_convention_cdecl
|
|
|
|
} // namespace detail
|
|
|
|
/// cdecl function
|
|
template<
|
|
class R,
|
|
class Derived,
|
|
class A1 = detail::ArgNone,
|
|
class A2 = detail::ArgNone,
|
|
class A3 = detail::ArgNone,
|
|
class A4 = detail::ArgNone,
|
|
class A5 = detail::ArgNone,
|
|
class A6 = detail::ArgNone,
|
|
class A7 = detail::ArgNone,
|
|
class A8 = detail::ArgNone,
|
|
class A9 = detail::ArgNone,
|
|
class A10 = detail::ArgNone>
|
|
struct function_cdecl : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8, A9, A10);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<R,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<R,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<R,A1,A2,A3,A4,A5,A6,A7,A8>()),
|
|
Arg<A9>(*this, ArgInfo9<R,A1,A2,A3,A4,A5,A6,A7,A8,A9>()),
|
|
Arg<A10>(*this, ArgInfo10<R,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 10 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9, class A10>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8, A9, A10);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<void,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<void,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<void,A1,A2,A3,A4,A5,A6,A7,A8>()),
|
|
Arg<A9>(*this, ArgInfo9<void,A1,A2,A3,A4,A5,A6,A7,A8,A9>()),
|
|
Arg<A10>(*this, ArgInfo10<void,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 9 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, A5, A6, A7, A8, A9, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8, A9);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<R,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<R,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<R,A1,A2,A3,A4,A5,A6,A7,A8>()),
|
|
Arg<A9>(*this, ArgInfo9<R,A1,A2,A3,A4,A5,A6,A7,A8,A9>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 9 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8, class A9>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, A6, A7, A8, A9, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8, A9);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<void,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<void,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<void,A1,A2,A3,A4,A5,A6,A7,A8>()),
|
|
Arg<A9>(*this, ArgInfo9<void,A1,A2,A3,A4,A5,A6,A7,A8,A9>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 8 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, A5, A6, A7, A8, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<R,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<R,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<R,A1,A2,A3,A4,A5,A6,A7,A8>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 8 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7, class A8>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, A6, A7, A8, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7, A8);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<void,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<void,A1,A2,A3,A4,A5,A6,A7>()),
|
|
Arg<A8>(*this, ArgInfo8<void,A1,A2,A3,A4,A5,A6,A7,A8>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 7 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, A5, A6, A7, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<R,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<R,A1,A2,A3,A4,A5,A6,A7>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 7 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5, class A6, class A7>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, A6, A7, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5, A6, A7);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<void,A1,A2,A3,A4,A5,A6>()),
|
|
Arg<A7>(*this, ArgInfo7<void,A1,A2,A3,A4,A5,A6,A7>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 6 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4, class A5, class A6>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, A5, A6, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5, A6);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<R,A1,A2,A3,A4,A5,A6>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 6 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5, class A6>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, A6, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5, A6);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()),
|
|
Arg<A6>(*this, ArgInfo6<void,A1,A2,A3,A4,A5,A6>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 5 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4, class A5>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, A5, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4, A5);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<R,A1,A2,A3,A4,A5>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 5 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4, class A5>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, A5, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4, A5);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()),
|
|
Arg<A5>(*this, ArgInfo5<void,A1,A2,A3,A4,A5>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 4 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3, class A4>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, A4, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3, A4);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<R,A1,A2,A3,A4>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 4 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3, class A4>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, A4, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3, A4);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()),
|
|
Arg<A4>(*this, ArgInfo4<void,A1,A2,A3,A4>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 3 arguments
|
|
template<class R, class Derived, class A1, class A2, class A3>
|
|
struct function_cdecl<R, Derived, A1, A2, A3, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2, A3);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<R,A1,A2,A3>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 3 arguments and no result
|
|
template<class Derived, class A1, class A2, class A3>
|
|
struct function_cdecl<void, Derived, A1, A2, A3, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2, A3);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()),
|
|
Arg<A3>(*this, ArgInfo3<void,A1,A2,A3>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 2 arguments
|
|
template<class R, class Derived, class A1, class A2>
|
|
struct function_cdecl<R, Derived, A1, A2, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1, A2);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<R,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<R,A1,A2>())
|
|
).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 2 arguments and no result
|
|
template<class Derived, class A1, class A2>
|
|
struct function_cdecl<void, Derived, A1, A2, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1, A2);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(
|
|
Arg<A1>(*this, ArgInfo1<void,A1>()),
|
|
Arg<A2>(*this, ArgInfo2<void,A1,A2>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 1 argument
|
|
template<class R, class Derived, class A1>
|
|
struct function_cdecl<R, Derived, A1, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)(A1);
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main(Arg<A1>(*this, ArgInfo1<R,A1>())).StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for 1 argument and no result
|
|
template<class Derived, class A1>
|
|
struct function_cdecl<void, Derived, A1, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)(A1);
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
using namespace detail::calling_convention_cdecl;
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main(Arg<A1>(*this, ArgInfo1<void,A1>()));
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for no arguments
|
|
template<class R, class Derived>
|
|
struct function_cdecl<R, Derived, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef R (*FuncPtr)();
|
|
typedef detail::ResultT<R> Result; ///< main function result type
|
|
typename detail::ResultTraits<R>::ResultPtr result_ptr;
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
Prolog();
|
|
detail::ResultDest result_dst(*this, detail::calling_convention_cdecl::ResultInfo<R>());
|
|
static_cast<Derived *>(this)->main().StoreResult(*this, result_dst);
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
// specialization for no arguments and no result
|
|
template<class Derived>
|
|
struct function_cdecl<void, Derived, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone, detail::ArgNone> : Frontend
|
|
{
|
|
typedef void (*FuncPtr)();
|
|
operator FuncPtr() { return (FuncPtr)GetCode(); }
|
|
void InternalMain() {static_cast<Derived *>(this)->naked_main();}
|
|
void naked_main() {
|
|
Prolog();
|
|
static_cast<Derived *>(this)->main();
|
|
Epilog();
|
|
}
|
|
};
|
|
|
|
|
|
/// function
|
|
template<
|
|
class R,
|
|
class Derived,
|
|
class A1 = detail::ArgNone,
|
|
class A2 = detail::ArgNone,
|
|
class A3 = detail::ArgNone,
|
|
class A4 = detail::ArgNone,
|
|
class A5 = detail::ArgNone,
|
|
class A6 = detail::ArgNone,
|
|
class A7 = detail::ArgNone,
|
|
class A8 = detail::ArgNone,
|
|
class A9 = detail::ArgNone,
|
|
class A10 = detail::ArgNone>
|
|
struct function : function_cdecl<R, Derived, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10> {};
|
|
|
|
} // namespace jitasm
|
|
|
|
#if defined(_MSC_VER)
|
|
#pragma warning( pop )
|
|
#endif
|
|
|
|
#endif // #ifndef JITASM_H
|