--- embedaddon/pcre/sljit/sljitNativeX86_common.c 2012/10/09 09:19:18 1.1.1.3 +++ embedaddon/pcre/sljit/sljitNativeX86_common.c 2013/07/22 08:25:57 1.1.1.4 @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "x86" SLJIT_CPUINFO; } @@ -67,17 +67,17 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_p #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1) static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 + 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \ - w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \ + w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \ p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ do; \ } \ else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ - w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \ + w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \ p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ do; \ } @@ -95,20 +95,20 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 + 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 + 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. 
*/ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 + 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. */ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 + 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 }; #endif @@ -118,9 +118,6 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTER #define REX_B 0x41 #define REX 0x40 -typedef unsigned int sljit_uhw; -typedef int sljit_hw; - #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll) #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll) @@ -129,7 +126,7 @@ typedef int sljit_hw; #endif /* SLJIT_CONFIG_X86_32 */ #if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define TMP_FREG (SLJIT_FLOAT_REG4 + 1) +#define TMP_FREG (0) #endif /* Size flags for emit_x86_instruction: */ @@ -142,108 +139,278 @@ typedef int sljit_hw; #define EX86_PREF_66 0x0400 #if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define EX86_PREF_F2 0x0800 -#define EX86_SSE2 0x1000 +#define EX86_SSE2 0x0800 +#define EX86_PREF_F2 0x1000 +#define EX86_PREF_F3 0x2000 #endif -#define INC_SIZE(s) (*buf++ = (s), compiler->size += (s)) -#define INC_CSIZE(s) (*code++ = (s), compiler->size += (s)) +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ -#define PUSH_REG(r) (*buf++ = (0x50 + (r))) -#define POP_REG(r) (*buf++ = (0x58 + (r))) -#define RET() (*buf++ = (0xc3)) -#define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0) +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define 
ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JE_i8 0x74 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSXD_r_rm 0x63 +#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define OR_rm8_r8 0x08 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm (/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHR (/* SHIFT */ 5 << 3) +#define SUB (/* BINARY */ 5 << 3) +#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define UCOMISD_x_xm 0x2e +#define XCHG_EAX_r 
0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 + +#define GROUP_0F 0x0f +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 + +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 + +#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) + +#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) +#define POP_REG(r) (*inst++ = (POP_r + (r))) +#define RET() (*inst++ = (RET_near)) +#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) /* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm)) +#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) -static sljit_ub get_jump_code(int type) +/* Multithreading does not affect these static variables, since they store + built-in CPU features. Therefore they can be overwritten by different threads + if they detect the CPU features in the same time. */ +#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +static sljit_si cpu_has_sse2 = -1; +#endif +static sljit_si cpu_has_cmov = -1; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +#include <intrin.h> +#endif + +static void get_cpu_features(void) { + sljit_ui features; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + int CPUInfo[4]; + __cpuid(CPUInfo, 1); + features = (sljit_ui)CPUInfo[3]; + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) + + /* AT&T syntax. */ + __asm__ ( + "movl $0x1, %%eax\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* On x86-32, there is no red zone, so this + should work (no need for a local variable). 
*/ + "push %%ebx\n" +#endif + "cpuid\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + "pop %%ebx\n" +#endif + "movl %%edx, %0\n" + : "=g" (features) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "%eax", "%ecx", "%edx" +#else + : "%rax", "%rbx", "%rcx", "%rdx" +#endif + ); + +#else /* _MSC_VER && _MSC_VER >= 1400 */ + + /* Intel syntax. */ + __asm { + mov eax, 1 + cpuid + mov features, edx + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + cpu_has_sse2 = (features >> 26) & 0x1; +#endif + cpu_has_cmov = (features >> 15) & 0x1; +} + +static sljit_ub get_jump_code(sljit_si type) +{ switch (type) { case SLJIT_C_EQUAL: case SLJIT_C_FLOAT_EQUAL: - return 0x84; + return 0x84 /* je */; case SLJIT_C_NOT_EQUAL: case SLJIT_C_FLOAT_NOT_EQUAL: - return 0x85; + return 0x85 /* jne */; case SLJIT_C_LESS: case SLJIT_C_FLOAT_LESS: - return 0x82; + return 0x82 /* jc */; case SLJIT_C_GREATER_EQUAL: case SLJIT_C_FLOAT_GREATER_EQUAL: - return 0x83; + return 0x83 /* jae */; case SLJIT_C_GREATER: case SLJIT_C_FLOAT_GREATER: - return 0x87; + return 0x87 /* jnbe */; case SLJIT_C_LESS_EQUAL: case SLJIT_C_FLOAT_LESS_EQUAL: - return 0x86; + return 0x86 /* jbe */; case SLJIT_C_SIG_LESS: - return 0x8c; + return 0x8c /* jl */; case SLJIT_C_SIG_GREATER_EQUAL: - return 0x8d; + return 0x8d /* jnl */; case SLJIT_C_SIG_GREATER: - return 0x8f; + return 0x8f /* jnle */; case SLJIT_C_SIG_LESS_EQUAL: - return 0x8e; + return 0x8e /* jle */; case SLJIT_C_OVERFLOW: case SLJIT_C_MUL_OVERFLOW: - return 0x80; + return 0x80 /* jo */; case SLJIT_C_NOT_OVERFLOW: case SLJIT_C_MUL_NOT_OVERFLOW: - return 0x81; + return 0x81 /* jno */; - case SLJIT_C_FLOAT_NAN: - return 0x8a; + case SLJIT_C_FLOAT_UNORDERED: + return 0x8a /* jp */; - case SLJIT_C_FLOAT_NOT_NAN: - return 0x8b; + case SLJIT_C_FLOAT_ORDERED: + return 0x8b /* jpo */; } return 0; } -static sljit_ub* generate_far_jump_code(struct 
sljit_jump *jump, sljit_ub *code_ptr, int type); +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type); +static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type); #endif -static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type) +static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type) { - int short_jump; + sljit_si short_jump; sljit_uw label_addr; if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else label_addr = jump->u.target; - short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127; + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll) + if ((sljit_sw)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_sw)(label_addr - (jump->addr + 1)) < -0x80000000ll) return generate_far_jump_code(jump, code_ptr, type); #endif if (type == SLJIT_JUMP) { if (short_jump) - *code_ptr++ = 0xeb; + *code_ptr++ = JMP_i8; else - *code_ptr++ = 0xe9; + *code_ptr++ = JMP_i32; jump->addr++; } else if (type >= SLJIT_FAST_CALL) { short_jump = 0; - *code_ptr++ = 0xe8; + *code_ptr++ = CALL_i32; jump->addr++; } else if (short_jump) { @@ -251,20 +418,20 @@ static sljit_ub* generate_near_jump_code(struct sljit_ jump->addr++; } else { - *code_ptr++ = 0x0f; + *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); jump->addr += 2; } if (short_jump) { jump->flags |= PATCH_MB; - code_ptr += sizeof(sljit_b); + code_ptr += 
sizeof(sljit_sb); } else { jump->flags |= PATCH_MW; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code_ptr += sizeof(sljit_w); + code_ptr += sizeof(sljit_sw); #else - code_ptr += sizeof(sljit_hw); + code_ptr += sizeof(sljit_si); #endif } @@ -323,19 +490,19 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(str label = label->next; } else if (*buf_ptr == 1) { - const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w); + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); const_ = const_->next; } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */; + *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; buf_ptr++; - *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w)); - code_ptr += sizeof(sljit_w); - buf_ptr += sizeof(sljit_w) - 1; + *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); + code_ptr += sizeof(sljit_sw); + buf_ptr += sizeof(sljit_sw) - 1; #else - code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr); - buf_ptr += sizeof(sljit_w); + code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); + buf_ptr += sizeof(sljit_sw); #endif } buf_ptr++; @@ -352,29 +519,29 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(str jump = compiler->jumps; while (jump) { if (jump->flags & PATCH_MB) { - SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127); - *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127); + *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w))); + *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))); #else - SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll); - *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll); + *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))); #endif } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w))); + *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))); #else - SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll); - *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw))); + SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll); + *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si))); #endif } } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - *(sljit_w*)jump->addr = jump->u.label->addr; + *(sljit_sw*)jump->addr = jump->u.label->addr; #endif jump = jump->next; @@ -383,7 +550,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(str /* Maybe we waste some space because of short jumps. 
*/ SLJIT_ASSERT(code_ptr <= code + compiler->size); compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_size = compiler->size; + compiler->executable_size = code_ptr - code; return (void*)code; } @@ -391,65 +558,65 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(str /* Operators */ /* --------------------------------------------------------------------- */ -static int emit_cum_binary(struct sljit_compiler *compiler, +static sljit_si emit_cum_binary(struct sljit_compiler *compiler, sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w); + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); -static int emit_non_cum_binary(struct sljit_compiler *compiler, +static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w); + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); -static int emit_mov(struct sljit_compiler *compiler, - int dst, sljit_w dstw, - int src, sljit_w srcw); +static sljit_si emit_mov(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw); -static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler) +static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); INC_SIZE(5); #else - buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); INC_SIZE(6); - *buf++ = REX_W; + *inst++ = REX_W; #endif - *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + 
sizeof(sljit_w)] */ - *buf++ = 0x64; - *buf++ = 0x24; - *buf++ = (sljit_ub)sizeof(sljit_w); - *buf++ = 0x9c; /* pushfd / pushfq */ + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)sizeof(sljit_sw); + *inst++ = PUSHF; compiler->flags_saved = 1; return SLJIT_SUCCESS; } -static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags) +static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); INC_SIZE(5); - *buf++ = 0x9d; /* popfd */ + *inst++ = POPF; #else - buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); INC_SIZE(6); - *buf++ = 0x9d; /* popfq */ - *buf++ = REX_W; + *inst++ = POPF; + *inst++ = REX_W; #endif - *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */ - *buf++ = 0x64; - *buf++ = 0x24; - *buf++ = (sljit_ub)-(int)sizeof(sljit_w); + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); compiler->flags_saved = keep_flags; return SLJIT_SUCCESS; } @@ -457,15 +624,16 @@ static SLJIT_INLINE int emit_restore_flags(struct slji #ifdef _WIN32 #include <malloc.h> -static void SLJIT_CALL sljit_grow_stack(sljit_w local_size) +static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) { /* Workaround for calling the internal _chkstk() function on Windows. This function touches all 4k pages belongs to the requested stack space, which size is passed in local_size. This is necessary on Windows where the stack can only grow in 4k steps. 
However, this function just burn - CPU cycles if the stack is large enough, but you don't know it in advance. - I think this is a bad design even if it has some reasons. */ - alloca(local_size); + CPU cycles if the stack is large enough. However, you don't know it in + advance, so it must always be called. I think this is a bad design in + general even if it has some reasons. */ + *(sljit_si*)alloca(local_size) = 0; } #endif @@ -476,79 +644,79 @@ static void SLJIT_CALL sljit_grow_stack(sljit_w local_ #include "sljitNativeX86_64.c" #endif -static int emit_mov(struct sljit_compiler *compiler, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_mov(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { /* No destination, doesn't need to setup flags. */ if (src & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; } return SLJIT_SUCCESS; } - if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + if (src <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } if (src & SLJIT_IMM) { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { + if (dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else if (!compiler->mode32) { if (NOT_HALFWORD(srcw)) return emit_load_imm64(compiler, dst, srcw); } else - return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? 
REX_B : 0, 0xb8 + reg_lmap[dst], srcw); + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); #endif } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (!compiler->mode32 && NOT_HALFWORD(srcw)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } #endif - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; return SLJIT_SUCCESS; } - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; + if (dst <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; return SLJIT_SUCCESS; } /* Memory to memory move. Requires two instruction. 
*/ - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - int size; + sljit_si size; #endif CHECK_ERROR(); @@ -556,16 +724,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct slj switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0xcc; + *inst = INT3; break; case SLJIT_NOP: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x90; + *inst = NOP; break; case SLJIT_UMUL: case SLJIT_SMUL: @@ -575,14 +743,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct slj #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_TEMPORARY_REG1] == 0 - && reg_map[SLJIT_TEMPORARY_REG2] == 2 + reg_map[SLJIT_SCRATCH_REG1] == 0 + && reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[TMP_REGISTER] > 7, invalid_register_assignment_for_div_mul); #else SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_TEMPORARY_REG1] == 0 - && reg_map[SLJIT_TEMPORARY_REG2] < 7 + reg_map[SLJIT_SCRATCH_REG1] == 0 + && reg_map[SLJIT_SCRATCH_REG2] < 7 && 
reg_map[TMP_REGISTER] == 2, invalid_register_assignment_for_div_mul); #endif @@ -592,87 +760,86 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct slj op = GET_OPCODE(op); if (op == SLJIT_UDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); - buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0); + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0); #else - buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); #endif - FAIL_IF(!buf); - *buf = 0x33; + FAIL_IF(!inst); + *inst = XOR_r_rm; } if (op == SLJIT_SDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0); #endif - /* CDQ instruction */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x99; + *inst = CDQ; #else if (compiler->mode32) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x99; + *inst = CDQ; } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); INC_SIZE(2); - *buf++ = REX_W; - *buf = 0x99; + *inst++ = REX_W; + *inst = CDQ; } #endif } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); INC_SIZE(2); - *buf++ = 0xf7; - *buf = 0xc0 | ((op >= 
SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]); + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_SCRATCH_REG2]); #else #ifdef _WIN64 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; #else size = (!compiler->mode32) ? 3 : 2; #endif - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); #ifdef _WIN64 if (!compiler->mode32) - *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); + *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); else if (op >= SLJIT_UDIV) - *buf++ = REX_B; - *buf++ = 0xf7; - *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]); + *inst++ = REX_B; + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_SCRATCH_REG2]); #else if (!compiler->mode32) - *buf++ = REX_W; - *buf++ = 0xf7; - *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2]; + *inst++ = REX_W; + *inst++ = GROUP_F7; + *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2]; #endif #endif switch (op) { case SLJIT_UMUL: - *buf |= 4 << 3; + *inst |= MUL; break; case SLJIT_SMUL: - *buf |= 5 << 3; + *inst |= IMUL; break; case SLJIT_UDIV: - *buf |= 6 << 3; + *inst |= DIV; break; case SLJIT_SDIV: - *buf |= 7 << 3; + *inst |= IDIV; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0); + EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REGISTER, 0); #endif break; } @@ -682,20 +849,20 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct slj #define ENCODE_PREFIX(prefix) \ do { \ - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ - FAIL_IF(!code); \ - INC_CSIZE(1); \ - *code = (prefix); \ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!inst); \ + INC_SIZE(1); \ + *inst = (prefix); \ } while (0) -static int emit_mov_byte(struct sljit_compiler *compiler, 
int sign, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; - int dst_r; + sljit_ub* inst; + sljit_si dst_r; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - int work_r; + sljit_si work_r; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -706,22 +873,25 @@ static int emit_mov_byte(struct sljit_compiler *compil return SLJIT_SUCCESS; /* Empty instruction. */ if (src & SLJIT_IMM) { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { + if (dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else - return emit_load_imm64(compiler, dst, srcw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; #endif } - code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc6; + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; return SLJIT_SUCCESS; } - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER; + dst_r = (dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; - if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { + if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (reg_map[src] >= 4) { SLJIT_ASSERT(dst_r == TMP_REGISTER); @@ -733,35 +903,34 @@ static int emit_mov_byte(struct sljit_compiler *compil #endif } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) { + else if (src <= TMP_REGISTER && reg_map[src] >= 4) { /* src, dst are registers. */ - SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER); + SLJIT_ASSERT(dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REGISTER); if (reg_map[dst] < 4) { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); - code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbe : 0xb6; + inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? 
MOVSX_r_rm8 : MOVZX_r_rm8; } else { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); if (sign) { /* shl reg, 24 */ - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!code); - *code |= 0x4 << 3; - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!code); - /* shr/sar reg, 24 */ - *code |= 0x7 << 3; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SHL; + /* sar reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SAR; } else { - /* and dst, 0xff */ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0); - FAIL_IF(!code); - *(code + 1) |= 0x4 << 3; + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; } } return SLJIT_SUCCESS; @@ -769,74 +938,74 @@ static int emit_mov_byte(struct sljit_compiler *compil #endif else { /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ - code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbe : 0xb6; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; } if (dst & SLJIT_MEM) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst_r == TMP_REGISTER) { /* Find a non-used register, whose reg_map[src] < 4. 
*/ - if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) { - if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4)) - work_r = SLJIT_TEMPORARY_REG3; + if ((dst & 0xf) == SLJIT_SCRATCH_REG1) { + if ((dst & 0xf0) == (SLJIT_SCRATCH_REG2 << 4)) + work_r = SLJIT_SCRATCH_REG3; else - work_r = SLJIT_TEMPORARY_REG2; + work_r = SLJIT_SCRATCH_REG2; } else { - if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4)) - work_r = SLJIT_TEMPORARY_REG1; - else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2) - work_r = SLJIT_TEMPORARY_REG3; + if ((dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4)) + work_r = SLJIT_SCRATCH_REG1; + else if ((dst & 0xf) == SLJIT_SCRATCH_REG2) + work_r = SLJIT_SCRATCH_REG3; else - work_r = SLJIT_TEMPORARY_REG2; + work_r = SLJIT_SCRATCH_REG2; } - if (work_r == SLJIT_TEMPORARY_REG1) { - ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + if (work_r == SLJIT_SCRATCH_REG1) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); } else { - code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } - code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; - if (work_r == SLJIT_TEMPORARY_REG1) { - ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + if (work_r == SLJIT_SCRATCH_REG1) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); } else { - code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } } else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; } #else - code = emit_x86_instruction(compiler, 1 | EX86_REX | 
EX86_NO_REXW, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; #endif } return SLJIT_SUCCESS; } -static int emit_mov_half(struct sljit_compiler *compiler, int sign, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; - int dst_r; + sljit_ub* inst; + sljit_si dst_r; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; @@ -846,193 +1015,222 @@ static int emit_mov_half(struct sljit_compiler *compil return SLJIT_SUCCESS; /* Empty instruction. */ if (src & SLJIT_IMM) { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { + if (dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else - return emit_load_imm64(compiler, dst, srcw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; #endif } - code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; return SLJIT_SUCCESS; } - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER; + dst_r = (dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; - if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)) + if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) dst_r = src; else { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbf : 0xb7; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; } if (dst & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; } return SLJIT_SUCCESS; } -static int emit_unary(struct sljit_compiler *compiler, int un_index, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } if (dst == src && dstw == srcw) { /* Same input and output */ - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + if (dst <= TMP_REGISTER) { EMIT_MOV(compiler, dst, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 
(un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } -static int emit_not_with_flags(struct sljit_compiler *compiler, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; return SLJIT_SUCCESS; } - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + if (dst <= TMP_REGISTER) { EMIT_MOV(compiler, dst, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; return SLJIT_SUCCESS; } 
EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } -static int emit_clz(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src, sljit_w srcw) +static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; - int dst_r; + sljit_ub* inst; + sljit_si dst_r; - SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(op_flags); if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { /* Just set the zero flag. */ EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); #else - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 
63 : 31, TMP_REGISTER, 0); #endif - FAIL_IF(!code); - *code |= 0x5 << 3; + FAIL_IF(!inst); + *inst |= SHR; return SLJIT_SUCCESS; } if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, srcw); src = TMP_REGISTER; srcw = 0; } - code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xbd; + inst = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = BSR_r_rm; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) + if (dst <= TMP_REGISTER) dst_r = dst; else { /* Find an unused temporary register. */ - if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4)) - dst_r = SLJIT_TEMPORARY_REG1; - else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4)) - dst_r = SLJIT_TEMPORARY_REG2; + if ((dst & 0xf) != SLJIT_SCRATCH_REG1 && (dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4)) + dst_r = SLJIT_SCRATCH_REG1; + else if ((dst & 0xf) != SLJIT_SCRATCH_REG2 && (dst & 0xf0) != (SLJIT_SCRATCH_REG2 << 4)) + dst_r = SLJIT_SCRATCH_REG2; else - dst_r = SLJIT_TEMPORARY_REG3; + dst_r = SLJIT_SCRATCH_REG3; EMIT_MOV(compiler, dst, dstw, dst_r, 0); } EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); #else - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2; + dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REG2; compiler->mode32 = 0; - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); - compiler->mode32 = op & SLJIT_INT_OP; + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 
64 + 63 : 32 + 31); + compiler->mode32 = op_flags & SLJIT_INT_OP; #endif - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0x45; + if (cpu_has_cmov == -1) + get_cpu_features(); + if (cpu_has_cmov) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVNE_r_rm; + } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + *inst++ = JE_i8; + *inst++ = 2; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REGISTER]; #else - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + + *inst++ = JE_i8; + *inst++ = 3; + *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REGISTER] >= 8 ? REX_B : 0); + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REGISTER]; #endif - FAIL_IF(!code); - *(code + 1) |= 0x6 << 3; + } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); +#else + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 
63 : 31, dst_r, 0); +#endif + FAIL_IF(!inst); + *(inst + 1) |= XOR; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } #else if (dst & SLJIT_MEM) @@ -1041,17 +1239,18 @@ static int emit_clz(struct sljit_compiler *compiler, i return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - sljit_ub* code; - int update = 0; + sljit_ub* inst; + sljit_si update = 0; + sljit_si op_flags = GET_ALL_FLAGS(op); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - int dst_is_ereg = 0; - int src_is_ereg = 0; + sljit_si dst_is_ereg = 0; + sljit_si src_is_ereg = 0; #else - #define src_is_ereg 0 +# define src_is_ereg 0 #endif CHECK_ERROR(); @@ -1062,41 +1261,58 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct slj CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op & SLJIT_INT_OP; + compiler->mode32 = op_flags & SLJIT_INT_OP; #endif - if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) { - op = GET_OPCODE(op); + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; #endif - SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset); + if (op_flags & SLJIT_INT_OP) { + if (src <= TMP_REGISTER && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_MOV_SI && (src & 
SLJIT_MEM)) + op = SLJIT_MOV_UI; + if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) + op = SLJIT_MOVU_UI; + if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) + op = SLJIT_MOV_SI; + if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) + op = SLJIT_MOVU_SI; +#endif + } + + SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); if (op >= SLJIT_MOVU) { update = 1; - op -= 7; + op -= 8; } if (src & SLJIT_IMM) { switch (op) { case SLJIT_MOV_UB: - srcw = (unsigned char)srcw; + srcw = (sljit_ub)srcw; break; case SLJIT_MOV_SB: - srcw = (signed char)srcw; + srcw = (sljit_sb)srcw; break; case SLJIT_MOV_UH: - srcw = (unsigned short)srcw; + srcw = (sljit_uh)srcw; break; case SLJIT_MOV_SH: - srcw = (signed short)srcw; + srcw = (sljit_sh)srcw; break; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) case SLJIT_MOV_UI: - srcw = (unsigned int)srcw; + srcw = (sljit_ui)srcw; break; case SLJIT_MOV_SI: - srcw = (signed int)srcw; + srcw = (sljit_si)srcw; break; #endif } @@ -1107,15 +1323,15 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct slj } if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) { - code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8d; + inst = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); + FAIL_IF(!inst); + *inst = LEA_r_m; src &= SLJIT_MEM | 0xf; srcw = 0; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) { + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); dst = TMP_REGISTER; } @@ -1123,6 +1339,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct slj switch (op) { case SLJIT_MOV: + case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) case 
SLJIT_MOV_UI: case SLJIT_MOV_SI: @@ -1130,23 +1347,23 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct slj FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); break; case SLJIT_MOV_UB: - FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw)); + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); break; case SLJIT_MOV_SB: - FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw)); + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); break; case SLJIT_MOV_UH: - FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw)); + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); break; case SLJIT_MOV_SH: - FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw)); + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); break; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) case SLJIT_MOV_UI: - FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw)); + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); break; case SLJIT_MOV_SI: - FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? 
(signed int)srcw : srcw)); + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); break; #endif } @@ -1157,77 +1374,77 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct slj #endif if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) { - code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x8d; + inst = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); + FAIL_IF(!inst); + *inst = LEA_r_m; } return SLJIT_SUCCESS; } - if (SLJIT_UNLIKELY(GET_FLAGS(op))) + if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) compiler->flags_saved = 0; - switch (GET_OPCODE(op)) { + switch (op) { case SLJIT_NOT: - if (SLJIT_UNLIKELY(op & SLJIT_SET_E)) + if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) return emit_not_with_flags(compiler, dst, dstw, src, srcw); - return emit_unary(compiler, 0x2, dst, dstw, src, srcw); + return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); case SLJIT_NEG: - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) FAIL_IF(emit_save_flags(compiler)); - return emit_unary(compiler, 0x3, dst, dstw, src, srcw); + return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); case SLJIT_CLZ: - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) FAIL_IF(emit_save_flags(compiler)); - return emit_clz(compiler, op, dst, dstw, src, srcw); + return emit_clz(compiler, op_flags, dst, dstw, src, srcw); } return SLJIT_SUCCESS; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - #undef src_is_ereg +# undef src_is_ereg #endif } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ if (IS_HALFWORD(immw) || compiler->mode32) { \ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, 
SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!code); \ - *(code + 1) |= (_op_imm_); \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ } \ else { \ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ - FAIL_IF(!code); \ - *code = (_op_mr_); \ + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ } -#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw)) +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) #else -#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!code); \ - *(code + 1) |= (_op_imm_); +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); -#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ - FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw)) +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) #endif -static int emit_cum_binary(struct sljit_compiler *compiler, +static sljit_si emit_cum_binary(struct sljit_compiler *compiler, sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); @@ -1235,9 +1452,9 @@ static int emit_cum_binary(struct sljit_compiler *comp BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } 
else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } return SLJIT_SUCCESS; } @@ -1245,9 +1462,9 @@ static int emit_cum_binary(struct sljit_compiler *comp if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1255,22 +1472,22 @@ static int emit_cum_binary(struct sljit_compiler *comp BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); } } - else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + else if (dst <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } - else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) { - /* Special exception for sljit_emit_cond_value. */ - code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + else if (src2 <= TMP_REGISTER) { + /* Special exception for sljit_emit_op_flags. 
*/ + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } @@ -1279,9 +1496,9 @@ static int emit_cum_binary(struct sljit_compiler *comp if (dst == src2 && dstw == src2w) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) { + if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src1w); } @@ -1289,35 +1506,35 @@ static int emit_cum_binary(struct sljit_compiler *comp BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); } } - else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); - FAIL_IF(!code); - *code = op_rm; + else if (dst <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; } - else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + else if (src1 <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 
0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } /* General version. */ - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + if (dst <= TMP_REGISTER) { EMIT_MOV(compiler, dst, 0, src1, src1w); if (src2 & SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { - code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } } else { @@ -1327,9 +1544,9 @@ static int emit_cum_binary(struct sljit_compiler *comp BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); } @@ -1337,13 +1554,13 @@ static int emit_cum_binary(struct sljit_compiler *comp return SLJIT_SUCCESS; } -static int emit_non_cum_binary(struct sljit_compiler *compiler, +static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); @@ -1351,9 +1568,9 @@ static int emit_non_cum_binary(struct sljit_compiler * BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } return SLJIT_SUCCESS; } @@ -1361,9 +1578,9 @@ static int emit_non_cum_binary(struct sljit_compiler * if (dst == 
src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1371,35 +1588,35 @@ static int emit_non_cum_binary(struct sljit_compiler * BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); } } - else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + else if (dst <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } - else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + else if (src2 <= TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } /* General version. 
*/ - if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) { + if (dst <= TMP_REGISTER && dst != src2) { EMIT_MOV(compiler, dst, 0, src1, src1w); if (src2 & SLJIT_IMM) { BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { - code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } } else { @@ -1409,9 +1626,9 @@ static int emit_non_cum_binary(struct sljit_compiler * BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); } @@ -1419,28 +1636,28 @@ static int emit_non_cum_binary(struct sljit_compiler * return SLJIT_SUCCESS; } -static int emit_mul(struct sljit_compiler *compiler, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +static sljit_si emit_mul(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; - int dst_r; + sljit_ub* inst; + sljit_si dst_r; - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER; /* Register destination. 
*/ if (dst_r == src1 && !(src2 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } else if (src1 & SLJIT_IMM) { if (src2 & SLJIT_IMM) { @@ -1450,42 +1667,42 @@ static int emit_mul(struct sljit_compiler *compiler, } if (src1w <= 127 && src1w >= -128) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x6b; - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!code); - INC_CSIZE(1); - *code = (sljit_b)src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src1w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_w*)code = src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src1w; } #else else if (IS_HALFWORD(src1w)) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_hw*)code = (sljit_hw)src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 
0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src1w; } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); if (dst_r != src2) EMIT_MOV(compiler, dst_r, 0, src2, src2w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } #endif } @@ -1493,42 +1710,42 @@ static int emit_mul(struct sljit_compiler *compiler, /* Note: src1 is NOT immediate. */ if (src2w <= 127 && src2w >= -128) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x6b; - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!code); - INC_CSIZE(1); - *code = (sljit_b)src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src2w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_w*)code = src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src2w; } #else else if (IS_HALFWORD(src2w)) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_hw*)code = (sljit_hw)src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = 
IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src2w; } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); if (dst_r != src1) EMIT_MOV(compiler, dst_r, 0, src1, src1w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } #endif } @@ -1537,10 +1754,10 @@ static int emit_mul(struct sljit_compiler *compiler, if (ADDRESSING_DEPENDS_ON(src2, dst_r)) dst_r = TMP_REGISTER; EMIT_MOV(compiler, dst_r, 0, src1, src1w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } if (dst_r == TMP_REGISTER) @@ -1549,51 +1766,53 @@ static int emit_mul(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static int emit_lea_binary(struct sljit_compiler *compiler, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; - int dst_r, done = 0; + sljit_ub* inst; + sljit_si dst_r, done = 0; /* These cases better be left to handled by normal way. */ - if (dst == src1 && dstw == src1w) - return SLJIT_ERR_UNSUPPORTED; - if (dst == src2 && dstw == src2w) - return SLJIT_ERR_UNSUPPORTED; + if (!keep_flags) { + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; + } - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + dst_r = (dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; - if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { - if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); - FAIL_IF(!code); - *code = 0x8d; + if (src1 <= TMP_REGISTER) { + if (src2 <= TMP_REGISTER || src2 == TMP_REGISTER) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); #else if (src2 & SLJIT_IMM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); #endif - FAIL_IF(!code); - *code = 0x8d; + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } } - else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { + else if (src2 <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); #else if (src1 & SLJIT_IMM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); #endif - FAIL_IF(!code); - *code = 0x8d; + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } } @@ -1606,37 +1825,37 @@ static int emit_lea_binary(struct sljit_compiler *comp return SLJIT_ERR_UNSUPPORTED; } -static int emit_cmp_binary(struct sljit_compiler *compiler, - int src1, sljit_w src1w, - int src2, sljit_w 
src2w) +static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif - BINARY_EAX_IMM(0x3d, src2w); + BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; } - if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { + if (src1 <= TMP_REGISTER) { if (src2 & SLJIT_IMM) { - BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0); + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); } else { - code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x3b; + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; } return SLJIT_SUCCESS; } - if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x39; + if (src2 <= TMP_REGISTER && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; return SLJIT_SUCCESS; } @@ -1646,93 +1865,93 @@ static int emit_cmp_binary(struct sljit_compiler *comp src1 = TMP_REGISTER; src1w = 0; } - BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w); + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x3b; 
+ inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; } return SLJIT_SUCCESS; } -static int emit_test_binary(struct sljit_compiler *compiler, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +static sljit_si emit_test_binary(struct sljit_compiler *compiler, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif - BINARY_EAX_IMM(0xa9, src2w); + BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { + if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif - BINARY_EAX_IMM(0xa9, src1w); + BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; } - if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { + if (src1 <= TMP_REGISTER) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!code); - *code = 0xf7; + 
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return SLJIT_SUCCESS; } - if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { + if (src2 <= TMP_REGISTER) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src1w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return 
SLJIT_SUCCESS; } @@ -1741,72 +1960,72 @@ static int emit_test_binary(struct sljit_compiler *com if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return SLJIT_SUCCESS; } -static int emit_shift(struct sljit_compiler *compiler, +static sljit_si emit_shift(struct sljit_compiler *compiler, sljit_ub mode, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { if (dst == src1 && dstw == src1w) { - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, 
TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); return SLJIT_SUCCESS; } - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + if (dst <= TMP_REGISTER) { EMIT_MOV(compiler, dst, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } @@ -1814,19 +2033,19 @@ static int emit_shift(struct sljit_compiler *compiler, if (dst == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, 
TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); } - else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + else if (dst <= TMP_REGISTER && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { if (src1 != dst) EMIT_MOV(compiler, dst, 0, src1, src1w); EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); } else { @@ -1837,16 +2056,16 @@ static int emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); #else /* [esp+0] contains the flags. 
*/ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w), SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); #endif EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w)); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw)); #endif EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); } @@ -1854,11 +2073,11 @@ static int emit_shift(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -static int emit_shift_with_flags(struct sljit_compiler *compiler, - sljit_ub mode, int set_flags, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_ub mode, sljit_si set_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { /* The CPU does not set flags if the shift count is 0. 
*/ if (src2 & SLJIT_IMM) { @@ -1872,27 +2091,27 @@ static int emit_shift_with_flags(struct sljit_compiler if (!set_flags) return emit_mov(compiler, dst, dstw, src1, src1w); /* OR dst, src, 0 */ - return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, dst, dstw, src1, src1w, SLJIT_IMM, 0); } if (!set_flags) return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); - if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)) + if (!(dst <= TMP_REGISTER)) FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + if (dst <= TMP_REGISTER) return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { CHECK_ERROR(); check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -1917,14 +2136,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct slj switch (GET_OPCODE(op)) { case SLJIT_ADD: if (!GET_FLAGS(op)) { - if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; } else compiler->flags_saved = 0; if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) FAIL_IF(emit_save_flags(compiler)); - return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, + return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, dst, dstw, src1, src1w, src2, 
src2w); case SLJIT_ADDC: if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ @@ -1933,11 +2152,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct slj FAIL_IF(emit_save_flags(compiler)); if (SLJIT_UNLIKELY(GET_FLAGS(op))) compiler->flags_saved = 0; - return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15, + return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: if (!GET_FLAGS(op)) { - if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; } else @@ -1946,7 +2165,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct slj FAIL_IF(emit_save_flags(compiler)); if (dst == SLJIT_UNUSED) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. 
*/ @@ -1955,36 +2174,36 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct slj FAIL_IF(emit_save_flags(compiler)); if (SLJIT_UNLIKELY(GET_FLAGS(op))) compiler->flags_saved = 0; - return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d, + return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: if (dst == SLJIT_UNUSED) return emit_test_binary(compiler, src1, src1w, src2, src2w); - return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25, + return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: - return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35, + return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: - return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_LSHR: - return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ASHR: - return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { check_sljit_get_register_index(reg); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -1995,19 +2214,25 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index( return reg_map[reg]; } 
-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, int size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - sljit_ub *buf; + check_sljit_get_float_register_index(reg); + return reg; +} +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + sljit_ub *inst; + CHECK_ERROR(); check_sljit_emit_op_custom(compiler, instruction, size); SLJIT_ASSERT(size > 0 && size < 16); - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); - SLJIT_MEMMOVE(buf, instruction, size); + SLJIT_MEMMOVE(inst, instruction, size); return SLJIT_SUCCESS; } @@ -2018,107 +2243,82 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(stru #if (defined SLJIT_SSE2 && SLJIT_SSE2) /* Alignment + 2 * 16 bytes. */ -static sljit_i sse2_data[3 + 4 + 4]; -static sljit_i *sse2_buffer; +static sljit_si sse2_data[3 + (4 + 4) * 2]; +static sljit_si *sse2_buffer; -static void init_compiler() +static void init_compiler(void) { - sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf); - sse2_buffer[0] = 0; - sse2_buffer[1] = 0x80000000; - sse2_buffer[4] = 0xffffffff; - sse2_buffer[5] = 0x7fffffff; + sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf); + /* Single precision constants. */ + sse2_buffer[0] = 0x80000000; + sse2_buffer[4] = 0x7fffffff; + /* Double precision constants. 
*/ + sse2_buffer[8] = 0; + sse2_buffer[9] = 0x80000000; + sse2_buffer[12] = 0xffffffff; + sse2_buffer[13] = 0x7fffffff; } #endif -SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) { #if (defined SLJIT_SSE2 && SLJIT_SSE2) #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - static int sse2_available = -1; - int features; - - if (sse2_available != -1) - return sse2_available; - -#ifdef __GNUC__ - /* AT&T syntax. */ - asm ( - "pushl %%ebx\n" - "movl $0x1, %%eax\n" - "cpuid\n" - "popl %%ebx\n" - "movl %%edx, %0\n" - : "=g" (features) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined(_MSC_VER) || defined(__BORLANDC__) - /* Intel syntax. */ - __asm { - mov eax, 1 - push ebx - cpuid - pop ebx - mov features, edx - } -#else - #error "SLJIT_DETECT_SSE2 is not implemented for this C compiler" -#endif - sse2_available = (features >> 26) & 0x1; - return sse2_available; -#else + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else /* SLJIT_DETECT_SSE2 */ return 1; -#endif -#else +#endif /* SLJIT_DETECT_SSE2 */ +#else /* SLJIT_SSE2 */ return 0; #endif } #if (defined SLJIT_SSE2 && SLJIT_SSE2) -static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, - int xmm1, int xmm2, sljit_w xmm2w) +static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { - sljit_ub *buf; + sljit_ub *inst; - buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!buf); - *buf++ = 0x0f; - *buf = opcode; + inst = emit_x86_instruction(compiler, 2 | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; return SLJIT_SUCCESS; } -static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, - int xmm1, int xmm2, sljit_w xmm2w) +static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { - sljit_ub *buf; + sljit_ub *inst; - buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!buf); - *buf++ = 0x0f; - *buf = opcode; + inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; return SLJIT_SUCCESS; } -static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler, - int dst, int src, sljit_w srcw) +static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, + sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) { - return emit_sse2(compiler, 0x10, dst, src, srcw); + return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); } -static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler, - int dst, sljit_w dstw, int src) +static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, + sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) { - return emit_sse2(compiler, 0x11, src, dst, dstw); + return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { - int dst_r; + sljit_si dst_r; CHECK_ERROR(); check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); @@ -2127,57 +2327,57 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct 
sl compiler->mode32 = 1; #endif - if (GET_OPCODE(op) == SLJIT_FCMP) { + if (GET_OPCODE(op) == SLJIT_CMPD) { compiler->flags_saved = 0; - if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) + if (dst <= SLJIT_FLOAT_REG6) dst_r = dst; else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw)); } - return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw); + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); } - if (op == SLJIT_FMOV) { - if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) - return emit_sse2_load(compiler, dst, src, srcw); - if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) - return emit_sse2_store(compiler, dst, dstw, src); - FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw)); - return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + if (op == SLJIT_MOVD) { + if (dst <= SLJIT_FLOAT_REG6) + return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); + if (src <= SLJIT_FLOAT_REG6) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); } - if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) { + if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG6) { dst_r = dst; if (dst != src) - FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); } - switch (op) { - case SLJIT_FNEG: - FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer)); + switch (GET_OPCODE(op)) { + case SLJIT_NEGD: + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), 
(sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); break; - case SLJIT_FABS: - FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4))); + case SLJIT_ABSD: + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { - int dst_r; + sljit_si dst_r; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -2186,55 +2386,55 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sl compiler->mode32 = 1; #endif - if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) { + if (dst <= SLJIT_FLOAT_REG6) { dst_r = dst; if (dst == src1) ; /* Do nothing here. */ - else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) { + else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) { /* Swap arguments. 
*/ src2 = src1; src2w = src1w; } else if (dst != src2) - FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w)); else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); } } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); } - switch (op) { - case SLJIT_FADD: - FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w)); + switch (GET_OPCODE(op)) { + case SLJIT_ADDD: + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_FSUB: - FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w)); + case SLJIT_SUBD: + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_FMUL: - FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w)); + case SLJIT_MULD: + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_FDIV: - FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w)); + case SLJIT_DIVD: + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } #else -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) { CHECK_ERROR(); /* Should cause an assertion fail. 
*/ @@ -2243,10 +2443,10 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sl return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) { CHECK_ERROR(); /* Should cause an assertion fail. */ @@ -2263,7 +2463,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sl SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_label *label; CHECK_ERROR_PTR(); @@ -2281,18 +2481,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emi PTR_FAIL_IF(!label); set_label(label, compiler); - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); - *buf++ = 0; - *buf++ = 0; + *inst++ = 0; + *inst++ = 0; return label; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_jump *jump; CHECK_ERROR_PTR(); @@ -2319,17 +2519,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit compiler->size += (type >= SLJIT_JUMP) ? 
(10 + 3) : (2 + 10 + 3); #endif - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF_NULL(buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF_NULL(inst); - *buf++ = 0; - *buf++ = type + 4; + *inst++ = 0; + *inst++ = type + 4; return jump; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) { - sljit_ub *code; + sljit_ub *inst; struct sljit_jump *jump; CHECK_ERROR(); @@ -2347,19 +2547,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct s if (type >= SLJIT_CALL1) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_TEMPORARY_REG3) { + if (src == SLJIT_SCRATCH_REG3) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); src = TMP_REGISTER; } if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3) - srcw += sizeof(sljit_w); -#else - if (src == SLJIT_MEM1(SLJIT_LOCALS_REG)) - srcw += sizeof(sljit_w) * (type - SLJIT_CALL0); + srcw += sizeof(sljit_sw); #endif #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_TEMPORARY_REG3) { + if (src == SLJIT_SCRATCH_REG3) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); src = TMP_REGISTER; } @@ -2380,37 +2577,42 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct s compiler->size += 10 + 3; #endif - code = (sljit_ub*)ensure_buf(compiler, 2); - FAIL_IF_NULL(code); + inst = (sljit_ub*)ensure_buf(compiler, 2); + FAIL_IF_NULL(inst); - *code++ = 0; - *code++ = type + 4; + *inst++ = 0; + *inst++ = type + 4; } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; #endif - code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0xff; - *code |= (type >= SLJIT_FAST_CALL) ? 
(2 << 3) : (4 << 3); + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; sljit_ub cond_set = 0; - int dst_save = dst; - sljit_w dstw_save = dstw; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - int reg; + sljit_si reg; +#else + /* CHECK_EXTRA_REGS might overwrite these values. */ + sljit_si dst_save = dst; + sljit_sw dstw_save = dstw; #endif CHECK_ERROR(); - check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; @@ -2420,177 +2622,165 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(str if (SLJIT_UNLIKELY(compiler->flags_saved)) FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_EQUAL: - cond_set = 0x94; - break; + /* setcc = jcc + 0x10. 
*/ + cond_set = get_jump_code(type) + 0x10; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - cond_set = 0x95; - break; - - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: - cond_set = 0x92; - break; - - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: - cond_set = 0x93; - break; - - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: - cond_set = 0x97; - break; - - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: - cond_set = 0x96; - break; - - case SLJIT_C_SIG_LESS: - cond_set = 0x9c; - break; - - case SLJIT_C_SIG_GREATER_EQUAL: - cond_set = 0x9d; - break; - - case SLJIT_C_SIG_GREATER: - cond_set = 0x9f; - break; - - case SLJIT_C_SIG_LESS_EQUAL: - cond_set = 0x9e; - break; - - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: - cond_set = 0x90; - break; - - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: - cond_set = 0x91; - break; - - case SLJIT_C_FLOAT_NAN: - cond_set = 0x9a; - break; - - case SLJIT_C_FLOAT_NOT_NAN: - cond_set = 0x9b; - break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3); + FAIL_IF(!inst); + INC_SIZE(4 + 3); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[TMP_REGISTER]; + *inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst]; + return SLJIT_SUCCESS; } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); INC_SIZE(4 + 4); /* Set low register to conditional flag. */ - *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B; - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_lmap[reg]; - *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg]; + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[reg]; + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; - if (reg == TMP_REGISTER) { - if (op == SLJIT_MOV) { - compiler->mode32 = 0; - EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); - } - else { + if (reg != TMP_REGISTER) + return SLJIT_SUCCESS; + + if (GET_OPCODE(op) < SLJIT_ADD) { + compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; + return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0); + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; + compiler->skip_checks = 1; #endif - return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0); - } - } -#else - if (op == SLJIT_MOV) { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); - FAIL_IF(!buf); + return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0); +#else /* SLJIT_CONFIG_X86_64 */ + if (GET_OPCODE(op) < SLJIT_ADD && dst <= TMP_REGISTER) { + if (reg_map[dst] <= 4) { + /* Low byte is accessible. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); INC_SIZE(3 + 3); /* Set low byte to conditional flag. 
*/ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_map[dst]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_map[dst]; - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst]; + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; + return SLJIT_SUCCESS; } - else { - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); - FAIL_IF(!buf); - INC_SIZE(3 + 3); - /* Set al to conditional flag. */ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0; + /* Low byte is not accessible. */ + if (cpu_has_cmov == -1) + get_cpu_features(); - *buf++ = 0x0f; - *buf++ = 0xb6; - if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS) - *buf = 0xC0 | (reg_map[dst] << 3); - else { - *buf = 0xC0; - EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0); - } + if (cpu_has_cmov) { + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1); + /* a xor reg, reg operation would overwrite the flags. */ + EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); - EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + *inst++ = GROUP_0F; + /* cmovcc = setcc - 0x50. */ + *inst++ = cond_set - 0x50; + *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER]; + return SLJIT_SUCCESS; } + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; + /* Set al to conditional flag. 
*/ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; + return SLJIT_SUCCESS; } - else { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { - EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0); - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); - FAIL_IF(!buf); - INC_SIZE(3); - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_map[dst]; + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax); + if (dst != SLJIT_SCRATCH_REG1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 2 + 1); + /* Set low register to conditional flag. */ + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; } else { - EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); + FAIL_IF(!inst); + INC_SIZE(2 + 3 + 2 + 2); + /* Set low register to conditional flag. */ + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 1 /* ecx */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER]; + } + return SLJIT_SUCCESS; + } - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1); - FAIL_IF(!buf); - INC_SIZE(3 + 3 + 1); - /* Set al to conditional flag. */ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0; + /* Set TMP_REGISTER to the bit. 
*/ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf++ = 0xC0; + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; - *buf++ = 0x90 + reg_map[TMP_REGISTER]; - } + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; + + if (GET_OPCODE(op) < SLJIT_ADD) + return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0); + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->skip_checks = 1; + compiler->skip_checks = 1; #endif - return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0); - } -#endif - - return SLJIT_SUCCESS; + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0); +#endif /* SLJIT_CONFIG_X86_64 */ } -SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); check_sljit_get_local_base(compiler, dst, dstw, offset); @@ -2608,25 +2798,25 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(stru if (NOT_HALFWORD(offset)) { FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset)); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED); return compiler->error; #else - return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, 
dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0); #endif } #endif if (offset != 0) - return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0); } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_const *const_; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - int reg; + sljit_si reg; #endif CHECK_ERROR_PTR(); @@ -2641,7 +2831,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; - reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + reg = (dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; if (emit_load_imm64(compiler, reg, init_value)) return NULL; @@ -2653,11 +2843,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emi return NULL; #endif - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); - *buf++ = 0; - *buf++ = 1; + *inst++ = 0; + *inst++ = 1; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (reg == TMP_REGISTER && dst != SLJIT_UNUSED) @@ -2671,13 +2861,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_w*)addr = new_addr - (addr + 4); + *(sljit_sw*)addr = new_addr - (addr + 4); #else *(sljit_uw*)addr = new_addr; #endif } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) { - *(sljit_w*)addr = new_constant; + *(sljit_sw*)addr = new_constant; }