File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / sljit / sljitNativeX86_common.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Oct 9 09:19:18 2012 UTC (11 years, 8 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_31, HEAD
pcre

    1: /*
    2:  *    Stack-less Just-In-Time compiler
    3:  *
    4:  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5:  *
    6:  * Redistribution and use in source and binary forms, with or without modification, are
    7:  * permitted provided that the following conditions are met:
    8:  *
    9:  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10:  *      conditions and the following disclaimer.
   11:  *
   12:  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13:  *      of conditions and the following disclaimer in the documentation and/or other materials
   14:  *      provided with the distribution.
   15:  *
   16:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19:  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21:  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24:  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25:  */
   26: 
   27: SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
   28: {
   29: 	return "x86" SLJIT_CPUINFO;
   30: }
   31: 
   32: /*
   33:    32b register indexes:
   34:      0 - EAX
   35:      1 - ECX
   36:      2 - EDX
   37:      3 - EBX
   38:      4 - none
   39:      5 - EBP
   40:      6 - ESI
   41:      7 - EDI
   42: */
   43: 
   44: /*
   45:    64b register indexes:
   46:      0 - RAX
   47:      1 - RCX
   48:      2 - RDX
   49:      3 - RBX
   50:      4 - none
   51:      5 - RBP
   52:      6 - RSI
   53:      7 - RDI
   54:      8 - R8   - From now on REX prefix is required
   55:      9 - R9
   56:     10 - R10
   57:     11 - R11
   58:     12 - R12
   59:     13 - R13
   60:     14 - R14
   61:     15 - R15
   62: */
   63: 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

/* Maps abstract SLJIT register indexes to x86 machine register numbers
   (see the 32b register index table above). Index 0 is SLJIT_UNUSED;
   zero entries with no machine register are the stack-based EREGs. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
  0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

/* On x86-32 the extra temporary/saved registers (EREG1..EREG2) do not fit
   in machine registers; they live in stack slots. This rewrites such an
   operand pair (p, w) into an SLJIT_MEM1(SLJIT_LOCALS_REG) access at the
   proper offset and then executes "do" (typically a FAIL_IF'd mov).
   Note: "do" is a keyword, but keywords are ordinary identifiers to the
   preprocessor, so it is a valid macro parameter name. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* System V AMD64 ABI: 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7: the 3-bit register field that goes into the
   ModRM byte; the 4th bit, when set, is supplied via a REX prefix. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
};
#else
/* Win64 ABI: 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
};
#endif

/* REX prefix byte values: W selects 64-bit operand size; R, X and B
   extend the ModRM reg, SIB index and ModRM rm/SIB base fields. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* "Halfword" here means 32 bits on x86-64 (an imm32/rel32 field). */
typedef unsigned int sljit_uhw;
typedef int sljit_hw;

/* Whether x fits in a sign-extended 32-bit immediate/displacement. */
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

/* All SLJIT registers are real machine registers on x86-64: no-op. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
  130: 
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Scratch SSE2 register used by the floating point move helpers. */
#define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010	/* binary ALU instruction (add/or/...) */
#define EX86_SHIFT_INS		0x0020	/* shift/rotate instruction */
#define EX86_REX		0x0040	/* force a REX prefix */
#define EX86_NO_REXW		0x0080	/* suppress REX.W (32-bit operand) */
#define EX86_BYTE_ARG		0x0100	/* immediate argument is 8 bits */
#define EX86_HALF_ARG		0x0200	/* immediate argument is 16 bits */
#define EX86_PREF_66		0x0400	/* emit 0x66 operand-size prefix */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		0x0800	/* emit 0xf2 (scalar double) prefix */
#define EX86_SSE2		0x1000	/* operands are SSE2 registers */
#endif

/* Record s upcoming instruction bytes in the buffer cursor ("buf" or
   "code" variant) and account for them in the total code size. */
#define INC_SIZE(s)			(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)			(*code++ = (s), compiler->size += (s))

/* One-byte push/pop/ret encodings (opcode + register number). */
#define PUSH_REG(r)			(*buf++ = (0x50 + (r)))
#define POP_REG(r)			(*buf++ = (0x58 + (r)))
#define RET()				(*buf++ = (0xc3))
#define RETN(n)				(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)	/* ret imm16 */
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
  158: 
/* Maps an SLJIT condition code to the second opcode byte of the
   corresponding two-byte near conditional jump (0x0F 8x, Jcc rel32).
   Subtracting 0x10 from the returned value gives the matching one-byte
   short form (0x7x, Jcc rel8). Returns 0 for unknown types. */
static sljit_ub get_jump_code(int type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84; /* je/jz */

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85; /* jne/jnz */

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82; /* jb (unsigned) */

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83; /* jae (unsigned) */

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87; /* ja (unsigned) */

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86; /* jbe (unsigned) */

	case SLJIT_C_SIG_LESS:
		return 0x8c; /* jl (signed) */

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d; /* jge (signed) */

	case SLJIT_C_SIG_GREATER:
		return 0x8f; /* jg (signed) */

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e; /* jle (signed) */

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80; /* jo */

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81; /* jno */

	case SLJIT_C_FLOAT_NAN:
		return 0x8a; /* jp (parity flag set by ucomisd on NaN) */

	case SLJIT_C_FLOAT_NOT_NAN:
		return 0x8b; /* jnp */
	}
	return 0;
}
  214: 
/* Emits an absolute-target jump/call; used when a rel8/rel32 displacement
   may not reach the destination, or for rewritable jumps. */
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Emits a jump/call to a fixed, already-known address (x86-64 only). */
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
#endif
  220: 
/* Emits the opcode byte(s) of a PC-relative jump/call and reserves room
   for its displacement, which is filled in by the patch pass of
   sljit_generate_code (driven by the PATCH_MB/PATCH_MW flags set here).
   Chooses the short (rel8) form when the estimated distance fits in a
   signed byte; "code" is the base of the output buffer. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	/* For a label target the address is estimated from the first-pass
	   offset (label->size); the real address is never larger, so a
	   short jump chosen here always stays in range. */
	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* On x86-64 a target beyond +/-2GB cannot use a rel32 form at all. */
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	/* jump->addr is advanced past the opcode so that it ends up pointing
	   at the displacement field to be patched later. */
	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = 0xe8; /* call rel32 (no short form exists) */
		jump->addr++;
	}
	else if (short_jump) {
		/* 0x0F 8x near Jcc minus 0x10 gives the 0x7x short Jcc. */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = 0x0f;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	/* Reserve the displacement field and mark its width for patching. */
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
  273: 
/* Second (final) code generation pass: concatenates the buffered code
   fragments into one executable region, resolves jump/label/const
   records, then patches all jump displacements. Returns the executable
   code, or NULL on allocation failure.
   Buffer format: each record starts with a length byte; a non-zero
   length is that many literal machine-code bytes, while 0 introduces a
   marker byte (0 = label, 1 = const, 2/3 = fixed call/jmp with an
   absolute word following, >= 4 = jump of type (marker - 4)). */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				/* Marker record; jumps/labels/consts appear in the
				   same order as in their linked lists. */
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					/* The constant's word was emitted just before. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
					buf_ptr++;
					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
					code_ptr += sizeof(sljit_w);
					buf_ptr += sizeof(sljit_w) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_w);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch pass: now that every label address is final, fill in the
	   displacement fields reserved by generate_near_jump_code. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			/* 8-bit displacement of a short jump. */
			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
		} else if (jump->flags & PATCH_MW) {
			/* 32-bit (machine-word on x86-32) relative displacement. */
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			/* Absolute 64-bit address (movabs-style patch). */
			*(sljit_w*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	/* NOTE(review): this is the first-pass upper bound; the actual
	   emitted size is code_ptr - code, which can be smaller. */
	compiler->executable_size = compiler->size;
	return (void*)code;
}
  389: 
  390: /* --------------------------------------------------------------------- */
  391: /*  Operators                                                            */
  392: /* --------------------------------------------------------------------- */
  393: 
/* Emits a commutative binary ALU operation (add/or/and/xor...); the
   opcode bytes select the r/m-reg, reg-r/m, immediate-group and
   eax-immediate short forms respectively. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

/* Same as emit_cum_binary but for non-commutative operations (sub/cmp),
   where operand order must be preserved. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

/* Emits a full machine-word move between any two operands. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw);
  409: 
/* Saves the status flags into the topmost stack slot without changing
   esp/rsp overall: first "lea esp, [esp + sizeof(sljit_w)]" frees the
   slot (lea does not affect the flags, unlike add), then pushfd/pushfq
   stores the flags there and restores the original stack pointer. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = REX_W;
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)sizeof(sljit_w);
	*buf++ = 0x9c; /* pushfd / pushfq */
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
  432: 
/* Inverse of emit_save_flags: popfd/popfq reloads the flags from the
   slot written by emit_save_flags, then "lea esp, [esp - sizeof(sljit_w)]"
   (which does not clobber the just-restored flags) moves the stack
   pointer back to its original value. keep_flags is stored so later
   code knows whether the saved copy is still valid. */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9d; /* popfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9d; /* popfq */
	*buf++ = REX_W;
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
  456: 
#ifdef _WIN32
#include <malloc.h>

/* Workaround for calling the internal _chkstk() function on Windows.
   This function touches all 4K pages belonging to the requested stack
   space, whose size is passed in local_size. This is necessary on
   Windows, where the stack can only grow one 4K guard page at a time.
   If the stack is already large enough it merely burns CPU cycles, but
   the required size is not known in advance. (The alloca result is
   deliberately unused; only the page-touching side effect matters.) */
static void SLJIT_CALL sljit_grow_stack(sljit_w local_size)
{
	alloca(local_size);
}

#endif
  472: 
  473: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  474: #include "sljitNativeX86_32.c"
  475: #else
  476: #include "sljitNativeX86_64.c"
  477: #endif
  478: 
/* Emits a machine-word move between arbitrary operands (register,
   memory or immediate). A memory-to-memory move goes through
   TMP_REGISTER. Opcodes: 0x8b = mov r, r/m; 0x89 = mov r/m, r;
   0xb8+reg = mov r, imm; 0xc7 = mov r/m, imm32. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags. The load is still
		   performed for a memory source (it may fault / have an effect). */
		if (src & SLJIT_MEM) {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8b;
		}
		return SLJIT_SUCCESS;
	}
	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
		/* Register source: mov r/m, reg. */
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			/* 64-bit immediates need the movabs form; 32-bit mode can
			   use the short mov r32, imm32 encoding. */
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Immediate too wide for imm32 and destination is memory: load
		   it into TMP_REG2 first, then store the register. */
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x89;
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
		/* Register destination: mov reg, r/m. */
		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!code);
		*code = 0x8b;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code = 0x8b;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
	FAIL_IF(!code);
	*code = 0x89;
	return SLJIT_SUCCESS;
}
  543: 
/* Convenience wrapper: emit a mov and return from the enclosing function
   on failure. The expansion already ends in ';' (FAIL_IF is a statement),
   so call sites write it like a function call without a trailing ';'. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  546: 
/* Emits a zero-operand operation: breakpoint (int3), nop, or one of the
   widening multiply/divide ops, which on x86 implicitly use eax/edx
   (SLJIT_TEMPORARY_REG1/REG2). For the divisions the second operand is
   first moved out of edx (into TMP_REGISTER) because edx must hold the
   zero/sign extension of the dividend. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		/* int3 */
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc;
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		/* The code below hard-codes which registers play eax/edx roles;
		   verify the register maps still match those assumptions. */
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Unsigned divide: edx (the high half) must be zeroed.
			   Save the second operand first where edx is REG2. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor reg, reg */
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
#endif

			/* CDQ instruction */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!buf);
			INC_SIZE(1);
			*buf = 0x99;
#else
			/* cdq in 32-bit mode, REX.W + 0x99 = cqo in 64-bit mode. */
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!buf);
				INC_SIZE(1);
				*buf = 0x99;
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!buf);
				INC_SIZE(2);
				*buf++ = REX_W;
				*buf = 0x99;
			}
#endif
		}

		/* Emit the group-3 instruction (0xf7 /digit, digit added below). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B;
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		/* Fill in the /digit field of the ModRM byte: mul, imul, div, idiv. */
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3;
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3;
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3;
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* Move the high half / remainder back into the SLJIT REG2. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
  682: 
/* Emits a single raw byte (prefix or one-byte opcode such as xchg) as
   its own record in the code buffer. Uses the local "code" variable. */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
  690: 
/* Emits a byte-sized move with zero (sign == 0) or sign (sign != 0)
   extension. On x86-32 only eax/ecx/edx/ebx (reg_map < 4) have byte
   encodings (al..bl), so other registers must be shuffled or emulated
   with shifts/masks; hence the extra paths below. Opcodes: 0x0f 0xb6 =
   movzx, 0x0f 0xbe = movsx, 0xc6 = mov r/m8, imm8, 0x88 = mov r/m8, r8. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
			/* Immediate into register: a full-width load already leaves
			   the low byte correct, no extension needed. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6;
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Source register with no byte form: copy it into TMP_REGISTER
		   (which does have one) before the byte store. */
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			/* dst has a byte form: copy, then movzx/movsx dst from itself. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6;
		}
		else {
			/* Neither register has a byte form: emulate the extension
			   with full-width shifts or an AND mask. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* The value sits in TMP_REGISTER, which has no byte form on
			   x86-32: temporarily xchg it with a byte-capable register
			   that is not used by dst's addressing, store, and swap back. */
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			if (work_r == SLJIT_TEMPORARY_REG1) {
				/* xchg eax, reg: single-byte 0x90 + reg encoding. */
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg r/m, reg */
			}

			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;

			/* Swap back to restore work_r's original value. */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87;
			}
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;
		}
#else
		/* x86-64: any register has a byte form under a REX prefix. */
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88;
#endif
	}

	return SLJIT_SUCCESS;
}
  833: 
/* Emits a 16-bit move with zero (sign == 0) or sign (sign != 0)
   extension. Simpler than the byte variant: every register has a word
   form (via the 0x66 operand-size prefix). Opcodes: 0x0f 0xb7 = movzx,
   0x0f 0xbf = movsx, 0xc7 = mov r/m16, imm16, 0x89 = mov r/m16, r16. */
static int emit_mov_half(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
			/* Full-width immediate load already yields the right low word. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7;
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
		/* Register to memory: store src's low word directly below. */
		dst_r = src;
	else {
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbf : 0xb7;
	}

	if (dst & SLJIT_MEM) {
		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89;
	}

	return SLJIT_SUCCESS;
}
  881: 
/* Emits a group-3 one-operand instruction (opcode 0xf7; the operation is
   selected by un_index placed in the /reg field of the ModRM byte):
   un_index 0x2 is NOT, un_index 0x3 is NEG.  Chooses the cheapest operand
   placement: in place when dst == src, directly on a register dst,
   otherwise through TMP_REGISTER. */
static int emit_unary(struct sljit_compiler *compiler, int un_index,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER only for the flags. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		/* Operation selector goes into the /reg field. */
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: move the source there, operate in place. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	/* Memory destination distinct from the source: compute in
	   TMP_REGISTER, then store the result to dst. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0xf7;
	*code |= (un_index) << 3;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
  920: 
/* Emits a NOT whose result must also set the status flags.  The x86 NOT
   instruction (0xf7 /2) does not affect flags, so after negating, the code
   emits OR reg, reg (opcode 0x0b) on the result, which updates the flags
   without changing the value. */
static int emit_not_with_flags(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: only the flag outcome is needed. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		/* /2 in the ModRM reg field selects NOT. */
		*code |= 0x2 << 3;
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code = 0x0b;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: NOT in place, then OR dst, dst for flags. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3;
		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!code);
		*code = 0x0b;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: compute and set flags in TMP_REGISTER, then
	   store the result (MOV does not alter flags). */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0xf7;
	*code |= 0x2 << 3;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code = 0x0b;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
  960: 
/* Emits a count-leading-zeros sequence.  x86 (pre-LZCNT) has only BSR
   (0x0f 0xbd), which returns the index of the highest set bit and leaves
   the destination undefined when the source is zero.  The sequence is:
   preload dst_r with an out-of-range marker, BSR into TMP_REGISTER,
   CMOVNE (0x0f 0x45) the BSR result over the marker, then XOR with
   width-1 to turn the bit index into a leading-zero count. */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	SLJIT_UNUSED_ARG(op);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		/* NOT (0xf7 /2): flips bits so a shift can expose the zero flag. */
		*code++ = 0xf7;
		*code |= 0x2 << 3;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		/* /5 in the reg field selects SHR. */
		*code |= 0x5 << 3;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* BSR cannot take an immediate operand; materialize it first. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	/* BSR TMP_REGISTER, src: index of highest set bit (undefined if 0). */
	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Park dst_r's current value in the dst memory slot; the final
		   XCHG below restores it while storing the result. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Marker: 32 + 31 XORs down to 32 when the input was zero. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* CMOVNE dst_r, TMP_REGISTER: take the BSR result unless input was 0. */
	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	/* /6 in the reg field selects XOR: bit index -> leading-zero count. */
	*(code + 1) |= 0x6 << 3;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* XCHG (0x87): stores the result and restores the borrowed
		   temporary register in one instruction. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
 1043: 
/* Public entry point for all single-operand operations: the MOV family
   (with optional sign/zero extension and pre/post update variants),
   NOT, NEG and CLZ.  Normalizes the operands and dispatches to the
   specialized emitters above. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* On 64 bit there are no "extra registers"; keep the condition
	   below compilable with a constant. */
	#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		/* MOVU opcodes mirror the MOV opcodes at a fixed offset of 7. */
		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Pre-truncate/extend the immediate to the operand width. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update (MOVU): emit LEA (0x8d) to fold the displacement /
		   index into the base register before the actual move. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Extra-register destination that cannot be encoded directly:
		   go through TMP_REGISTER and spill to the stack slot below. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Spill the TMP_REGISTER result into the extra register's slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update (MOVU) on the destination address register. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	#undef src_is_ereg
#endif
}
 1193: 
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emits "op arg, immw".  On 64 bit, an immediate that does not fit in a
   sign-extended 32-bit field cannot be encoded directly, so it is first
   loaded into TMP_REG2 and the register-to-memory opcode (_op_mr_) is
   used instead.  _op_imm_ is the /reg selector for the 0x81 group. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Shorter "op eax, imm32" encoding available when the target is EAX/RAX. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* 32-bit variant: every immediate fits, no fallback path needed. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
 1223: 
/* Emits a commutative two-operand instruction (ADD, ADC, AND, OR, XOR).
   The four opcode bytes cover the x86 encodings: op_rm (reg <- r/m),
   op_mr (r/m <- reg), op_imm (/reg selector for the imm group) and
   op_eax_imm (short EAX form).  Because the operation is commutative,
   dst may coincide with either source operand. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Flags-only: compute into TMP_REGISTER and discard. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: dst op= src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Short EAX form; not worthwhile for imm8 encodings. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* In-place with the roles swapped: dst op= src1. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
 1339: 
/* Emits a non-commutative two-operand instruction (SUB, SBB).  Same opcode
   byte scheme as emit_cum_binary, but because operand order matters there
   is no "dst == src2" fast path, and the general version must avoid
   clobbering src2 by writing dst first (hence the dst != src2 guard). */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Flags-only: compute into TMP_REGISTER and discard. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: dst op= src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Short EAX form; not worthwhile for imm8 encodings. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
 1421: 
 1422: static int emit_mul(struct sljit_compiler *compiler,
 1423: 	int dst, sljit_w dstw,
 1424: 	int src1, sljit_w src1w,
 1425: 	int src2, sljit_w src2w)
 1426: {
 1427: 	sljit_ub* code;
 1428: 	int dst_r;
 1429: 
 1430: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 1431: 
 1432: 	/* Register destination. */
 1433: 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
 1434: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1435: 		FAIL_IF(!code);
 1436: 		*code++ = 0x0f;
 1437: 		*code = 0xaf;
 1438: 	}
 1439: 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
 1440: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
 1441: 		FAIL_IF(!code);
 1442: 		*code++ = 0x0f;
 1443: 		*code = 0xaf;
 1444: 	}
 1445: 	else if (src1 & SLJIT_IMM) {
 1446: 		if (src2 & SLJIT_IMM) {
 1447: 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
 1448: 			src2 = dst_r;
 1449: 			src2w = 0;
 1450: 		}
 1451: 
 1452: 		if (src1w <= 127 && src1w >= -128) {
 1453: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1454: 			FAIL_IF(!code);
 1455: 			*code = 0x6b;
 1456: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1457: 			FAIL_IF(!code);
 1458: 			INC_CSIZE(1);
 1459: 			*code = (sljit_b)src1w;
 1460: 		}
 1461: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1462: 		else {
 1463: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1464: 			FAIL_IF(!code);
 1465: 			*code = 0x69;
 1466: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1467: 			FAIL_IF(!code);
 1468: 			INC_CSIZE(4);
 1469: 			*(sljit_w*)code = src1w;
 1470: 		}
 1471: #else
 1472: 		else if (IS_HALFWORD(src1w)) {
 1473: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1474: 			FAIL_IF(!code);
 1475: 			*code = 0x69;
 1476: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1477: 			FAIL_IF(!code);
 1478: 			INC_CSIZE(4);
 1479: 			*(sljit_hw*)code = (sljit_hw)src1w;
 1480: 		}
 1481: 		else {
 1482: 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
 1483: 			if (dst_r != src2)
 1484: 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
 1485: 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1486: 			FAIL_IF(!code);
 1487: 			*code++ = 0x0f;
 1488: 			*code = 0xaf;
 1489: 		}
 1490: #endif
 1491: 	}
 1492: 	else if (src2 & SLJIT_IMM) {
 1493: 		/* Note: src1 is NOT immediate. */
 1494: 
 1495: 		if (src2w <= 127 && src2w >= -128) {
 1496: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1497: 			FAIL_IF(!code);
 1498: 			*code = 0x6b;
 1499: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1500: 			FAIL_IF(!code);
 1501: 			INC_CSIZE(1);
 1502: 			*code = (sljit_b)src2w;
 1503: 		}
 1504: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1505: 		else {
 1506: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1507: 			FAIL_IF(!code);
 1508: 			*code = 0x69;
 1509: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1510: 			FAIL_IF(!code);
 1511: 			INC_CSIZE(4);
 1512: 			*(sljit_w*)code = src2w;
 1513: 		}
 1514: #else
 1515: 		else if (IS_HALFWORD(src2w)) {
 1516: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1517: 			FAIL_IF(!code);
 1518: 			*code = 0x69;
 1519: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1520: 			FAIL_IF(!code);
 1521: 			INC_CSIZE(4);
 1522: 			*(sljit_hw*)code = (sljit_hw)src2w;
 1523: 		}
 1524: 		else {
 1525: 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
 1526: 			if (dst_r != src1)
 1527: 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1528: 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1529: 			FAIL_IF(!code);
 1530: 			*code++ = 0x0f;
 1531: 			*code = 0xaf;
 1532: 		}
 1533: #endif
 1534: 	}
 1535: 	else {
 1536: 		/* Neither argument is immediate. */
 1537: 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
 1538: 			dst_r = TMP_REGISTER;
 1539: 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1540: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1541: 		FAIL_IF(!code);
 1542: 		*code++ = 0x0f;
 1543: 		*code = 0xaf;
 1544: 	}
 1545: 
 1546: 	if (dst_r == TMP_REGISTER)
 1547: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1548: 
 1549: 	return SLJIT_SUCCESS;
 1550: }
 1551: 
/* Tries to emit an addition as a single LEA (0x8d) instruction:
   reg + reg or reg + immediate.  Returns SLJIT_ERR_UNSUPPORTED when no
   LEA form applies (the caller then falls back to a normal ADD), which
   also covers the in-place cases where ADD is shorter anyway. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) {
			/* lea dst_r, [src1 + src2] */
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The displacement must fit in a sign-extended 32-bit field. */
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Mirror case: lea dst_r, [src2 + src1w]. */
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
 1608: 
/* Emits a CMP (flags-only subtract).  Opcodes: 0x3d cmp eax, imm32;
   /7 in the imm group; 0x39 cmp r/m, reg; 0x3b cmp reg, r/m.
   Picks the shortest encoding available for the operand combination. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		/* Short "cmp eax, imm32" form; imm8 encodings are shorter still,
		   hence the > 127 / < -128 guard. */
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* cmp r/m, reg keeps the memory operand on the left. */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* imm/imm: materialize src1 so CMP has a real operand. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Memory/memory: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
 1659: 
 1660: static int emit_test_binary(struct sljit_compiler *compiler,
 1661: 	int src1, sljit_w src1w,
 1662: 	int src2, sljit_w src2w)
 1663: {
 1664: 	sljit_ub* code;
 1665: 
 1666: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1667: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1668: #else
 1669: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1670: #endif
 1671: 		BINARY_EAX_IMM(0xa9, src2w);
 1672: 		return SLJIT_SUCCESS;
 1673: 	}
 1674: 
 1675: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1676: 	if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1677: #else
 1678: 	if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 1679: #endif
 1680: 		BINARY_EAX_IMM(0xa9, src1w);
 1681: 		return SLJIT_SUCCESS;
 1682: 	}
 1683: 
 1684: 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1685: 		if (src2 & SLJIT_IMM) {
 1686: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1687: 			if (IS_HALFWORD(src2w) || compiler->mode32) {
 1688: 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1689: 				FAIL_IF(!code);
 1690: 				*code = 0xf7;
 1691: 			}
 1692: 			else {
 1693: 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1694: 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
 1695: 				FAIL_IF(!code);
 1696: 				*code = 0x85;
 1697: 			}
 1698: #else
 1699: 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1700: 			FAIL_IF(!code);
 1701: 			*code = 0xf7;
 1702: #endif
 1703: 		}
 1704: 		else {
 1705: 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1706: 			FAIL_IF(!code);
 1707: 			*code = 0x85;
 1708: 		}
 1709: 		return SLJIT_SUCCESS;
 1710: 	}
 1711: 
 1712: 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1713: 		if (src1 & SLJIT_IMM) {
 1714: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1715: 			if (IS_HALFWORD(src1w) || compiler->mode32) {
 1716: 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
 1717: 				FAIL_IF(!code);
 1718: 				*code = 0xf7;
 1719: 			}
 1720: 			else {
 1721: 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
 1722: 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
 1723: 				FAIL_IF(!code);
 1724: 				*code = 0x85;
 1725: 			}
 1726: #else
 1727: 			code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
 1728: 			FAIL_IF(!code);
 1729: 			*code = 0xf7;
 1730: #endif
 1731: 		}
 1732: 		else {
 1733: 			code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1734: 			FAIL_IF(!code);
 1735: 			*code = 0x85;
 1736: 		}
 1737: 		return SLJIT_SUCCESS;
 1738: 	}
 1739: 
 1740: 	EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1741: 	if (src2 & SLJIT_IMM) {
 1742: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1743: 		if (IS_HALFWORD(src2w) || compiler->mode32) {
 1744: 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1745: 			FAIL_IF(!code);
 1746: 			*code = 0xf7;
 1747: 		}
 1748: 		else {
 1749: 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1750: 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
 1751: 			FAIL_IF(!code);
 1752: 			*code = 0x85;
 1753: 		}
 1754: #else
 1755: 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1756: 		FAIL_IF(!code);
 1757: 		*code = 0xf7;
 1758: #endif
 1759: 	}
 1760: 	else {
 1761: 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1762: 		FAIL_IF(!code);
 1763: 		*code = 0x85;
 1764: 	}
 1765: 	return SLJIT_SUCCESS;
 1766: }
 1767: 
/* Emits a shift or rotate instruction. 'mode' is the /reg opcode extension
   of the shift group, OR-ed into the opcode byte returned by
   emit_x86_instruction. The variable shift count must end up in
   SLJIT_PREF_SHIFT_REG (ecx on x86), so the non-immediate paths below
   shuffle values through TMP_REGISTER to get it there and restore the
   previous contents afterwards. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Easy cases: the count is an immediate or is already in the
	   preferred shift register, so no count shuffling is required. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* Shift in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result is discarded: shift a temporary copy (the caller
			   may still want the flags). */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst and the count are both ecx: compute into TMP_REGISTER,
			   then move the result into ecx. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* dst is a register: load src1 into it and shift there. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* dst is in memory: compute in TMP_REGISTER, then store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* The count is in some other register or in memory: it must be moved
	   into ecx first, and ecx's previous contents must survive. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ecx itself: its old value need not be preserved. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* dst is a register that is neither the count nor used to address
		   it: shift in dst while ecx is parked in TMP_REGISTER. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* On 64 bit a second scratch register is available for saving ecx. */
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the saved flags, so park ecx one word above it. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		/* Restore ecx before storing the result (the store may address
		   through ecx). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
 1856: 
 1857: static int emit_shift_with_flags(struct sljit_compiler *compiler,
 1858: 	sljit_ub mode, int set_flags,
 1859: 	int dst, sljit_w dstw,
 1860: 	int src1, sljit_w src1w,
 1861: 	int src2, sljit_w src2w)
 1862: {
 1863: 	/* The CPU does not set flags if the shift count is 0. */
 1864: 	if (src2 & SLJIT_IMM) {
 1865: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1866: 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
 1867: 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1868: #else
 1869: 		if ((src2w & 0x1f) != 0)
 1870: 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1871: #endif
 1872: 		if (!set_flags)
 1873: 			return emit_mov(compiler, dst, dstw, src1, src1w);
 1874: 		/* OR dst, src, 0 */
 1875: 		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
 1876: 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
 1877: 	}
 1878: 
 1879: 	if (!set_flags)
 1880: 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 1881: 
 1882: 	if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
 1883: 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
 1884: 
 1885: 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
 1886: 
 1887: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
 1888: 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
 1889: 	return SLJIT_SUCCESS;
 1890: }
 1891: 
/* Emits a two-operand operation: dst = src1 OP src2. The opcode quadruples
   passed to emit_cum_binary / emit_non_cum_binary are the x86 encodings:
   reg<-r/m form, r/m<-reg form, the /reg extension of the immediate group,
   and the short eax-immediate form. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* SLJIT_INT_OP selects 32 bit operand size on x86-64. */
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* For operations that do not consume the carry flag, decide up front
	   whether previously saved flags can be dropped or must be kept. */
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* Without flag requests an LEA can often do the addition and
		   leaves the flags untouched. */
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		/* Subtracting an immediate without flags can be an LEA with the
		   negated offset. */
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* A subtraction whose result is discarded is just a compare. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* An AND whose result is discarded is just a TEST. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	/* The shift helpers receive the /reg extension of the shift group. */
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
 1986: 
 1987: SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
 1988: {
 1989: 	check_sljit_get_register_index(reg);
 1990: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1991: 	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
 1992: 			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
 1993: 		return -1;
 1994: #endif
 1995: 	return reg_map[reg];
 1996: }
 1997: 
/* Copies a caller-provided, pre-encoded machine instruction of 'size'
   bytes verbatim into the instruction stream. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	/* x86 instructions are at most 15 bytes long. */
	SLJIT_ASSERT(size > 0 && size < 16);

	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
 2013: 
 2014: /* --------------------------------------------------------------------- */
 2015: /*  Floating point operators                                             */
 2016: /* --------------------------------------------------------------------- */
 2017: 
 2018: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 2019: 
 2020: /* Alignment + 2 * 16 bytes. */
 2021: static sljit_i sse2_data[3 + 4 + 4];
 2022: static sljit_i *sse2_buffer;
 2023: 
 2024: static void init_compiler()
 2025: {
 2026: 	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
 2027: 	sse2_buffer[0] = 0;
 2028: 	sse2_buffer[1] = 0x80000000;
 2029: 	sse2_buffer[4] = 0xffffffff;
 2030: 	sse2_buffer[5] = 0x7fffffff;
 2031: }
 2032: 
 2033: #endif
 2034: 
/* Returns non-zero when floating point operations can be emitted. With
   SLJIT_DETECT_SSE2 the answer comes from a (cached) CPUID probe; without
   detection SSE2 is assumed present; without SLJIT_SSE2 the FPU is
   reported unavailable. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
{
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	/* Cached probe result; -1 means not probed yet. The probe is
	   idempotent, so a racy first call from two threads is harmless. */
	static int sse2_available = -1;
	int features;

	if (sse2_available != -1)
		return sse2_available;

#ifdef __GNUC__
	/* AT&T syntax. CPUID leaf 1 returns the feature bits in edx.
	   ebx is saved and restored by hand, presumably because it can be
	   the PIC base register and must not appear in the clobber list. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
	#error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
#endif
	/* CPUID.1:EDX bit 26 is the SSE2 feature flag. */
	sse2_available = (features >> 26) & 0x1;
	return sse2_available;
#else
	return 1;
#endif
#else
	return 0;
#endif
}
 2078: 
 2079: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 2080: 
 2081: static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
 2082: 	int xmm1, int xmm2, sljit_w xmm2w)
 2083: {
 2084: 	sljit_ub *buf;
 2085: 
 2086: 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2087: 	FAIL_IF(!buf);
 2088: 	*buf++ = 0x0f;
 2089: 	*buf = opcode;
 2090: 	return SLJIT_SUCCESS;
 2091: }
 2092: 
 2093: static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
 2094: 	int xmm1, int xmm2, sljit_w xmm2w)
 2095: {
 2096: 	sljit_ub *buf;
 2097: 
 2098: 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 2099: 	FAIL_IF(!buf);
 2100: 	*buf++ = 0x0f;
 2101: 	*buf = opcode;
 2102: 	return SLJIT_SUCCESS;
 2103: }
 2104: 
/* Loads a double from src/srcw into xmm register dst (movsd: F2 0F 10). */
static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
	int dst, int src, sljit_w srcw)
{
	return emit_sse2(compiler, 0x10, dst, src, srcw);
}
 2110: 
/* Stores xmm register src into dst/dstw (movsd: F2 0F 11). */
static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
	int dst, sljit_w dstw, int src)
{
	return emit_sse2(compiler, 0x11, src, dst, dstw);
}
 2116: 
/* Emits a single-operand floating point operation (SSE2 implementation):
   FCMP (ucomisd), FMOV (movsd), FNEG (xorpd with sign mask) or FABS
   (andpd with the complement mask). All operations work on doubles. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		/* ucomisd overwrites the status flags, so saved flags are gone. */
		compiler->flags_saved = 0;
		/* For FCMP, 'dst' is the left hand side of the compare; it must
		   be in an xmm register. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		/* ucomisd (66 0F 2E): compares dst_r with src. */
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory to memory: go through the scratch xmm register. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG/FABS compute in a register: dst itself if possible,
	   otherwise TMP_FREG. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* xorpd with the sign-bit mask stored in sse2_buffer. */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* andpd with the abs mask stored at sse2_buffer + 4. */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
 2174: 
/* Emits a two-operand floating point operation (SSE2 implementation):
   FADD (addsd 0x58), FSUB (subsd 0x5c), FMUL (mulsd 0x59) or FDIV
   (divsd 0x5e). SSE2 arithmetic is destructive (xmm1 = xmm1 op xmm2),
   so src1 must first be placed in the register that receives the result. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments. Valid only for the commutative operations. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 for a non-commutative op: compute in TMP_FREG
			   so src2 is not clobbered before it is read. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		/* dst is in memory: compute in the scratch register and store. */
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
		break;

	case SLJIT_FSUB:
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
		break;

	case SLJIT_FMUL:
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
		break;

	case SLJIT_FDIV:
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
 2232: 
 2233: #else
 2234: 
/* Non-SSE2 build: floating point is unsupported; record and return the
   error so subsequent emissions fail fast. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}
 2245: 
/* Non-SSE2 build: floating point is unsupported; record and return the
   error so subsequent emissions fail fast. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}
 2257: 
 2258: #endif
 2259: 
 2260: /* --------------------------------------------------------------------- */
 2261: /*  Conditional instructions                                             */
 2262: /* --------------------------------------------------------------------- */
 2263: 
/* Creates a label at the current position in the instruction stream.
   Returns the label record, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps has their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	/* Reuse the previous label if nothing was emitted since it. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	/* A 0-length escape entry followed by record type 0 (label); the
	   code generator resolves these when the code is assembled. */
	*buf++ = 0;
	*buf++ = 0;

	return label;
}
 2292: 
/* Creates a (conditional or unconditional) jump or call whose target is
   patched later. Returns the jump record, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	/* Conditional jumps must see the real flags; unconditional jumps and
	   calls may leave saved flags for the target to restore. */
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments need the argument-moving code first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	/* A 0-length escape entry; type + 4 tags the record as a jump
	   (types 0 and 1 are reserved for labels and constants). */
	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
 2329: 
/* Emits an indirect jump or call through src/srcw. An immediate source is
   treated as a fixed target address and recorded as a patchable jump. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	/* Same flag policy as sljit_emit_jump. */
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* Argument setup may clobber the register or stack slot that
		   holds the target, so move the target out of harm's way first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* Fastcall passes arguments in registers; TEMPORARY_REG3 would
		   be overwritten by call_with_args. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* call_with_args adjusts the stack, shifting esp-relative
		   addresses by one word. */
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_w);
#else
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG))
			srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* Win64 ABI: the third argument register would be clobbered. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Fixed target: record a patchable jump like sljit_emit_jump. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		/* 0-length escape entry; type + 4 tags it as a jump record. */
		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Group 5 (0xff): /2 = indirect call, /4 = indirect jump. */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
 2401: 
/* Materializes a condition flag as the value 0 or 1 using SETcc, then
   either moves it to dst (op == SLJIT_MOV) or combines it with dst via
   sljit_emit_op2. NOTE(review): only MOV/OR appear safe here, since SETcc
   leaves the upper register bits unchanged on the 32-bit non-MOV path -
   presumably check_sljit_emit_cond_value enforces this; verify. On x86-32
   SETcc needs a byte-addressable register, hence the eax save/swap dance
   for destinations outside eax/ecx/edx. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
	/* dst/dstw are rewritten by CHECK_EXTRA_REGS; the originals are kept
	   for the sljit_emit_op2 tail calls. */
	int dst_save = dst;
	sljit_w dstw_save = dstw;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	/* The flags being tested may currently live in the save slot. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* Select the SETcc opcode (0x0f 0x9?) for the requested condition. */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94;
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95;
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92;
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93;
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97;
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96;
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c;
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d;
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f;
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e;
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90;
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91;
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a;
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b;
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* On x86-64 every register is byte-addressable with a REX prefix, so
	   SETcc can target dst directly when op is a plain move. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg_low_byte: zero-extend the flag byte to 64 bits. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
			/* Combine the flag value with dst via a regular op2. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
		}
	}
#else
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* dst maps to a byte-addressable register (eax/ecx/edx):
			   SETcc directly into it, then movzx to clear the rest. */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Save eax (SLJIT_TEMPORARY_REG1), use al for SETcc. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
				/* movzx dst, al */
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				/* movzx eax, al; then store eax to the memory dst. */
				*buf = 0xC0;
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore eax. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst's value, SETcc into its low byte; the op2 below
			   combines it with the saved copy in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			/* Save eax, SETcc al, movzx eax, al, then xchg eax with
			   TMP_REGISTER: eax is restored and the flag value ends up
			   in TMP_REGISTER for the op2 below. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (one-byte 0x90+r form). */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
 2592: 
/* Computes dst = address of the local area plus 'offset'
   (i.e. SLJIT_LOCALS_REG + adjusted offset), using LEA when possible. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Address arithmetic needs the full 64 bit width. */
	compiler->mode32 = 0;
#endif

	/* Translate the caller-visible offset into the real stack offset. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* An offset that does not fit in 32 bits cannot be an LEA
	   displacement: load it into a register first. */
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}
 2623: 
/* Emits dst = init_value, where the immediate can later be rewritten in
   place via sljit_set_const. Returns the constant record, or NULL on
   allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A 64 bit immediate only fits in a register load (movabs); use dst
	   directly when it is a register, TMP_REGISTER otherwise. */
	compiler->mode32 = 0;
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	/* 0-length escape entry; record type 1 marks a patchable constant. */
	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Store the loaded value if dst was not a register. The constant
	   marker must precede this store so the patch targets the movabs. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
 2670: 
/* Patches the target of a previously emitted jump/call. 'addr' points at
   the instruction's address field: on x86-32 it holds a displacement
   relative to the end of the 4-byte field, on x86-64 an absolute address. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}
 2679: 
/* Rewrites the immediate of a constant created by sljit_emit_const;
   'addr' points directly at the immediate field inside the code. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
{
	*(sljit_w*)addr = new_constant;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>