File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / sljit / sljitNativeX86_common.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:05:52 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_21, HEAD
pcre

    1: /*
    2:  *    Stack-less Just-In-Time compiler
    3:  *
    4:  *    Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5:  *
    6:  * Redistribution and use in source and binary forms, with or without modification, are
    7:  * permitted provided that the following conditions are met:
    8:  *
    9:  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10:  *      conditions and the following disclaimer.
   11:  *
   12:  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13:  *      of conditions and the following disclaimer in the documentation and/or other materials
   14:  *      provided with the distribution.
   15:  *
   16:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19:  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21:  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24:  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25:  */
   26: /* Returns a string literal naming the target architecture that was selected at compile time. */
   27: SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
   28: {
   29: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   30: 	return "x86-32";
   31: #else
   32: 	return "x86-64"; /* Every configuration other than X86_32 is reported as 64 bit. */
   33: #endif
   34: }
   35: 
   36: /*
   37:    32b register indexes:
   38:      0 - EAX
   39:      1 - ECX
   40:      2 - EDX
   41:      3 - EBX
   42:      4 - none
   43:      5 - EBP
   44:      6 - ESI
   45:      7 - EDI
   46: */
   47: 
   48: /*
   49:    64b register indexes:
   50:      0 - RAX
   51:      1 - RCX
   52:      2 - RDX
   53:      3 - RBX
   54:      4 - none
   55:      5 - RBP
   56:      6 - RSI
   57:      7 - RDI
   58:      8 - R8   - From now on REX prefix is required
   59:      9 - R9
   60:     10 - R10
   61:     11 - R11
   62:     12 - R12
   63:     13 - R13
   64:     14 - R14
   65:     15 - R15
   66: */
   67: 
   68: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   69: /* x86-32 configuration. */
   70: /* Last register + 1. */
   71: #define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
   72: /* Indexed by SLJIT register number; each value is a hardware register index from the 32b table above (emit_mov encodes mov reg, imm as 0xb8 + reg_map[reg]). */
   73: static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
   74:   0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
   75: };
   76: /* If p is one of the EREG registers, rewrites (p, w) into a SLJIT_LOCALS_REG based memory operand (temporaries_start / generals_start plus the slot offset) and runs 'do'. Expands to a bare if / else if, so use it only as a standalone statement. */
   77: #define CHECK_EXTRA_REGS(p, w, do) \
   78: 	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
   79: 		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
   80: 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
   81: 		do; \
   82: 	} \
   83: 	else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
   84: 		w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
   85: 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
   86: 		do; \
   87: 	}
   88: 
   89: #else /* SLJIT_CONFIG_X86_32 */
   90: /* x86-64 configuration: two more scratch registers are available besides TMP_REGISTER. */
   91: /* Last register + 1. */
   92: #define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
   93: #define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
   94: #define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
   95: 
   96: /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   97:    Note: avoid using r12 and r13 for memory addressing,
   98:    therefore r12 is better for GENERAL_EREG than GENERAL_REG. */
   99: #ifndef _WIN64
  100: /* 1st argument is passed in rdi, 2nd in rsi, 3rd in rdx. */
  101: static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  102:   0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
  103: };
  104: /* Low map: reg_map & 0x7, i.e. the register bits that are encoded outside the REX prefix. */
  105: static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  106:   0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
  107: };
  108: #else
  109: /* 1st argument is passed in rcx, 2nd in rdx, 3rd in r8. */
  110: static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  111:   0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
  112: };
  113: /* Low map: reg_map & 0x7, i.e. the register bits that are encoded outside the REX prefix. */
  114: static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  115:   0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  4,  7, 2,  0, 1
  116: };
  117: #endif
  118: /* REX prefix bytes: the 0x40 base with the W (0x08), R (0x04), X (0x02) or B (0x01) bit set. */
  119: #define REX_W		0x48
  120: #define REX_R		0x44
  121: #define REX_X		0x42
  122: #define REX_B		0x41
  123: #define REX		0x40
  124: /* "Half word" types; sljit_hw is the type used for the 32 bit jump displacements on this target. */
  125: typedef unsigned int sljit_uhw;
  126: typedef int sljit_hw;
  127: /* Test whether a value is inside / outside the signed 32 bit range. */
  128: #define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
  129: #define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)
  130: /* The x86-64 variant of the check expands to nothing. */
  131: #define CHECK_EXTRA_REGS(p, w, do)
  132: 
  133: #endif /* SLJIT_CONFIG_X86_32 */
  134: /* Scratch floating point register: one past SLJIT_FLOAT_REG4. Only defined for SSE2 builds. */
  135: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  136: #define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
  137: #endif
  138: 
  139: /* Size flags for emit_x86_instruction (or-ed into its size argument by the callers below): */
  140: #define EX86_BIN_INS		0x0010
  141: #define EX86_SHIFT_INS		0x0020
  142: #define EX86_REX		0x0040
  143: #define EX86_NO_REXW		0x0080
  144: #define EX86_BYTE_ARG		0x0100
  145: #define EX86_HALF_ARG		0x0200
  146: #define EX86_PREF_66		0x0400
  147: 
  148: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
  149: #define EX86_PREF_F2		0x0800
  150: #define EX86_SSE2		0x1000
  151: #endif
  152: /* Store the record length byte s at the cursor ('buf' or 'code') and add s to compiler->size. */
  153: #define INC_SIZE(s)			(*buf++ = (s), compiler->size += (s))
  154: #define INC_CSIZE(s)			(*code++ = (s), compiler->size += (s))
  155: /* Raw instruction emitters; all of them write through a local 'buf' cursor. */
  156: #define PUSH_REG(r)			(*buf++ = (0x50 + (r)))
  157: #define POP_REG(r)			(*buf++ = (0x58 + (r)))
  158: #define RET()				(*buf++ = (0xc3))
  159: #define RETN(n)				(*buf++ = (0xc2), *buf++ = n, *buf++ = 0) /* ret imm16 with the high byte fixed to 0; n is not parenthesized, pass a simple expression. */
  160: /* mov r32, r/m32: opcode 0x8b followed by a ModRM byte built from mod, reg and rm. */
  161: #define MOV_RM(mod, reg, rm)		(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
  162: /* Returns the second opcode byte of the near (0x0f prefixed) conditional jump for an SLJIT_C_* condition, or 0 when the type is not listed. The caller derives the short form by subtracting 0x10. */
  163: static sljit_ub get_jump_code(int type)
  164: {
  165: 	switch (type) {
  166: 	case SLJIT_C_EQUAL:
  167: 	case SLJIT_C_FLOAT_EQUAL:
  168: 		return 0x84; /* je */
  169: 
  170: 	case SLJIT_C_NOT_EQUAL:
  171: 	case SLJIT_C_FLOAT_NOT_EQUAL:
  172: 		return 0x85; /* jne */
  173: 
  174: 	case SLJIT_C_LESS:
  175: 	case SLJIT_C_FLOAT_LESS:
  176: 		return 0x82; /* jb */
  177: 
  178: 	case SLJIT_C_GREATER_EQUAL:
  179: 	case SLJIT_C_FLOAT_GREATER_EQUAL:
  180: 		return 0x83; /* jae */
  181: 
  182: 	case SLJIT_C_GREATER:
  183: 	case SLJIT_C_FLOAT_GREATER:
  184: 		return 0x87; /* ja */
  185: 
  186: 	case SLJIT_C_LESS_EQUAL:
  187: 	case SLJIT_C_FLOAT_LESS_EQUAL:
  188: 		return 0x86; /* jbe */
  189: 
  190: 	case SLJIT_C_SIG_LESS:
  191: 		return 0x8c; /* jl */
  192: 
  193: 	case SLJIT_C_SIG_GREATER_EQUAL:
  194: 		return 0x8d; /* jge */
  195: 
  196: 	case SLJIT_C_SIG_GREATER:
  197: 		return 0x8f; /* jg */
  198: 
  199: 	case SLJIT_C_SIG_LESS_EQUAL:
  200: 		return 0x8e; /* jle */
  201: 
  202: 	case SLJIT_C_OVERFLOW:
  203: 	case SLJIT_C_MUL_OVERFLOW:
  204: 		return 0x80; /* jo */
  205: 
  206: 	case SLJIT_C_NOT_OVERFLOW:
  207: 	case SLJIT_C_MUL_NOT_OVERFLOW:
  208: 		return 0x81; /* jno */
  209: 
  210: 	case SLJIT_C_FLOAT_NAN:
  211: 		return 0x8a; /* jp */
  212: 
  213: 	case SLJIT_C_FLOAT_NOT_NAN:
  214: 		return 0x8b; /* jnp */
  215: 	}
  216: 	return 0; /* Unknown condition. */
  217: }
  218: /* Forward declaration. Used below for rewritable jumps and, on x86-64, when the target is outside the signed 32 bit displacement range. */
  219: static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
  220: /* Forward declaration. Used by sljit_generate_code on x86-64 for the call / jmp records that carry a fixed address. */
  221: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  222: static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
  223: #endif
  224: /* Writes the opcode of a relative jump / call at code_ptr, advances jump->addr to the displacement field, flags the jump as PATCH_MB or PATCH_MW and returns the pointer past the reserved displacement bytes. 'code' is the start of the generated code. */
  225: static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
  226: {
  227: 	int short_jump;
  228: 	sljit_uw label_addr;
  229: /* Label targets are stored as an offset (label->size) from the start of the code. */
  230: 	if (jump->flags & JUMP_LABEL)
  231: 		label_addr = (sljit_uw)(code + jump->u.label->size);
  232: 	else
  233: 		label_addr = jump->u.target;
  234: 	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127; /* Distance measured from the end of a 2 byte short jump. */
  235: /* On x86-64 a target outside the signed 32 bit range is handed over to the far jump generator. */
  236: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  237: 	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
  238: 		return generate_far_jump_code(jump, code_ptr, type);
  239: #endif
  240: /* Emit the opcode; jump->addr is moved past every opcode byte written. */
  241: 	if (type == SLJIT_JUMP) {
  242: 		if (short_jump)
  243: 			*code_ptr++ = 0xeb; /* jmp rel8 */
  244: 		else
  245: 			*code_ptr++ = 0xe9; /* jmp rel32 */
  246: 		jump->addr++;
  247: 	}
  248: 	else if (type >= SLJIT_FAST_CALL) {
  249: 		short_jump = 0; /* The call opcode used here always takes a full size displacement. */
  250: 		*code_ptr++ = 0xe8; /* call rel32 */
  251: 		jump->addr++;
  252: 	}
  253: 	else if (short_jump) {
  254: 		*code_ptr++ = get_jump_code(type) - 0x10; /* Short Jcc (0x7x) = near opcode byte - 0x10. */
  255: 		jump->addr++;
  256: 	}
  257: 	else {
  258: 		*code_ptr++ = 0x0f; /* Near Jcc: 0x0f 0x8x. */
  259: 		*code_ptr++ = get_jump_code(type);
  260: 		jump->addr += 2;
  261: 	}
  262: /* Reserve the displacement; it is filled in by the patch loop of sljit_generate_code. */
  263: 	if (short_jump) {
  264: 		jump->flags |= PATCH_MB;
  265: 		code_ptr += sizeof(sljit_b);
  266: 	} else {
  267: 		jump->flags |= PATCH_MW;
  268: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  269: 		code_ptr += sizeof(sljit_w);
  270: #else
  271: 		code_ptr += sizeof(sljit_hw); /* The displacement uses the half word type on x86-64. */
  272: #endif
  273: 	}
  274: 
  275: 	return code_ptr;
  276: }
  277: /* Second pass of code generation: copies the recorded instruction bytes into memory obtained from SLJIT_MALLOC_EXEC, resolves label / jump / const records, then patches the jump displacements. Returns the start of the code; early error exits are handled by the CHECK_ERROR_PTR / PTR_FAIL_WITH_EXEC_IF macros (defined elsewhere). */
  278: SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  279: {
  280: 	struct sljit_memory_fragment *buf;
  281: 	sljit_ub *code;
  282: 	sljit_ub *code_ptr;
  283: 	sljit_ub *buf_ptr;
  284: 	sljit_ub *buf_end;
  285: 	sljit_ub len;
  286: 
  287: 	struct sljit_label *label;
  288: 	struct sljit_jump *jump;
  289: 	struct sljit_const *const_;
  290: 
  291: 	CHECK_ERROR_PTR();
  292: 	check_sljit_generate_code(compiler);
  293: 	reverse_buf(compiler);
  294: 
  295: 	/* Second code generation pass. */
  296: 	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
  297: 	PTR_FAIL_WITH_EXEC_IF(code);
  298: 	buf = compiler->buf;
  299: /* The label, jump and const lists are walked in step with the records found in the buffers. */
  300: 	code_ptr = code;
  301: 	label = compiler->labels;
  302: 	jump = compiler->jumps;
  303: 	const_ = compiler->consts;
  304: 	do {
  305: 		buf_ptr = buf->memory;
  306: 		buf_end = buf_ptr + buf->used_size;
  307: 		do {
  308: 			len = *buf_ptr++; /* Record header: non-zero = number of raw code bytes, 0 = special record. */
  309: 			if (len > 0) {
  310: 				/* The code is already generated. */
  311: 				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
  312: 				code_ptr += len;
  313: 				buf_ptr += len;
  314: 			}
  315: 			else {
  316: 				if (*buf_ptr >= 4) { /* Jump record: the jump type is stored with a bias of 4. */
  317: 					jump->addr = (sljit_uw)code_ptr;
  318: 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  319: 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
  320: 					else
  321: 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
  322: 					jump = jump->next;
  323: 				}
  324: 				else if (*buf_ptr == 0) { /* Label record: remember both the address and the offset. */
  325: 					label->addr = (sljit_uw)code_ptr;
  326: 					label->size = code_ptr - code;
  327: 					label = label->next;
  328: 				}
  329: 				else if (*buf_ptr == 1) { /* Const record: address of the machine word that ends at code_ptr. */
  330: 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
  331: 					const_ = const_->next;
  332: 				}
  333: 				else { /* 2 or 3: call / jmp to a fixed address stored right after the type byte. */
  334: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  335: 					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
  336: 					buf_ptr++;
  337: 					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w)); /* Displacement = target - address of the next instruction. */
  338: 					code_ptr += sizeof(sljit_w);
  339: 					buf_ptr += sizeof(sljit_w) - 1;
  340: #else
  341: 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
  342: 					buf_ptr += sizeof(sljit_w);
  343: #endif
  344: 				}
  345: 				buf_ptr++; /* Skip the type byte (or the last address byte of a fixed jump record). */
  346: 			}
  347: 		} while (buf_ptr < buf_end);
  348: 		SLJIT_ASSERT(buf_ptr == buf_end);
  349: 		buf = buf->next;
  350: 	} while (buf);
  351: 
  352: 	SLJIT_ASSERT(!label);
  353: 	SLJIT_ASSERT(!jump);
  354: 	SLJIT_ASSERT(!const_);
  355: /* Patch pass: all label addresses are known now, jump->addr points at each displacement field. */
  356: 	jump = compiler->jumps;
  357: 	while (jump) {
  358: 		if (jump->flags & PATCH_MB) { /* NOTE(review): reads u.label even if JUMP_LABEL is not set, although generate_near_jump_code can mark a target jump as short - confirm this cannot happen in practice. */
  359: 			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
  360: 			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
  361: 		} else if (jump->flags & PATCH_MW) { /* Full size relative displacement, to a label or to a fixed target. */
  362: 			if (jump->flags & JUMP_LABEL) {
  363: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  364: 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
  365: #else
  366: 				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
  367: 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
  368: #endif
  369: 			}
  370: 			else {
  371: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  372: 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
  373: #else
  374: 				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
  375: 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
  376: #endif
  377: 			}
  378: 		}
  379: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  380: 		else if (jump->flags & PATCH_MD) /* The absolute label address is stored as a full word. */
  381: 			*(sljit_w*)jump->addr = jump->u.label->addr;
  382: #endif
  383: 
  384: 		jump = jump->next;
  385: 	}
  386: 
  387: 	/* Maybe we waste some space because of short jumps. */
  388: 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
  389: 	compiler->error = SLJIT_ERR_COMPILED;
  390: 	compiler->executable_size = compiler->size; /* The allocated size, not the number of bytes actually written. */
  391: 	return (void*)code;
  392: }
  393: 
  394: /* --------------------------------------------------------------------- */
  395: /*  Operators                                                            */
  396: /* --------------------------------------------------------------------- */
  397: /* Forward declarations of helpers that are referenced before their definition. The two binary emitters are not defined in this part of the file. */
  398: static int emit_cum_binary(struct sljit_compiler *compiler,
  399: 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  400: 	int dst, sljit_w dstw,
  401: 	int src1, sljit_w src1w,
  402: 	int src2, sljit_w src2w);
  403: /* NOTE(review): "cum" / "non_cum" presumably stand for commutative / non-commutative operations - confirm at the definitions. */
  404: static int emit_non_cum_binary(struct sljit_compiler *compiler,
  405: 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
  406: 	int dst, sljit_w dstw,
  407: 	int src1, sljit_w src1w,
  408: 	int src2, sljit_w src2w);
  409: /* Defined further below in this file. */
  410: static int emit_mov(struct sljit_compiler *compiler,
  411: 	int dst, sljit_w dstw,
  412: 	int src, sljit_w srcw);
  413: /* Emits code that pushes the flags register and then moves the stack pointer back up by one word with lea, which leaves the saved flags in the word just below the stack pointer. Sets compiler->flags_saved. */
  414: static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
  415: {
  416: 	sljit_ub *buf;
  417: 
  418: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  419: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); /* 1 length byte + 5 code bytes. */
  420: 	FAIL_IF(!buf);
  421: 	INC_SIZE(5);
  422: 	*buf++ = 0x9c; /* pushfd */
  423: #else
  424: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); /* One extra byte for the REX.W prefix. */
  425: 	FAIL_IF(!buf);
  426: 	INC_SIZE(6);
  427: 	*buf++ = 0x9c; /* pushfq */
  428: 	*buf++ = 0x48; /* REX.W prefix of the lea below. */
  429: #endif
  430: 	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
  431: 	*buf++ = 0x64; /* ModRM: mod = 01 (8 bit displacement), reg = esp/rsp, rm = 100 (SIB follows). */
  432: 	*buf++ = 0x24; /* SIB: base = esp/rsp, no index. */
  433: 	*buf++ = sizeof(sljit_w);
  434: 	compiler->flags_saved = 1;
  435: 	return SLJIT_SUCCESS;
  436: }
  437: /* Emits the counterpart of emit_save_flags: lowers the stack pointer by one word with lea and pops the flags register. compiler->flags_saved is set to keep_flags afterwards. */
  438: static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
  439: {
  440: 	sljit_ub *buf;
  441: 
  442: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  443: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); /* 1 length byte + 5 code bytes. */
  444: 	FAIL_IF(!buf);
  445: 	INC_SIZE(5);
  446: #else
  447: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); /* One extra byte for the REX.W prefix. */
  448: 	FAIL_IF(!buf);
  449: 	INC_SIZE(6);
  450: 	*buf++ = 0x48; /* REX.W prefix of the lea below. */
  451: #endif
  452: 	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
  453: 	*buf++ = 0x64; /* ModRM: mod = 01 (8 bit displacement), reg = esp/rsp, rm = 100 (SIB follows). */
  454: 	*buf++ = 0x24; /* SIB: base = esp/rsp, no index. */
  455: 	*buf++ = (sljit_ub)-(int)sizeof(sljit_w); /* Negative 8 bit displacement. */
  456: 	*buf++ = 0x9d; /* popfd / popfq */
  457: 	compiler->flags_saved = keep_flags;
  458: 	return SLJIT_SUCCESS;
  459: }
  460: /* Windows only helper: allocates local_size bytes with alloca and discards the result; it exists only for the side effect described in the comment inside. */
  461: #ifdef _WIN32
  462: #include <malloc.h>
  463: 
  464: static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
  465: {
  466: 	/* Workaround for calling _chkstk. */
  467: 	alloca(local_size);
  468: }
  469: #endif
  470: 
  471: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  472: #include "sljitNativeX86_32.c"
  473: #else
  474: #include "sljitNativeX86_64.c"
  475: #endif
  476: /* Emits an operation that has no operands: a one byte int3 for SLJIT_BREAKPOINT or a one byte nop for SLJIT_NOP. Any other opcode emits nothing. Returns SLJIT_SUCCESS unless CHECK_ERROR / FAIL_IF return earlier. */
  477: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
  478: {
  479: 	sljit_ub *buf;
  480: 
  481: 	CHECK_ERROR();
  482: 	check_sljit_emit_op0(compiler, op);
  483: 
  484: 	op = GET_OPCODE(op);
  485: 	switch (op) {
  486: 	case SLJIT_BREAKPOINT:
  487: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); /* 1 length byte + 1 code byte. */
  488: 		FAIL_IF(!buf);
  489: 		INC_SIZE(1);
  490: 		*buf = 0xcc; /* int3 */
  491: 		break;
  492: 	case SLJIT_NOP:
  493: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); /* 1 length byte + 1 code byte. */
  494: 		FAIL_IF(!buf);
  495: 		INC_SIZE(1);
  496: 		*buf = 0x90; /* nop */
  497: 		break;
  498: 	}
  499: 
  500: 	return SLJIT_SUCCESS;
  501: }
  502: /* Emits a word sized move from (src, srcw) to (dst, dstw). The encoding is chosen by operand kind: register source, immediate source, register destination, or memory to memory through TMP_REGISTER. Returns SLJIT_SUCCESS, or whatever FAIL_IF returns on failure. */
  503: static int emit_mov(struct sljit_compiler *compiler,
  504: 	int dst, sljit_w dstw,
  505: 	int src, sljit_w srcw)
  506: {
  507: 	sljit_ub* code;
  508: 
  509: 	if (dst == SLJIT_UNUSED) {
  510: 		/* No destination, doesn't need to setup flags. */
  511: 		if (src & SLJIT_MEM) { /* A memory source is still loaded, into TMP_REGISTER. */
  512: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
  513: 			FAIL_IF(!code);
  514: 			*code = 0x8b; /* mov reg, r/m */
  515: 		}
  516: 		return SLJIT_SUCCESS;
  517: 	}
  518: 	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { /* Register source: one store form covers register and memory destinations. */
  519: 		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  520: 		FAIL_IF(!code);
  521: 		*code = 0x89; /* mov r/m, reg */
  522: 		return SLJIT_SUCCESS;
  523: 	}
  524: 	if (src & SLJIT_IMM) {
  525: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  526: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  527: 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov reg, imm32 */
  528: #else
  529: 			if (!compiler->mode32) {
  530: 				if (NOT_HALFWORD(srcw)) /* Only values outside the signed 32 bit range need the 64 bit load. */
  531: 					return emit_load_imm64(compiler, dst, srcw);
  532: 			}
  533: 			else
  534: 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw); /* REX.B selects r8 - r15. */
  535: #endif
  536: 		}
  537: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  538: 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) { /* Large immediate: load it into TMP_REG2 first, then store the register. */
  539: 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
  540: 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
  541: 			FAIL_IF(!code);
  542: 			*code = 0x89; /* mov r/m, reg */
  543: 			return SLJIT_SUCCESS;
  544: 		}
  545: #endif
  546: 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  547: 		FAIL_IF(!code);
  548: 		*code = 0xc7; /* mov r/m, imm32 */
  549: 		return SLJIT_SUCCESS;
  550: 	}
  551: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { /* Source is memory here, destination is a register. */
  552: 		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  553: 		FAIL_IF(!code);
  554: 		*code = 0x8b; /* mov reg, r/m */
  555: 		return SLJIT_SUCCESS;
  556: 	}
  557: 
  558: 	/* Memory to memory move. Requires two instructions. */
  559: 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
  560: 	FAIL_IF(!code);
  561: 	*code = 0x8b; /* mov reg, r/m */
  562: 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
  563: 	FAIL_IF(!code);
  564: 	*code = 0x89; /* mov r/m, reg */
  565: 	return SLJIT_SUCCESS;
  566: }
  567: /* Statement form of emit_mov that returns from the enclosing function on failure. The expansion already ends with a ';'. */
  568: #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  569: 	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  570: /* Emits the single byte 'prefix' as its own one byte record. Requires a local 'code' pointer and returns from the enclosing function on failure. */
  571: #define ENCODE_PREFIX(prefix) \
  572: 	do { \
  573: 		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
  574: 		FAIL_IF(!code); \
  575: 		INC_CSIZE(1); \
  576: 		*code = (prefix); \
  577: 	} while (0)
  578: /* Emits an 8 bit move. Values loaded into a register are sign extended (sign != 0) or zero extended; stores write one byte. On x86-32 only the registers with reg_map < 4 can be used as byte operands, which is what the extra register shuffling below works around. */
  579: static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
  580: 	int dst, sljit_w dstw,
  581: 	int src, sljit_w srcw)
  582: {
  583: 	sljit_ub* code;
  584: 	int dst_r;
  585: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  586: 	int work_r;
  587: #endif
  588: 
  589: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  590: 	compiler->mode32 = 0;
  591: #endif
  592: 
  593: 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  594: 		return SLJIT_SUCCESS; /* Empty instruction. */
  595: 
  596: 	if (src & SLJIT_IMM) {
  597: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { /* Register destination: the immediate is loaded as a full word. */
  598: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  599: 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
  600: #else
  601: 			return emit_load_imm64(compiler, dst, srcw);
  602: #endif
  603: 		}
  604: 		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  605: 		FAIL_IF(!code);
  606: 		*code = 0xc6; /* mov r/m8, imm8 */
  607: 		return SLJIT_SUCCESS;
  608: 	}
  609: /* dst_r is the register that holds the value: dst itself when it is a register, TMP_REGISTER otherwise. */
  610: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
  611: 
  612: 	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { /* Register to memory: no extension is needed, only a store. */
  613: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  614: 		if (reg_map[src] >= 4) { /* src cannot be a byte operand: copy it to TMP_REGISTER, the store path below deals with the rest. */
  615: 			SLJIT_ASSERT(dst_r == TMP_REGISTER);
  616: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
  617: 		} else
  618: 			dst_r = src;
  619: #else
  620: 		dst_r = src;
  621: #endif
  622: 	}
  623: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  624: 	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
  625: 		/* src, dst are registers. */
  626: 		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
  627: 		if (reg_map[dst] < 4) { /* dst can be a byte operand: copy the word, then extend dst from its own low byte. */
  628: 			if (dst != src)
  629: 				EMIT_MOV(compiler, dst, 0, src, 0);
  630: 			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  631: 			FAIL_IF(!code);
  632: 			*code++ = 0x0f;
  633: 			*code = sign ? 0xbe : 0xb6; /* movsx / movzx reg, r/m8 */
  634: 		}
  635: 		else { /* Neither register can be a byte operand: extend with shifts or a mask. */
  636: 			if (dst != src)
  637: 				EMIT_MOV(compiler, dst, 0, src, 0);
  638: 			if (sign) {
  639: 				/* shl reg, 24 */
  640: 				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  641: 				FAIL_IF(!code);
  642: 				*code |= 0x4 << 3;
  643: 				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  644: 				FAIL_IF(!code);
  645: 				/* sar reg, 24 */
  646: 				*code |= 0x7 << 3;
  647: 			}
  648: 			else {
  649: 				/* and dst, 0xff */
  650: 				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
  651: 				FAIL_IF(!code);
  652: 				*(code + 1) |= 0x4 << 3;
  653: 			}
  654: 		}
  655: 		return SLJIT_SUCCESS;
  656: 	}
  657: #endif
  658: 	else {
  659: 		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
  660: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  661: 		FAIL_IF(!code);
  662: 		*code++ = 0x0f;
  663: 		*code = sign ? 0xbe : 0xb6; /* movsx / movzx reg, r/m8 */
  664: 	}
  665: /* Store phase: only needed when the destination is memory. */
  666: 	if (dst & SLJIT_MEM) {
  667: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  668: 		if (dst_r == TMP_REGISTER) {
  669: 			/* Find a non-used register, whose reg_map[src] < 4. */
  670: 			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) { /* The low nibble and the next nibble of dst are compared against register numbers, so the chosen register is none of those. */
  671: 				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
  672: 					work_r = SLJIT_TEMPORARY_REG3;
  673: 				else
  674: 					work_r = SLJIT_TEMPORARY_REG2;
  675: 			}
  676: 			else {
  677: 				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
  678: 					work_r = SLJIT_TEMPORARY_REG1;
  679: 				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
  680: 					work_r = SLJIT_TEMPORARY_REG3;
  681: 				else
  682: 					work_r = SLJIT_TEMPORARY_REG2;
  683: 			}
  684: /* Swap the value into work_r, store its low byte, then swap back so both registers keep their contents. */
  685: 			if (work_r == SLJIT_TEMPORARY_REG1) {
  686: 				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); /* One byte xchg eax, reg form (SLJIT_TEMPORARY_REG1 maps to index 0, EAX). */
  687: 			}
  688: 			else {
  689: 				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  690: 				FAIL_IF(!code);
  691: 				*code = 0x87; /* xchg r/m, reg */
  692: 			}
  693: 
  694: 			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
  695: 			FAIL_IF(!code);
  696: 			*code = 0x88; /* mov r/m8, r8 */
  697: 
  698: 			if (work_r == SLJIT_TEMPORARY_REG1) {
  699: 				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); /* Swap back. */
  700: 			}
  701: 			else {
  702: 				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  703: 				FAIL_IF(!code);
  704: 				*code = 0x87; /* xchg r/m, reg (swap back) */
  705: 			}
  706: 		}
  707: 		else {
  708: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  709: 			FAIL_IF(!code);
  710: 			*code = 0x88; /* mov r/m8, r8 */
  711: 		}
  712: #else
  713: 		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); /* NOTE(review): EX86_REX presumably forces a REX prefix so the low byte of any register can be addressed - confirm in emit_x86_instruction. */
  714: 		FAIL_IF(!code);
  715: 		*code = 0x88; /* mov r/m8, r8 */
  716: #endif
  717: 	}
  718: 
  719: 	return SLJIT_SUCCESS;
  720: }
  721: /* Emits a 16 bit move. Values loaded into a register are sign extended (sign != 0) or zero extended; stores use the 0x66 prefixed 16 bit forms. */
  722: static int emit_mov_half(struct sljit_compiler *compiler, int sign,
  723: 	int dst, sljit_w dstw,
  724: 	int src, sljit_w srcw)
  725: {
  726: 	sljit_ub* code;
  727: 	int dst_r;
  728: 
  729: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  730: 	compiler->mode32 = 0;
  731: #endif
  732: 
  733: 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  734: 		return SLJIT_SUCCESS; /* Empty instruction. */
  735: 
  736: 	if (src & SLJIT_IMM) {
  737: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { /* Register destination: the immediate is loaded as a full word. */
  738: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  739: 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
  740: #else
  741: 			return emit_load_imm64(compiler, dst, srcw);
  742: #endif
  743: 		}
  744: 		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
  745: 		FAIL_IF(!code);
  746: 		*code = 0xc7; /* mov r/m16, imm16 (with the 0x66 prefix) */
  747: 		return SLJIT_SUCCESS;
  748: 	}
  749: /* dst_r is the register that holds the value: dst itself when it is a register, TMP_REGISTER otherwise. */
  750: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
  751: 
  752: 	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
  753: 		dst_r = src; /* Register to memory: store straight from src, no extension needed. */
  754: 	else {
  755: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  756: 		FAIL_IF(!code);
  757: 		*code++ = 0x0f;
  758: 		*code = sign ? 0xbf : 0xb7; /* movsx / movzx reg, r/m16 */
  759: 	}
  760: 
  761: 	if (dst & SLJIT_MEM) {
  762: 		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
  763: 		FAIL_IF(!code);
  764: 		*code = 0x89; /* mov r/m16, r16 (with the 0x66 prefix) */
  765: 	}
  766: 
  767: 	return SLJIT_SUCCESS;
  768: }
  769: /* Emits a unary instruction of the 0xf7 opcode group; un_index is or-ed (shifted left by 3) into the byte after the opcode and selects the operation. Works in place when source and destination are the same operand, otherwise on a copy in dst or in TMP_REGISTER. */
  770: static int emit_unary(struct sljit_compiler *compiler, int un_index,
  771: 	int dst, sljit_w dstw,
  772: 	int src, sljit_w srcw)
  773: {
  774: 	sljit_ub* code;
  775: 
  776: 	if (dst == SLJIT_UNUSED) { /* No destination: the operation is still executed on TMP_REGISTER. */
  777: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  778: 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  779: 		FAIL_IF(!code);
  780: 		*code++ = 0xf7;
  781: 		*code |= (un_index) << 3;
  782: 		return SLJIT_SUCCESS;
  783: 	}
  784: 	if (dst == src && dstw == srcw) {
  785: 		/* Same input and output */
  786: 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  787: 		FAIL_IF(!code);
  788: 		*code++ = 0xf7;
  789: 		*code |= (un_index) << 3;
  790: 		return SLJIT_SUCCESS;
  791: 	}
  792: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { /* Register destination: copy the source into it, then operate in place. */
  793: 		EMIT_MOV(compiler, dst, 0, src, srcw);
  794: 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  795: 		FAIL_IF(!code);
  796: 		*code++ = 0xf7;
  797: 		*code |= (un_index) << 3;
  798: 		return SLJIT_SUCCESS;
  799: 	}
  800: 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); /* Remaining case: compute in TMP_REGISTER, then store the result to dst. */
  801: 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  802: 	FAIL_IF(!code);
  803: 	*code++ = 0xf7;
  804: 	*code |= (un_index) << 3;
  805: 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
  806: 	return SLJIT_SUCCESS;
  807: }
  808: /* Emits a bitwise not (0xf7 /2) followed by an or of the result with itself (0x0b). The x86 not instruction leaves the flags untouched, so the or is what sets them from the result. */
  809: static int emit_not_with_flags(struct sljit_compiler *compiler,
  810: 	int dst, sljit_w dstw,
  811: 	int src, sljit_w srcw)
  812: {
  813: 	sljit_ub* code;
  814: 
  815: 	if (dst == SLJIT_UNUSED) { /* Only the flags are wanted: work on TMP_REGISTER and drop the value. */
  816: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  817: 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  818: 		FAIL_IF(!code);
  819: 		*code++ = 0xf7;
  820: 		*code |= 0x2 << 3; /* /2 = not */
  821: 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
  822: 		FAIL_IF(!code);
  823: 		*code = 0x0b; /* or reg, r/m */
  824: 		return SLJIT_SUCCESS;
  825: 	}
  826: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { /* Register destination: operate directly on it. */
  827: 		EMIT_MOV(compiler, dst, 0, src, srcw);
  828: 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  829: 		FAIL_IF(!code);
  830: 		*code++ = 0xf7;
  831: 		*code |= 0x2 << 3; /* /2 = not */
  832: 		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
  833: 		FAIL_IF(!code);
  834: 		*code = 0x0b; /* or reg, r/m */
  835: 		return SLJIT_SUCCESS;
  836: 	}
  837: 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); /* Remaining case: compute in TMP_REGISTER, then store the result to dst. */
  838: 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  839: 	FAIL_IF(!code);
  840: 	*code++ = 0xf7;
  841: 	*code |= 0x2 << 3; /* /2 = not */
  842: 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
  843: 	FAIL_IF(!code);
  844: 	*code = 0x0b; /* or reg, r/m */
  845: 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
  846: 	return SLJIT_SUCCESS;
  847: }
  848: /* Emits a count-leading-zeros sequence: bsr finds the index of the highest set bit, a conditional move keeps a preset default when the source is zero, and a final xor with 31 (or 63) turns the index into the count. A zero source therefore yields 32 (or 64). */
  849: static int emit_clz(struct sljit_compiler *compiler, int op,
  850: 	int dst, sljit_w dstw,
  851: 	int src, sljit_w srcw)
  852: {
  853: 	sljit_ub* code;
  854: 	int dst_r;
  855: 
  856: 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
  857: 		/* Just set the zero flag. */
  858: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  859: 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
  860: 		FAIL_IF(!code);
  861: 		*code++ = 0xf7;
  862: 		*code |= 0x2 << 3; /* /2 = not */
  863: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  864: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
  865: #else
  866: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
  867: #endif
  868: 		FAIL_IF(!code);
  869: 		*code |= 0x5 << 3; /* /5 = shr: only the inverted top bit of the source remains. */
  870: 		return SLJIT_SUCCESS;
  871: 	}
  872: 
  873: 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { /* The bsr below is not given an immediate operand: load it into TMP_REGISTER first. */
  874: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
  875: 		src = TMP_REGISTER;
  876: 		srcw = 0;
  877: 	}
  878: 
  879: 	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
  880: 	FAIL_IF(!code);
  881: 	*code++ = 0x0f;
  882: 	*code = 0xbd; /* bsr TMP_REGISTER, src */
  883: 
  884: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  885: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
  886: 		dst_r = dst;
  887: 	else {
  888: 		/* Find an unused temporary register. */
  889: 		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
  890: 			dst_r = SLJIT_TEMPORARY_REG1;
  891: 		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
  892: 			dst_r = SLJIT_TEMPORARY_REG2;
  893: 		else
  894: 			dst_r = SLJIT_TEMPORARY_REG3;
  895: 		EMIT_MOV(compiler, dst, dstw, dst_r, 0); /* Park the borrowed register's value in dst; the xchg at the end brings it back. */
  896: 	}
  897: 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); /* Default for a zero source: 63 ^ 31 == 32. */
  898: #else
  899: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
  900: 	compiler->mode32 = 0;
  901: 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); /* Default for a zero source: 127 ^ 63 == 64, 63 ^ 31 == 32. */
  902: 	compiler->mode32 = op & SLJIT_INT_OP; /* Restore the operand size of the operation. */
  903: #endif
  904: 
  905: 	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
  906: 	FAIL_IF(!code);
  907: 	*code++ = 0x0f;
  908: 	*code = 0x45; /* cmovne dst_r, TMP_REGISTER: takes the bsr result unless the source was zero. */
  909: 
  910: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  911: 	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
  912: #else
  913: 	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
  914: #endif
  915: 	FAIL_IF(!code);
  916: 	*(code + 1) |= 0x6 << 3; /* /6 = xor: bit index -> number of leading zeros. */
  917: 
  918: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  919: 	if (dst & SLJIT_MEM) {
  920: 		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  921: 		FAIL_IF(!code);
  922: 		*code = 0x87; /* xchg: stores the result and restores the borrowed register in one step. */
  923: 	}
  924: #else
  925: 	if (dst & SLJIT_MEM)
  926: 		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); /* The result was computed in TMP_REG2. */
  927: #endif
  928: 	return SLJIT_SUCCESS;
  929: }
  930: 
  931: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
  932: 	int dst, sljit_w dstw,
  933: 	int src, sljit_w srcw)
  934: {
  935: 	sljit_ub* code;
  936: 	int update = 0;
  937: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  938: 	int dst_is_ereg = 0;
  939: 	int src_is_ereg = 0;
  940: #else
  941: 	#define src_is_ereg 0
  942: #endif
  943: 
  944: 	CHECK_ERROR();
  945: 	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
  946: 
  947: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  948: 	compiler->mode32 = op & SLJIT_INT_OP;
  949: #endif
  950: 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
  951: 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
  952: 
  953: 	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
  954: 		op = GET_OPCODE(op);
  955: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  956: 		compiler->mode32 = 0;
  957: #endif
  958: 
  959: 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
  960: 		if (op >= SLJIT_MOVU) {
  961: 			update = 1;
  962: 			op -= 7;
  963: 		}
  964: 
  965: 		if (src & SLJIT_IMM) {
  966: 			switch (op) {
  967: 			case SLJIT_MOV_UB:
  968: 				srcw = (unsigned char)srcw;
  969: 				break;
  970: 			case SLJIT_MOV_SB:
  971: 				srcw = (signed char)srcw;
  972: 				break;
  973: 			case SLJIT_MOV_UH:
  974: 				srcw = (unsigned short)srcw;
  975: 				break;
  976: 			case SLJIT_MOV_SH:
  977: 				srcw = (signed short)srcw;
  978: 				break;
  979: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  980: 			case SLJIT_MOV_UI:
  981: 				srcw = (unsigned int)srcw;
  982: 				break;
  983: 			case SLJIT_MOV_SI:
  984: 				srcw = (signed int)srcw;
  985: 				break;
  986: #endif
  987: 			}
  988: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  989: 			if (SLJIT_UNLIKELY(dst_is_ereg))
  990: 				return emit_mov(compiler, dst, dstw, src, srcw);
  991: #endif
  992: 		}
  993: 
  994: 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
  995: 			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
  996: 			FAIL_IF(!code);
  997: 			*code = 0x8d;
  998: 			src &= SLJIT_MEM | 0xf;
  999: 			srcw = 0;
 1000: 		}
 1001: 
 1002: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1003: 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
 1004: 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
 1005: 			dst = TMP_REGISTER;
 1006: 		}
 1007: #endif
 1008: 
 1009: 		switch (op) {
 1010: 		case SLJIT_MOV:
 1011: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1012: 		case SLJIT_MOV_UI:
 1013: 		case SLJIT_MOV_SI:
 1014: #endif
 1015: 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
 1016: 			break;
 1017: 		case SLJIT_MOV_UB:
 1018: 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
 1019: 			break;
 1020: 		case SLJIT_MOV_SB:
 1021: 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
 1022: 			break;
 1023: 		case SLJIT_MOV_UH:
 1024: 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
 1025: 			break;
 1026: 		case SLJIT_MOV_SH:
 1027: 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
 1028: 			break;
 1029: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1030: 		case SLJIT_MOV_UI:
 1031: 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
 1032: 			break;
 1033: 		case SLJIT_MOV_SI:
 1034: 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
 1035: 			break;
 1036: #endif
 1037: 		}
 1038: 
 1039: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1040: 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
 1041: 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
 1042: #endif
 1043: 
 1044: 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
 1045: 			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
 1046: 			FAIL_IF(!code);
 1047: 			*code = 0x8d;
 1048: 		}
 1049: 		return SLJIT_SUCCESS;
 1050: 	}
 1051: 
 1052: 	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1053: 		compiler->flags_saved = 0;
 1054: 
 1055: 	switch (GET_OPCODE(op)) {
 1056: 	case SLJIT_NOT:
 1057: 		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
 1058: 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
 1059: 		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);
 1060: 
 1061: 	case SLJIT_NEG:
 1062: 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1063: 			FAIL_IF(emit_save_flags(compiler));
 1064: 		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);
 1065: 
 1066: 	case SLJIT_CLZ:
 1067: 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1068: 			FAIL_IF(emit_save_flags(compiler));
 1069: 		return emit_clz(compiler, op, dst, dstw, src, srcw);
 1070: 	}
 1071: 
 1072: 	return SLJIT_SUCCESS;
 1073: 
 1074: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1075: 	#undef src_is_ereg
 1076: #endif
 1077: }
 1078: 
 1079: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1080: 
 1081: #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
 1082: 	if (IS_HALFWORD(immw) || compiler->mode32) { \
 1083: 		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1084: 		FAIL_IF(!code); \
 1085: 		*(code + 1) |= (_op_imm_); \
 1086: 	} \
 1087: 	else { \
 1088: 		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
 1089: 		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
 1090: 		FAIL_IF(!code); \
 1091: 		*code = (_op_mr_); \
 1092: 	}
 1093: 
 1094: #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
 1095: 	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))
 1096: 
 1097: #else
 1098: 
 1099: #define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
 1100: 	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
 1101: 	FAIL_IF(!code); \
 1102: 	*(code + 1) |= (_op_imm_);
 1103: 
 1104: #define BINARY_EAX_IMM(_op_eax_imm_, immw) \
 1105: 	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))
 1106: 
 1107: #endif
 1108: 
 1109: static int emit_cum_binary(struct sljit_compiler *compiler,
 1110: 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
 1111: 	int dst, sljit_w dstw,
 1112: 	int src1, sljit_w src1w,
 1113: 	int src2, sljit_w src2w)
 1114: {
 1115: 	sljit_ub* code;
 1116: 
 1117: 	if (dst == SLJIT_UNUSED) {
 1118: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1119: 		if (src2 & SLJIT_IMM) {
 1120: 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1121: 		}
 1122: 		else {
 1123: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1124: 			FAIL_IF(!code);
 1125: 			*code = op_rm;
 1126: 		}
 1127: 		return SLJIT_SUCCESS;
 1128: 	}
 1129: 
 1130: 	if (dst == src1 && dstw == src1w) {
 1131: 		if (src2 & SLJIT_IMM) {
 1132: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1133: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1134: #else
 1135: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
 1136: #endif
 1137: 				BINARY_EAX_IMM(op_eax_imm, src2w);
 1138: 			}
 1139: 			else {
 1140: 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1141: 			}
 1142: 		}
 1143: 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1144: 			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1145: 			FAIL_IF(!code);
 1146: 			*code = op_rm;
 1147: 		}
 1148: 		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
 1149: 			/* Special exception for sljit_emit_cond_value. */
 1150: 			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1151: 			FAIL_IF(!code);
 1152: 			*code = op_mr;
 1153: 		}
 1154: 		else {
 1155: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
 1156: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1157: 			FAIL_IF(!code);
 1158: 			*code = op_mr;
 1159: 		}
 1160: 		return SLJIT_SUCCESS;
 1161: 	}
 1162: 
 1163: 	/* Only for cumulative operations. */
 1164: 	if (dst == src2 && dstw == src2w) {
 1165: 		if (src1 & SLJIT_IMM) {
 1166: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1167: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1168: #else
 1169: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
 1170: #endif
 1171: 				BINARY_EAX_IMM(op_eax_imm, src1w);
 1172: 			}
 1173: 			else {
 1174: 				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
 1175: 			}
 1176: 		}
 1177: 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1178: 			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
 1179: 			FAIL_IF(!code);
 1180: 			*code = op_rm;
 1181: 		}
 1182: 		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1183: 			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
 1184: 			FAIL_IF(!code);
 1185: 			*code = op_mr;
 1186: 		}
 1187: 		else {
 1188: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1189: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1190: 			FAIL_IF(!code);
 1191: 			*code = op_mr;
 1192: 		}
 1193: 		return SLJIT_SUCCESS;
 1194: 	}
 1195: 
 1196: 	/* General version. */
 1197: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1198: 		EMIT_MOV(compiler, dst, 0, src1, src1w);
 1199: 		if (src2 & SLJIT_IMM) {
 1200: 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1201: 		}
 1202: 		else {
 1203: 			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1204: 			FAIL_IF(!code);
 1205: 			*code = op_rm;
 1206: 		}
 1207: 	}
 1208: 	else {
 1209: 		/* This version requires less memory writing. */
 1210: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1211: 		if (src2 & SLJIT_IMM) {
 1212: 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1213: 		}
 1214: 		else {
 1215: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1216: 			FAIL_IF(!code);
 1217: 			*code = op_rm;
 1218: 		}
 1219: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1220: 	}
 1221: 
 1222: 	return SLJIT_SUCCESS;
 1223: }
 1224: 
 1225: static int emit_non_cum_binary(struct sljit_compiler *compiler,
 1226: 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
 1227: 	int dst, sljit_w dstw,
 1228: 	int src1, sljit_w src1w,
 1229: 	int src2, sljit_w src2w)
 1230: {
 1231: 	sljit_ub* code;
 1232: 
 1233: 	if (dst == SLJIT_UNUSED) {
 1234: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1235: 		if (src2 & SLJIT_IMM) {
 1236: 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1237: 		}
 1238: 		else {
 1239: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1240: 			FAIL_IF(!code);
 1241: 			*code = op_rm;
 1242: 		}
 1243: 		return SLJIT_SUCCESS;
 1244: 	}
 1245: 
 1246: 	if (dst == src1 && dstw == src1w) {
 1247: 		if (src2 & SLJIT_IMM) {
 1248: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1249: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1250: #else
 1251: 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
 1252: #endif
 1253: 				BINARY_EAX_IMM(op_eax_imm, src2w);
 1254: 			}
 1255: 			else {
 1256: 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
 1257: 			}
 1258: 		}
 1259: 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1260: 			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
 1261: 			FAIL_IF(!code);
 1262: 			*code = op_rm;
 1263: 		}
 1264: 		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1265: 			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
 1266: 			FAIL_IF(!code);
 1267: 			*code = op_mr;
 1268: 		}
 1269: 		else {
 1270: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
 1271: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
 1272: 			FAIL_IF(!code);
 1273: 			*code = op_mr;
 1274: 		}
 1275: 		return SLJIT_SUCCESS;
 1276: 	}
 1277: 
 1278: 	/* General version. */
 1279: 	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
 1280: 		EMIT_MOV(compiler, dst, 0, src1, src1w);
 1281: 		if (src2 & SLJIT_IMM) {
 1282: 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
 1283: 		}
 1284: 		else {
 1285: 			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
 1286: 			FAIL_IF(!code);
 1287: 			*code = op_rm;
 1288: 		}
 1289: 	}
 1290: 	else {
 1291: 		/* This version requires less memory writing. */
 1292: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1293: 		if (src2 & SLJIT_IMM) {
 1294: 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
 1295: 		}
 1296: 		else {
 1297: 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1298: 			FAIL_IF(!code);
 1299: 			*code = op_rm;
 1300: 		}
 1301: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1302: 	}
 1303: 
 1304: 	return SLJIT_SUCCESS;
 1305: }
 1306: 
 1307: static int emit_mul(struct sljit_compiler *compiler,
 1308: 	int dst, sljit_w dstw,
 1309: 	int src1, sljit_w src1w,
 1310: 	int src2, sljit_w src2w)
 1311: {
 1312: 	sljit_ub* code;
 1313: 	int dst_r;
 1314: 
 1315: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 1316: 
 1317: 	/* Register destination. */
 1318: 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
 1319: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1320: 		FAIL_IF(!code);
 1321: 		*code++ = 0x0f;
 1322: 		*code = 0xaf;
 1323: 	}
 1324: 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
 1325: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
 1326: 		FAIL_IF(!code);
 1327: 		*code++ = 0x0f;
 1328: 		*code = 0xaf;
 1329: 	}
 1330: 	else if (src1 & SLJIT_IMM) {
 1331: 		if (src2 & SLJIT_IMM) {
 1332: 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
 1333: 			src2 = dst_r;
 1334: 			src2w = 0;
 1335: 		}
 1336: 
 1337: 		if (src1w <= 127 && src1w >= -128) {
 1338: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1339: 			FAIL_IF(!code);
 1340: 			*code = 0x6b;
 1341: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1342: 			FAIL_IF(!code);
 1343: 			INC_CSIZE(1);
 1344: 			*code = (sljit_b)src1w;
 1345: 		}
 1346: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1347: 		else {
 1348: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1349: 			FAIL_IF(!code);
 1350: 			*code = 0x69;
 1351: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1352: 			FAIL_IF(!code);
 1353: 			INC_CSIZE(4);
 1354: 			*(sljit_w*)code = src1w;
 1355: 		}
 1356: #else
 1357: 		else if (IS_HALFWORD(src1w)) {
 1358: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
 1359: 			FAIL_IF(!code);
 1360: 			*code = 0x69;
 1361: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1362: 			FAIL_IF(!code);
 1363: 			INC_CSIZE(4);
 1364: 			*(sljit_hw*)code = (sljit_hw)src1w;
 1365: 		}
 1366: 		else {
 1367: 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
 1368: 			if (dst_r != src2)
 1369: 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
 1370: 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1371: 			FAIL_IF(!code);
 1372: 			*code++ = 0x0f;
 1373: 			*code = 0xaf;
 1374: 		}
 1375: #endif
 1376: 	}
 1377: 	else if (src2 & SLJIT_IMM) {
 1378: 		/* Note: src1 is NOT immediate. */
 1379: 
 1380: 		if (src2w <= 127 && src2w >= -128) {
 1381: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1382: 			FAIL_IF(!code);
 1383: 			*code = 0x6b;
 1384: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
 1385: 			FAIL_IF(!code);
 1386: 			INC_CSIZE(1);
 1387: 			*code = (sljit_b)src2w;
 1388: 		}
 1389: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 1390: 		else {
 1391: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1392: 			FAIL_IF(!code);
 1393: 			*code = 0x69;
 1394: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1395: 			FAIL_IF(!code);
 1396: 			INC_CSIZE(4);
 1397: 			*(sljit_w*)code = src2w;
 1398: 		}
 1399: #else
 1400: 		else if (IS_HALFWORD(src2w)) {
 1401: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
 1402: 			FAIL_IF(!code);
 1403: 			*code = 0x69;
 1404: 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
 1405: 			FAIL_IF(!code);
 1406: 			INC_CSIZE(4);
 1407: 			*(sljit_hw*)code = (sljit_hw)src2w;
 1408: 		}
 1409: 		else {
 1410: 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
 1411: 			if (dst_r != src1)
 1412: 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1413: 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 1414: 			FAIL_IF(!code);
 1415: 			*code++ = 0x0f;
 1416: 			*code = 0xaf;
 1417: 		}
 1418: #endif
 1419: 	}
 1420: 	else {
 1421: 		/* Neither argument is immediate. */
 1422: 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
 1423: 			dst_r = TMP_REGISTER;
 1424: 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
 1425: 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
 1426: 		FAIL_IF(!code);
 1427: 		*code++ = 0x0f;
 1428: 		*code = 0xaf;
 1429: 	}
 1430: 
 1431: 	if (dst_r == TMP_REGISTER)
 1432: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1433: 
 1434: 	return SLJIT_SUCCESS;
 1435: }
 1436: 
 1437: static int emit_lea_binary(struct sljit_compiler *compiler,
 1438: 	int dst, sljit_w dstw,
 1439: 	int src1, sljit_w src1w,
 1440: 	int src2, sljit_w src2w)
 1441: {
 1442: 	sljit_ub* code;
 1443: 	int dst_r, done = 0;
 1444: 
 1445: 	/* These cases better be left to handled by normal way. */
 1446: 	if (dst == src1 && dstw == src1w)
 1447: 		return SLJIT_ERR_UNSUPPORTED;
 1448: 	if (dst == src2 && dstw == src2w)
 1449: 		return SLJIT_ERR_UNSUPPORTED;
 1450: 
 1451: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 1452: 
 1453: 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1454: 		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1455: 			/* It is not possible to be both SLJIT_LOCALS_REG. */
 1456: 			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
 1457: 				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
 1458: 				FAIL_IF(!code);
 1459: 				*code = 0x8d;
 1460: 				done = 1;
 1461: 			}
 1462: 		}
 1463: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1464: 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1465: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
 1466: #else
 1467: 		if (src2 & SLJIT_IMM) {
 1468: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
 1469: #endif
 1470: 			FAIL_IF(!code);
 1471: 			*code = 0x8d;
 1472: 			done = 1;
 1473: 		}
 1474: 	}
 1475: 	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1476: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1477: 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1478: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
 1479: #else
 1480: 		if (src1 & SLJIT_IMM) {
 1481: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
 1482: #endif
 1483: 			FAIL_IF(!code);
 1484: 			*code = 0x8d;
 1485: 			done = 1;
 1486: 		}
 1487: 	}
 1488: 
 1489: 	if (done) {
 1490: 		if (dst_r == TMP_REGISTER)
 1491: 			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
 1492: 		return SLJIT_SUCCESS;
 1493: 	}
 1494: 	return SLJIT_ERR_UNSUPPORTED;
 1495: }
 1496: 
 1497: static int emit_cmp_binary(struct sljit_compiler *compiler,
 1498: 	int src1, sljit_w src1w,
 1499: 	int src2, sljit_w src2w)
 1500: {
 1501: 	sljit_ub* code;
 1502: 
 1503: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1504: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1505: #else
 1506: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1507: #endif
 1508: 		BINARY_EAX_IMM(0x3d, src2w);
 1509: 		return SLJIT_SUCCESS;
 1510: 	}
 1511: 
 1512: 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1513: 		if (src2 & SLJIT_IMM) {
 1514: 			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
 1515: 		}
 1516: 		else {
 1517: 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1518: 			FAIL_IF(!code);
 1519: 			*code = 0x3b;
 1520: 		}
 1521: 		return SLJIT_SUCCESS;
 1522: 	}
 1523: 
 1524: 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
 1525: 		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1526: 		FAIL_IF(!code);
 1527: 		*code = 0x39;
 1528: 		return SLJIT_SUCCESS;
 1529: 	}
 1530: 
 1531: 	if (src2 & SLJIT_IMM) {
 1532: 		if (src1 & SLJIT_IMM) {
 1533: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1534: 			src1 = TMP_REGISTER;
 1535: 			src1w = 0;
 1536: 		}
 1537: 		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
 1538: 	}
 1539: 	else {
 1540: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1541: 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1542: 		FAIL_IF(!code);
 1543: 		*code = 0x3b;
 1544: 	}
 1545: 	return SLJIT_SUCCESS;
 1546: }
 1547: 
 1548: static int emit_test_binary(struct sljit_compiler *compiler,
 1549: 	int src1, sljit_w src1w,
 1550: 	int src2, sljit_w src2w)
 1551: {
 1552: 	sljit_ub* code;
 1553: 
 1554: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1555: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
 1556: #else
 1557: 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
 1558: #endif
 1559: 		BINARY_EAX_IMM(0xa9, src2w);
 1560: 		return SLJIT_SUCCESS;
 1561: 	}
 1562: 
 1563: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1564: 	if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
 1565: #else
 1566: 	if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
 1567: #endif
 1568: 		BINARY_EAX_IMM(0xa9, src1w);
 1569: 		return SLJIT_SUCCESS;
 1570: 	}
 1571: 
 1572: 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
 1573: 		if (src2 & SLJIT_IMM) {
 1574: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1575: 			if (IS_HALFWORD(src2w) || compiler->mode32) {
 1576: 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1577: 				FAIL_IF(!code);
 1578: 				*code = 0xf7;
 1579: 			}
 1580: 			else {
 1581: 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1582: 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
 1583: 				FAIL_IF(!code);
 1584: 				*code = 0x85;
 1585: 			}
 1586: #else
 1587: 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
 1588: 			FAIL_IF(!code);
 1589: 			*code = 0xf7;
 1590: #endif
 1591: 		}
 1592: 		else {
 1593: 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
 1594: 			FAIL_IF(!code);
 1595: 			*code = 0x85;
 1596: 		}
 1597: 		return SLJIT_SUCCESS;
 1598: 	}
 1599: 
 1600: 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
 1601: 		if (src1 & SLJIT_IMM) {
 1602: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1603: 			if (IS_HALFWORD(src1w) || compiler->mode32) {
 1604: 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
 1605: 				FAIL_IF(!code);
 1606: 				*code = 0xf7;
 1607: 			}
 1608: 			else {
 1609: 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
 1610: 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
 1611: 				FAIL_IF(!code);
 1612: 				*code = 0x85;
 1613: 			}
 1614: #else
 1615: 			code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
 1616: 			FAIL_IF(!code);
 1617: 			*code = 0xf7;
 1618: #endif
 1619: 		}
 1620: 		else {
 1621: 			code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
 1622: 			FAIL_IF(!code);
 1623: 			*code = 0x85;
 1624: 		}
 1625: 		return SLJIT_SUCCESS;
 1626: 	}
 1627: 
 1628: 	EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1629: 	if (src2 & SLJIT_IMM) {
 1630: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1631: 		if (IS_HALFWORD(src2w) || compiler->mode32) {
 1632: 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1633: 			FAIL_IF(!code);
 1634: 			*code = 0xf7;
 1635: 		}
 1636: 		else {
 1637: 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
 1638: 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
 1639: 			FAIL_IF(!code);
 1640: 			*code = 0x85;
 1641: 		}
 1642: #else
 1643: 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
 1644: 		FAIL_IF(!code);
 1645: 		*code = 0xf7;
 1646: #endif
 1647: 	}
 1648: 	else {
 1649: 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
 1650: 		FAIL_IF(!code);
 1651: 		*code = 0x85;
 1652: 	}
 1653: 	return SLJIT_SUCCESS;
 1654: }
 1655: 
 1656: static int emit_shift(struct sljit_compiler *compiler,
 1657: 	sljit_ub mode,
 1658: 	int dst, sljit_w dstw,
 1659: 	int src1, sljit_w src1w,
 1660: 	int src2, sljit_w src2w)
 1661: {
 1662: 	sljit_ub* code;
 1663: 
 1664: 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
 1665: 		if (dst == src1 && dstw == src1w) {
 1666: 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
 1667: 			FAIL_IF(!code);
 1668: 			*code |= mode;
 1669: 			return SLJIT_SUCCESS;
 1670: 		}
 1671: 		if (dst == SLJIT_UNUSED) {
 1672: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1673: 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
 1674: 			FAIL_IF(!code);
 1675: 			*code |= mode;
 1676: 			return SLJIT_SUCCESS;
 1677: 		}
 1678: 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
 1679: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1680: 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1681: 			FAIL_IF(!code);
 1682: 			*code |= mode;
 1683: 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1684: 			return SLJIT_SUCCESS;
 1685: 		}
 1686: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
 1687: 			EMIT_MOV(compiler, dst, 0, src1, src1w);
 1688: 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
 1689: 			FAIL_IF(!code);
 1690: 			*code |= mode;
 1691: 			return SLJIT_SUCCESS;
 1692: 		}
 1693: 
 1694: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1695: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
 1696: 		FAIL_IF(!code);
 1697: 		*code |= mode;
 1698: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1699: 		return SLJIT_SUCCESS;
 1700: 	}
 1701: 
 1702: 	if (dst == SLJIT_PREF_SHIFT_REG) {
 1703: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1704: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1705: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1706: 		FAIL_IF(!code);
 1707: 		*code |= mode;
 1708: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1709: 	}
 1710: 	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
 1711: 		if (src1 != dst)
 1712: 			EMIT_MOV(compiler, dst, 0, src1, src1w);
 1713: 		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
 1714: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1715: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 1716: 		FAIL_IF(!code);
 1717: 		*code |= mode;
 1718: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1719: 	}
 1720: 	else {
 1721: 		/* This case is really difficult, since ecx can be used for
 1722: 		   addressing as well, and we must ensure to work even in that case. */
 1723: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1724: 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
 1725: #else
 1726: 		/* [esp - 4] is reserved for eflags. */
 1727: 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
 1728: #endif
 1729: 
 1730: 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
 1731: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 1732: 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
 1733: 		FAIL_IF(!code);
 1734: 		*code |= mode;
 1735: 
 1736: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1737: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
 1738: #else
 1739: 		/* [esp - 4] is reserved for eflags. */
 1740: 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
 1741: #endif
 1742: 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 1743: 	}
 1744: 
 1745: 	return SLJIT_SUCCESS;
 1746: }
 1747: 
 /*
  * Emit machine code for a two-operand integer operation.
  *
  * GET_OPCODE(op) selects the operation (ADD, ADDC, SUB, SUBC, MUL, AND, OR,
  * XOR, SHL, LSHR, ASHR).  dst/dstw describe the destination, src1/src1w and
  * src2/src2w the two operands.  An opcode not listed in the switch emits no
  * operation and the function returns SLJIT_SUCCESS.
  *
  * Flag bookkeeping (compiler->flags_saved):
  *  - an operation that is asked to set flags (GET_FLAGS(op)) clears
  *    flags_saved;
  *  - an operation with SLJIT_KEEP_FLAGS calls emit_save_flags() first when
  *    nothing has been saved yet;
  *  - ADDC and SUBC call emit_restore_flags() first when flags were saved,
  *    because they consume the carry flag.
  *
  * Returns the value of the emit helper that was called, compiler->error
  * after the emit_lea_binary() shortcut, or SLJIT_SUCCESS.
  */
 1748: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
 1749: 	int dst, sljit_w dstw,
 1750: 	int src1, sljit_w src1w,
 1751: 	int src2, sljit_w src2w)
 1752: {
 1753: 	CHECK_ERROR();
 1754: 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 1755: 
 1756: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
       	/* The SLJIT_INT_OP bit of op selects the 32 bit operand mode. */
 1757: 	compiler->mode32 = op & SLJIT_INT_OP;
 1758: #endif
       	/* NOTE(review): CHECK_EXTRA_REGS is defined outside this chunk; it
       	   presumably rewrites operands naming registers that have no
       	   hardware register - confirm against its definition. */
 1759: 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
 1760: 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
 1761: 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
 1762: 
       	/* Common flag handling for SLJIT_MUL and every opcode above it.
       	   ADD, ADDC, SUB and SUBC do their own handling in the switch. */
 1763: 	if (GET_OPCODE(op) >= SLJIT_MUL) {
 1764: 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1765: 			compiler->flags_saved = 0;
 1766: 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1767: 			FAIL_IF(emit_save_flags(compiler));
 1768: 	}
 1769: 
 1770: 	switch (GET_OPCODE(op)) {
 1771: 	case SLJIT_ADD:
       		/* When no flags are requested, try emit_lea_binary() first.  Any
       		   result other than SLJIT_ERR_UNSUPPORTED ends the operation. */
 1772: 		if (!GET_FLAGS(op)) {
 1773: 			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
 1774: 				return compiler->error;
 1775: 		} 
 1776: 		else
 1777: 			compiler->flags_saved = 0;
 1778: 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1779: 			FAIL_IF(emit_save_flags(compiler));
 1780: 		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
 1781: 			dst, dstw, src1, src1w, src2, src2w);
 1782: 	case SLJIT_ADDC:
 1783: 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
 1784: 			FAIL_IF(emit_restore_flags(compiler, 1));
 1785: 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
 1786: 			FAIL_IF(emit_save_flags(compiler));
 1787: 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1788: 			compiler->flags_saved = 0;
 1789: 		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
 1790: 			dst, dstw, src1, src1w, src2, src2w);
 1791: 	case SLJIT_SUB:
       		/* Subtracting an immediate without flags is tried as an
       		   emit_lea_binary() addition of the negated immediate. */
 1792: 		if (!GET_FLAGS(op)) {
 1793: 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
 1794: 				return compiler->error;
 1795: 		}
 1796: 		else
 1797: 			compiler->flags_saved = 0;
 1798: 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
 1799: 			FAIL_IF(emit_save_flags(compiler));
       		/* With no destination only the comparison is emitted. */
 1800: 		if (dst == SLJIT_UNUSED)
 1801: 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
 1802: 		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
 1803: 			dst, dstw, src1, src1w, src2, src2w);
 1804: 	case SLJIT_SUBC:
 1805: 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
 1806: 			FAIL_IF(emit_restore_flags(compiler, 1));
 1807: 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
 1808: 			FAIL_IF(emit_save_flags(compiler));
 1809: 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
 1810: 			compiler->flags_saved = 0;
 1811: 		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
 1812: 			dst, dstw, src1, src1w, src2, src2w);
 1813: 	case SLJIT_MUL:
 1814: 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
 1815: 	case SLJIT_AND:
       		/* With no destination only the test is emitted. */
 1816: 		if (dst == SLJIT_UNUSED)
 1817: 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
 1818: 		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
 1819: 			dst, dstw, src1, src1w, src2, src2w);
 1820: 	case SLJIT_OR:
 1821: 		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
 1822: 			dst, dstw, src1, src1w, src2, src2w);
 1823: 	case SLJIT_XOR:
 1824: 		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
 1825: 			dst, dstw, src1, src1w, src2, src2w);
       	/* The second argument of emit_shift() is the ModRM opcode extension
       	   of the x86 shift group: /4 is SHL, /5 is SHR, /7 is SAR. */
 1826: 	case SLJIT_SHL:
 1827: 		return emit_shift(compiler, 0x4 << 3,
 1828: 			dst, dstw, src1, src1w, src2, src2w);
 1829: 	case SLJIT_LSHR:
 1830: 		return emit_shift(compiler, 0x5 << 3,
 1831: 			dst, dstw, src1, src1w, src2, src2w);
 1832: 	case SLJIT_ASHR:
 1833: 		return emit_shift(compiler, 0x7 << 3,
 1834: 			dst, dstw, src1, src1w, src2, src2w);
 1835: 	}
 1836: 
 1837: 	return SLJIT_SUCCESS;
 1838: }
 1839: 
 1840: /* --------------------------------------------------------------------- */
 1841: /*  Floating point operators                                             */
 1842: /* --------------------------------------------------------------------- */
 1843: 
 /* Result of the run-time SSE2 detection: written by init_compiler() from
    the CPUID feature bits and read by the sljit_emit_fop1() and
    sljit_emit_fop2() dispatchers. */
 1844: #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 1845: static int sse2_available = 0;
 1846: #endif
 1847: 
 /* Constant masks used by the SSE2 code generator, and their one-time setup. */
 1848: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 1849: 
       /* Three spare words allow the start to be rounded up to a 16 byte
          boundary; the remaining 4 + 4 words are two 16 byte slots. */
 1850: /* Alignment + 2 * 16 bytes. */
 1851: static sljit_i sse2_data[3 + 4 + 4];
       /* 16 byte aligned pointer into sse2_data, set by init_compiler(). */
 1852: static sljit_i *sse2_buffer;
 1853: 
       /*
        * Fill the two mask slots and, when SLJIT_SSE2_AUTO is enabled, query
        * CPUID to find out whether SSE2 can be used.
        */
 1854: static void init_compiler()
 1855: {
 1856: #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 1857: 	int features = 0;
 1858: #endif
 1859: 
       	/* Round the address of sse2_data up to a multiple of 16. */
 1860: 	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
       	/* Slot 0: only the sign bit of a 64 bit double is set.  SLJIT_FNEG
       	   uses this slot as the memory operand of opcode 0x57. */
 1861: 	sse2_buffer[0] = 0;
 1862: 	sse2_buffer[1] = 0x80000000;
       	/* Slot 1: every bit except the sign bit of a 64 bit double is set.
       	   SLJIT_FABS uses this slot as the memory operand of opcode 0x54. */
 1863: 	sse2_buffer[4] = 0xffffffff;
 1864: 	sse2_buffer[5] = 0x7fffffff;
 1865: 
 1866: #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
       	/* Execute CPUID with eax = 1 and keep the feature word returned in
       	   edx.  ebx is saved and restored around the instruction because
       	   CPUID overwrites it. */
 1867: #ifdef __GNUC__
 1868: 	/* AT&T syntax. */
 1869: 	asm (
 1870: 		"pushl %%ebx\n"
 1871: 		"movl $0x1, %%eax\n"
 1872: 		"cpuid\n"
 1873: 		"popl %%ebx\n"
 1874: 		"movl %%edx, %0\n"
 1875: 		: "=g" (features)
 1876: 		:
 1877: 		: "%eax", "%ecx", "%edx"
 1878: 	);
 1879: #elif defined(_MSC_VER) || defined(__BORLANDC__)
 1880: 	/* Intel syntax. */
 1881: 	__asm {
 1882: 		mov eax, 1
 1883: 		push ebx
 1884: 		cpuid
 1885: 		pop ebx
 1886: 		mov features, edx
 1887: 	}
 1888: #else
 1889: 	#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
 1890: #endif
       	/* Bit 26 of the CPUID.1 edx feature word reports SSE2 support. */
 1891: 	sse2_available = (features >> 26) & 0x1;
 1892: #endif
 1893: }
 1894: 
 1895: #endif
 1896: 
 /* Report whether floating point operations can be emitted: always 1 in this
    back end. */
 1897: SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
 1898: {
 1899: 	/* Always available. */
 1900: 	return 1;
 1901: }
 1902: 
 1903: #if (defined SLJIT_SSE2 && SLJIT_SSE2)
 1904: 
 /* Emit the two byte opcode (0x0f, opcode) of an SSE2 instruction requested
    with the EX86_PREF_F2 flag.  xmm1 is passed as the first operand of
    emit_x86_instruction(), xmm2/xmm2w as the second one.
    Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure. */
 1905: static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
 1906: 	int xmm1, int xmm2, sljit_w xmm2w)
 1907: {
 1908: 	sljit_ub *inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 1909: 
 1910: 	/* Two opcode bytes were requested above; fill them in. */
 1911: 	FAIL_IF(!inst);
 1912: 	inst[0] = 0x0f;
 1913: 	inst[1] = opcode;
 1914: 	return SLJIT_SUCCESS;
 1915: }
 1916: 
 /* Same as emit_sse2(), but the instruction is requested with the
    EX86_PREF_66 flag instead of EX86_PREF_F2.  The callers in this file use
    it for opcodes 0x2e, 0x54 and 0x57.
    Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure. */
 1917: static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
 1918: 	int xmm1, int xmm2, sljit_w xmm2w)
 1919: {
 1920: 	sljit_ub *inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
 1921: 
 1922: 	/* Two opcode bytes were requested above; fill them in. */
 1923: 	FAIL_IF(!inst);
 1924: 	inst[0] = 0x0f;
 1925: 	inst[1] = opcode;
 1926: 	return SLJIT_SUCCESS;
 1927: }
 1928: 
 /* Load src/srcw into register dst: emit_sse2() with opcode 0x10. */
 1929: static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
 1930: 	int dst, int src, sljit_w srcw)
 1931: {
 1932: 	return emit_sse2(compiler, 0x10, dst, src, srcw);
 1933: }
 1934: 
 /* Store register src into dst/dstw: emit_sse2() with opcode 0x11.  Note the
    swapped operand order compared to emit_sse2_load(). */
 1935: static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
 1936: 	int dst, sljit_w dstw, int src)
 1937: {
 1938: 	return emit_sse2(compiler, 0x11, src, dst, dstw);
 1939: }
 1940: 
 /*
  * SSE2 implementation of the single-operand floating point operations
  * (SLJIT_FCMP, SLJIT_FMOV, SLJIT_FNEG, SLJIT_FABS).
  *
  * The function is exported as sljit_emit_fop1() unless SLJIT_SSE2_AUTO is
  * enabled; in that case it is the static sljit_emit_sse2_fop1().
  *
  * For SLJIT_FCMP dst and src are the two compared values and nothing is
  * stored.  For the other operations the result computed from src is written
  * to dst.  Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 1941: #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 1942: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 1943: #else
 1944: static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
 1945: #endif
 1946: 	int dst, sljit_w dstw,
 1947: 	int src, sljit_w srcw)
 1948: {
 1949: 	int dst_r;
 1950: 
 1951: 	CHECK_ERROR();
 1952: 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
 1953: 
 1954: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 1955: 	compiler->mode32 = 1;
 1956: #endif
 1957: 
 1958: 	if (GET_OPCODE(op) == SLJIT_FCMP) {
       		/* The compare overwrites the flags, so saved flags are dropped. */
 1959: 		compiler->flags_saved = 0;
       		/* The first compared value must be in a register. */
 1960: 		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
 1961: 			dst_r = dst;
 1962: 		else {
 1963: 			dst_r = TMP_FREG;
 1964: 			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
 1965: 		}
 1966: 		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
 1967: 	}
 1968: 
 1969: 	if (op == SLJIT_FMOV) {
       		/* Register destination: load.  Register source: store.
       		   Otherwise copy through TMP_FREG. */
 1970: 		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
 1971: 			return emit_sse2_load(compiler, dst, src, srcw);
 1972: 		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
 1973: 			return emit_sse2_store(compiler, dst, dstw, src);
 1974: 		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
 1975: 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 1976: 	}
 1977: 
       	/* Bring the source into the working register: dst itself when it is
       	   a float register, TMP_FREG otherwise. */
 1978: 	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
 1979: 		dst_r = dst;
 1980: 		if (dst != src)
 1981: 			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
 1982: 	}
 1983: 	else {
 1984: 		dst_r = TMP_FREG;
 1985: 		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
 1986: 	}
 1987: 
 1988: 	switch (op) {
 1989: 	case SLJIT_FNEG:
       		/* Opcode 0x57 with the sign-bit mask stored at sse2_buffer. */
 1990: 		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
 1991: 		break;
 1992: 
 1993: 	case SLJIT_FABS:
       		/* Opcode 0x54 with the all-but-sign mask stored at sse2_buffer + 4. */
 1994: 		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
 1995: 		break;
 1996: 	}
 1997: 
       	/* A result built in the temporary still has to be stored to dst. */
 1998: 	if (dst_r == TMP_FREG)
 1999: 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 2000: 	return SLJIT_SUCCESS;
 2001: }
 2002: 
 /*
  * SSE2 implementation of the two-operand floating point operations
  * (SLJIT_FADD, SLJIT_FSUB, SLJIT_FMUL, SLJIT_FDIV): dst = src1 op src2.
  *
  * The function is exported as sljit_emit_fop2() unless SLJIT_SSE2_AUTO is
  * enabled; in that case it is the static sljit_emit_sse2_fop2().
  * Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 2003: #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2004: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2005: #else
 2006: static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
 2007: #endif
 2008: 	int dst, sljit_w dstw,
 2009: 	int src1, sljit_w src1w,
 2010: 	int src2, sljit_w src2w)
 2011: {
 2012: 	int dst_r;
 2013: 
 2014: 	CHECK_ERROR();
 2015: 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2016: 
 2017: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2018: 	compiler->mode32 = 1;
 2019: #endif
 2020: 
       	/* Choose the working register dst_r and make it hold src1. */
 2021: 	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
 2022: 		dst_r = dst;
 2023: 		if (dst == src1)
 2024: 			; /* Do nothing here. */
 2025: 		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
       			/* dst already holds src2 and the operation is commutative,
       			   so src1 can be used as the second operand instead. */
 2026: 			/* Swap arguments. */
 2027: 			src2 = src1;
 2028: 			src2w = src1w;
 2029: 		}
 2030: 		else if (dst != src2)
 2031: 			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
 2032: 		else {
       			/* dst == src2 with FSUB or FDIV: loading src1 into dst
       			   would destroy src2, so work in the temporary. */
 2033: 			dst_r = TMP_FREG;
 2034: 			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
 2035: 		}
 2036: 	}
 2037: 	else {
 2038: 		dst_r = TMP_FREG;
 2039: 		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
 2040: 	}
 2041: 
       	/* dst_r = dst_r op src2; the opcode byte selects the operation. */
 2042: 	switch (op) {
 2043: 	case SLJIT_FADD:
 2044: 		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
 2045: 		break;
 2046: 
 2047: 	case SLJIT_FSUB:
 2048: 		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
 2049: 		break;
 2050: 
 2051: 	case SLJIT_FMUL:
 2052: 		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
 2053: 		break;
 2054: 
 2055: 	case SLJIT_FDIV:
 2056: 		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
 2057: 		break;
 2058: 	}
 2059: 
       	/* A result built in the temporary still has to be stored to dst. */
 2060: 	if (dst_r == TMP_FREG)
 2061: 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
 2062: 	return SLJIT_SUCCESS;
 2063: }
 2064: 
 2065: #endif
 2066: 
 2067: #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
 2068: 
 /*
  * Emit an x87 load that pushes src on the floating point register stack.
  * A float register is encoded as the two bytes 0xd9, 0xc0 + (src - 1);
  * any other operand goes through emit_x86_instruction() with opcode 0xdd.
  * Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure.
  */
 2069: static int emit_fld(struct sljit_compiler *compiler,
 2070: 	int src, sljit_w srcw)
 2071: {
 2072: 	sljit_ub *buf;
 2073: 
 2074: 	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
       		/* NOTE(review): INC_SIZE is defined outside this chunk; one more
       		   byte than the instruction length is reserved, presumably for a
       		   length byte written by INC_SIZE - confirm. */
 2075: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2076: 		FAIL_IF(!buf);
 2077: 		INC_SIZE(2);
 2078: 		*buf++ = 0xd9;
       		/* Float register n is stack slot st(n - 1). */
 2079: 		*buf = 0xc0 + src - 1;
 2080: 		return SLJIT_SUCCESS;
 2081: 	}
 2082: 
 2083: 	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2084: 	FAIL_IF(!buf);
 2085: 	*buf = 0xdd;
 2086: 	return SLJIT_SUCCESS;
 2087: }
 2088: 
 /*
  * Emit an x87 instruction whose operand is either a float register or
  * another operand handled by emit_x86_instruction().
  *
  *  - float register: the two bytes st_arg, st_arg2 + src are written;
  *  - otherwise: opcode m64fp_arg is written and m64fp_arg2 is OR-ed into
  *    the byte that follows it.
  *
  * Unlike emit_fld(), the register index is src rather than src - 1.  The
  * callers in this file use it right after an emit_fld() push, which moves
  * every float register one stack slot further away.
  * Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure.
  */
 2089: static int emit_fop(struct sljit_compiler *compiler,
 2090: 	sljit_ub st_arg, sljit_ub st_arg2,
 2091: 	sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
 2092: 	int src, sljit_w srcw)
 2093: {
 2094: 	sljit_ub *buf;
 2095: 
 2096: 	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
 2097: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2098: 		FAIL_IF(!buf);
 2099: 		INC_SIZE(2);
 2100: 		*buf++ = st_arg;
 2101: 		*buf = st_arg2 + src;
 2102: 		return SLJIT_SUCCESS;
 2103: 	}
 2104: 
 2105: 	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2106: 	FAIL_IF(!buf);
 2107: 	*buf++ = m64fp_arg;
       	/* The byte after the opcode was produced by emit_x86_instruction();
       	   only the opcode extension bits are merged into it. */
 2108: 	*buf |= m64fp_arg2;
 2109: 	return SLJIT_SUCCESS;
 2110: }
 2111: 
 /*
  * Emit the two byte x87 instruction st_arg, st_arg2 + src.  src is used
  * directly as the stack register index and is not range checked, so callers
  * must pass a stack index (or a float register number, which selects the
  * same register after one push).
  * Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure.
  */
 2112: static int emit_fop_regs(struct sljit_compiler *compiler,
 2113: 	sljit_ub st_arg, sljit_ub st_arg2,
 2114: 	int src)
 2115: {
 2116: 	sljit_ub *buf;
 2117: 
 2118: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
 2119: 	FAIL_IF(!buf);
 2120: 	INC_SIZE(2);
 2121: 	*buf++ = st_arg;
 2122: 	*buf = st_arg2 + src;
 2123: 	return SLJIT_SUCCESS;
 2124: }
 2125: 
 /*
  * x87 implementation of the single-operand floating point operations
  * (SLJIT_FCMP, SLJIT_FMOV, SLJIT_FNEG, SLJIT_FABS).
  *
  * The function is exported as sljit_emit_fop1() unless SLJIT_SSE2_AUTO is
  * enabled; in that case it is the static sljit_emit_fpu_fop1().
  *
  * For SLJIT_FCMP dst and src are the two compared values and only the CPU
  * flags are produced.  For the other operations src is pushed on the x87
  * stack, transformed, and popped into dst.
  * Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 2126: #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2127: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 2128: #else
 2129: static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
 2130: #endif
 2131: 	int dst, sljit_w dstw,
 2132: 	int src, sljit_w srcw)
 2133: {
 2134: #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2135: 	sljit_ub *buf;
 2136: #endif
 2137: 
 2138: 	CHECK_ERROR();
 2139: 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
 2140: 
 2141: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2142: 	compiler->mode32 = 1;
 2143: #endif
 2144: 
 2145: 	if (GET_OPCODE(op) == SLJIT_FCMP) {
       		/* The compare overwrites the flags, so saved flags are dropped. */
 2146: 		compiler->flags_saved = 0;
 2147: #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
       		/* Push dst, then compare it with src and pop (fcomp). */
 2148: 		FAIL_IF(emit_fld(compiler, dst, dstw));
 2149: 		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
 2150: 
       		/* fnstsw ax (0xdf 0xe0) overwrites ax, so SLJIT_TEMPORARY_REG1
       		   is preserved in TMP_REGISTER around the sequence. */
 2151: 		/* Copy flags. */
 2152: 		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2153: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
 2154: 		FAIL_IF(!buf);
 2155: 		INC_SIZE(3);
 2156: 		*buf++ = 0xdf;
 2157: 		*buf++ = 0xe0;
 2158: 		/* sahf (0x9e). Note: lahf/sahf are not supported on all x86-64 architectures, hence the separate path below. */
 2159: 		*buf++ = 0x9e;
 2160: 		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
 2161: #else
       		/* fucomip st0, st(i) (0xdf 0xe8 + i) sets the CPU flags directly
       		   and pops st0. */
 2162: 		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
       			/* After the push, float register src is stack slot st(src). */
 2163: 			FAIL_IF(emit_fld(compiler, dst, dstw));
 2164: 			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
 2165: 		} else {
       			/* src is not a float register: push it first.  A register
       			   dst has moved one slot further by then, hence the + 1. */
 2166: 			FAIL_IF(emit_fld(compiler, src, srcw));
 2167: 			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
       			/* st0 is dst and st1 is the pushed src value, so the stack
       			   index must be 1.  Passing src here would add the memory
       			   operand descriptor to the opcode byte. */
 2168: 			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, 1));
       			/* fstp st0 (0xdd 0xd8): drop the pushed copy of src. */
 2169: 			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
 2170: 		}
 2171: #endif
 2172: 		return SLJIT_SUCCESS;
 2173: 	}
 2174: 
       	/* FMOV, FNEG, FABS: push src, transform st0, pop into dst. */
 2175: 	FAIL_IF(emit_fld(compiler, src, srcw));
 2176: 
 2177: 	switch (op) {
 2178: 	case SLJIT_FNEG:
       		/* fchs (0xd9 0xe0). */
 2179: 		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
 2180: 		break;
 2181: 	case SLJIT_FABS:
       		/* fabs (0xd9 0xe1). */
 2182: 		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
 2183: 		break;
 2184: 	}
 2185: 
       	/* fstp: store st0 into dst and pop. */
 2186: 	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
 2187: 
 2188: 	return SLJIT_SUCCESS;
 2189: }
 2190: 
 /*
  * x87 implementation of the two-operand floating point operations
  * (SLJIT_FADD, SLJIT_FSUB, SLJIT_FMUL, SLJIT_FDIV): dst = src1 op src2.
  *
  * The function is exported as sljit_emit_fop2() unless SLJIT_SSE2_AUTO is
  * enabled; in that case it is the static sljit_emit_fpu_fop2().
  *
  * Three code shapes are used:
  *  1. dst is a float register equal to src1: push src2, then an
  *     "operate and pop" instruction (0xde ...) updates dst in place;
  *  2. dst is a float register equal to src2: push src1, then the reversed
  *     "operate and pop" form keeps the operand order src1 op src2;
  *  3. otherwise: push src1, apply src2 to st0, pop st0 into dst.
  * Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 2191: #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2192: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2193: #else
 2194: static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
 2195: #endif
 2196: 	int dst, sljit_w dstw,
 2197: 	int src1, sljit_w src1w,
 2198: 	int src2, sljit_w src2w)
 2199: {
 2200: 	CHECK_ERROR();
 2201: 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2202: 
 2203: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2204: 	compiler->mode32 = 1;
 2205: #endif
 2206: 
       	/* Shape 1: st(src1) = st(src1) op st0, then pop. */
 2207: 	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
 2208: 		FAIL_IF(emit_fld(compiler, src2, src2w));
 2209: 
 2210: 		switch (op) {
 2211: 		case SLJIT_FADD:
 2212: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
 2213: 			break;
 2214: 		case SLJIT_FSUB:
 2215: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
 2216: 			break;
 2217: 		case SLJIT_FMUL:
 2218: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
 2219: 			break;
 2220: 		case SLJIT_FDIV:
 2221: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
 2222: 			break;
 2223: 		}
 2224: 		return SLJIT_SUCCESS;
 2225: 	}
 2226: 
 2227: 	FAIL_IF(emit_fld(compiler, src1, src1w));
 2228: 
       	/* Shape 2: st(src2) = st0 op st(src2), then pop.  FSUB and FDIV use
       	   the reversed encodings (0xe0, 0xf0 instead of 0xe8, 0xf8). */
 2229: 	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
 2230: 		switch (op) {
 2231: 		case SLJIT_FADD:
 2232: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
 2233: 			break;
 2234: 		case SLJIT_FSUB:
 2235: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
 2236: 			break;
 2237: 		case SLJIT_FMUL:
 2238: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
 2239: 			break;
 2240: 		case SLJIT_FDIV:
 2241: 			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
 2242: 			break;
 2243: 		}
 2244: 		return SLJIT_SUCCESS;
 2245: 	}
 2246: 
       	/* Shape 3: st0 = st0 op src2. */
 2247: 	switch (op) {
 2248: 	case SLJIT_FADD:
 2249: 		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
 2250: 		break;
 2251: 	case SLJIT_FSUB:
 2252: 		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
 2253: 		break;
 2254: 	case SLJIT_FMUL:
 2255: 		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
 2256: 		break;
 2257: 	case SLJIT_FDIV:
 2258: 		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
 2259: 		break;
 2260: 	}
 2261: 
       	/* fstp: store st0 into dst and pop. */
 2262: 	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
 2263: 
 2264: 	return SLJIT_SUCCESS;
 2265: }
 2266: #endif
 2267: 
 2268: #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
 2269: 
 /* Public single-operand floating point entry point when SLJIT_SSE2_AUTO is
    enabled: forwards to the SSE2 or the x87 code generator depending on
    sse2_available. */
 2270: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
 2271: 	int dst, sljit_w dstw,
 2272: 	int src, sljit_w srcw)
 2273: {
 2274: 	/* Without SSE2 fall back to the x87 implementation. */
 2275: 	if (!sse2_available)
 2276: 		return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
 2277: 	return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
 2278: }
 2279: 
 /* Public two-operand floating point entry point when SLJIT_SSE2_AUTO is
    enabled: forwards to the SSE2 or the x87 code generator depending on
    sse2_available. */
 2280: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
 2281: 	int dst, sljit_w dstw,
 2282: 	int src1, sljit_w src1w,
 2283: 	int src2, sljit_w src2w)
 2284: {
 2285: 	/* Without SSE2 fall back to the x87 implementation. */
 2286: 	if (!sse2_available)
 2287: 		return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2288: 
 2289: 	return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
 2290: }
 2290: 
 2291: #endif
 2292: 
 2293: /* --------------------------------------------------------------------- */
 2294: /*  Conditional instructions                                             */
 2295: /* --------------------------------------------------------------------- */
 2296: 
 /*
  * Create a label at the current position of the generated code.
  *
  * If the most recent label was created at the same code size, that label is
  * returned again instead of allocating a new one.  Otherwise a new
  * sljit_label is allocated, registered with set_label() and a two byte
  * (0, 0) record is appended to the instruction buffer.
  * Returns the label, or leaves through the PTR_FAIL_IF macros on failure.
  */
 2297: SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
 2298: {
 2299: 	sljit_ub *buf;
 2300: 	struct sljit_label *label;
 2301: 
 2302: 	CHECK_ERROR_PTR();
 2303: 	check_sljit_emit_label(compiler);
 2304: 
 2305: 	/* We should restore the flags before the label,
 2306: 	   since other taken jumps have their own flags as well. */
 2307: 	if (SLJIT_UNLIKELY(compiler->flags_saved))
 2308: 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
 2309: 
       	/* No code was emitted since the last label: reuse it. */
 2310: 	if (compiler->last_label && compiler->last_label->size == compiler->size)
 2311: 		return compiler->last_label;
 2312: 
 2313: 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
 2314: 	PTR_FAIL_IF(!label);
 2315: 	set_label(label, compiler);
 2316: 
 2317: 	buf = (sljit_ub*)ensure_buf(compiler, 2);
 2318: 	PTR_FAIL_IF(!buf);
 2319: 
       	/* NOTE(review): a record starting with 0 is presumably a marker for
       	   the code generator rather than instruction bytes; the second byte
       	   is 0 here, 1 in sljit_emit_const() and type + 4 in the jump
       	   emitters - confirm against the reader of these records. */
 2320: 	*buf++ = 0;
 2321: 	*buf++ = 0;
 2322: 
 2323: 	return label;
 2324: }
 2325: 
 /*
  * Create a jump (or call) whose target is set later.
  *
  * The low 8 bits of type select the jump kind; SLJIT_REWRITABLE_JUMP is
  * passed on to set_jump().  For call types (>= SLJIT_CALL1) the argument
  * setup is emitted with call_with_args().  No instruction bytes are written
  * here: the worst case instruction size is added to compiler->size and a
  * two byte (0, type + 4) record is appended to the instruction buffer.
  * Returns the jump, or leaves through the PTR_FAIL_IF macros on failure.
  */
 2326: SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
 2327: {
 2328: 	sljit_ub *buf;
 2329: 	struct sljit_jump *jump;
 2330: 
 2331: 	CHECK_ERROR_PTR();
 2332: 	check_sljit_emit_jump(compiler, type);
 2333: 
       	/* Jump types up to SLJIT_JUMP get the saved flags restored first;
       	   in every case the saved flags are forgotten afterwards. */
 2334: 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
 2335: 		if ((type & 0xff) <= SLJIT_JUMP)
 2336: 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
 2337: 		compiler->flags_saved = 0;
 2338: 	}
 2339: 
 2340: 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2341: 	PTR_FAIL_IF_NULL(jump);
 2342: 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 2343: 	type &= 0xff;
 2344: 
 2345: 	if (type >= SLJIT_CALL1)
 2346: 		PTR_FAIL_IF(call_with_args(compiler, type));
 2347: 
 2348: 	/* Worst case size. */
 2349: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2350: 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
 2351: #else
 2352: 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
 2353: #endif
 2354: 
 2355: 	buf = (sljit_ub*)ensure_buf(compiler, 2);
 2356: 	PTR_FAIL_IF_NULL(buf);
 2357: 
 2358: 	*buf++ = 0;
 2359: 	*buf++ = type + 4;
 2360: 	return jump;
 2361: }
 2362: 
 /*
  * Emit a jump or call to the address given by src/srcw.
  *
  *  - src == SLJIT_IMM: a sljit_jump with the fixed target srcw is recorded
  *    (JUMP_ADDR) together with a (0, type + 4) record, as in
  *    sljit_emit_jump();
  *  - otherwise an indirect instruction with opcode 0xff is emitted, using
  *    opcode extension /2 for types >= SLJIT_FAST_CALL and /4 for the rest.
  *
  * For call types (>= SLJIT_CALL1) the argument setup is emitted with
  * call_with_args() before the transfer.
  * Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 2363: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
 2364: {
 2365: 	sljit_ub *code;
 2366: 	struct sljit_jump *jump;
 2367: 
 2368: 	CHECK_ERROR();
 2369: 	check_sljit_emit_ijump(compiler, type, src, srcw);
 2370: 
 2371: 	CHECK_EXTRA_REGS(src, srcw, (void)0);
 2372: 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
 2373: 		if (type <= SLJIT_JUMP)
 2374: 			FAIL_IF(emit_restore_flags(compiler, 0));
 2375: 		compiler->flags_saved = 0;
 2376: 	}
 2377: 
 2378: 	if (type >= SLJIT_CALL1) {
       		/* NOTE(review): the adjustments below presumably protect src from
       		   the argument setup done by call_with_args() (a register it
       		   overwrites, or stack offsets it shifts) - confirm against
       		   call_with_args(). */
 2379: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2380: #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
 2381: 		if (src == SLJIT_TEMPORARY_REG3) {
 2382: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
 2383: 			src = TMP_REGISTER;
 2384: 		}
 2385: 		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
       			/* With an index register (bits 4-7) the target is loaded
       			   now; with a plain base the offset is corrected instead. */
 2386: 			if (src & 0xf0) {
 2387: 				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
 2388: 				src = TMP_REGISTER;
 2389: 			}
 2390: 			else
 2391: 				srcw += sizeof(sljit_w);
 2392: 		}
 2393: #else
 2394: 		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
 2395: 			if (src & 0xf0) {
 2396: 				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
 2397: 				src = TMP_REGISTER;
 2398: 			}
 2399: 			else
 2400: 				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
 2401: 		}
 2402: #endif
 2403: #endif
 2404: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
 2405: 		if (src == SLJIT_TEMPORARY_REG3) {
 2406: 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
 2407: 			src = TMP_REGISTER;
 2408: 		}
 2409: #endif
 2410: 		FAIL_IF(call_with_args(compiler, type));
 2411: 	}
 2412: 
 2413: 	if (src == SLJIT_IMM) {
 2414: 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 2415: 		FAIL_IF_NULL(jump);
 2416: 		set_jump(jump, compiler, JUMP_ADDR);
 2417: 		jump->u.target = srcw;
 2418: 
 2419: 		/* Worst case size. */
 2420: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2421: 		compiler->size += 5;
 2422: #else
 2423: 		compiler->size += 10 + 3;
 2424: #endif
 2425: 
 2426: 		code = (sljit_ub*)ensure_buf(compiler, 2);
 2427: 		FAIL_IF_NULL(code);
 2428: 
 2429: 		*code++ = 0;
 2430: 		*code++ = type + 4;
 2431: 	}
 2432: 	else {
 2433: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2434: 		/* REX_W is not necessary (src is not immediate). */
 2435: 		compiler->mode32 = 1;
 2436: #endif
 2437: 		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
 2438: 		FAIL_IF(!code);
 2439: 		*code++ = 0xff;
       		/* 0xff /2 is an indirect call, 0xff /4 an indirect jump. */
 2440: 		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
 2441: 	}
 2442: 	return SLJIT_SUCCESS;
 2443: }
 2444: 
 /*
  * Materialise a condition as the integer 0 or 1.
  *
  * type selects the condition.  With op == SLJIT_MOV the value is stored in
  * dst; with any other op the value is combined with dst through
  * sljit_emit_op2(op, dst, dst, value).  Nothing is emitted when dst is
  * SLJIT_UNUSED.
  *
  * The value is produced with SETcc (0x0f, cond_set) on a byte register.  A
  * SLJIT_MOV result is then widened with MOVZX (0x0f 0xb6).
  * Returns SLJIT_SUCCESS or the error of the failing emit helper.
  */
 2445: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
 2446: {
 2447: 	sljit_ub *buf;
 2448: 	sljit_ub cond_set = 0;
 2449: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2450: 	int reg;
 2451: #endif
 2452: 
 2453: 	CHECK_ERROR();
 2454: 	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
 2455: 
 2456: 	if (dst == SLJIT_UNUSED)
 2457: 		return SLJIT_SUCCESS;
 2458: 
 2459: 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
       	/* The condition is read from the CPU flags, so saved flags must be
       	   live again before SETcc. */
 2460: 	if (SLJIT_UNLIKELY(compiler->flags_saved))
 2461: 		FAIL_IF(emit_restore_flags(compiler, 0));
 2462: 
       	/* Choose the second opcode byte of SETcc.  The float conditions share
       	   the codes of the unsigned integer conditions. */
 2463: 	switch (type) {
 2464: 	case SLJIT_C_EQUAL:
 2465: 	case SLJIT_C_FLOAT_EQUAL:
 2466: 		cond_set = 0x94;
 2467: 		break;
 2468: 
 2469: 	case SLJIT_C_NOT_EQUAL:
 2470: 	case SLJIT_C_FLOAT_NOT_EQUAL:
 2471: 		cond_set = 0x95;
 2472: 		break;
 2473: 
 2474: 	case SLJIT_C_LESS:
 2475: 	case SLJIT_C_FLOAT_LESS:
 2476: 		cond_set = 0x92;
 2477: 		break;
 2478: 
 2479: 	case SLJIT_C_GREATER_EQUAL:
 2480: 	case SLJIT_C_FLOAT_GREATER_EQUAL:
 2481: 		cond_set = 0x93;
 2482: 		break;
 2483: 
 2484: 	case SLJIT_C_GREATER:
 2485: 	case SLJIT_C_FLOAT_GREATER:
 2486: 		cond_set = 0x97;
 2487: 		break;
 2488: 
 2489: 	case SLJIT_C_LESS_EQUAL:
 2490: 	case SLJIT_C_FLOAT_LESS_EQUAL:
 2491: 		cond_set = 0x96;
 2492: 		break;
 2493: 
 2494: 	case SLJIT_C_SIG_LESS:
 2495: 		cond_set = 0x9c;
 2496: 		break;
 2497: 
 2498: 	case SLJIT_C_SIG_GREATER_EQUAL:
 2499: 		cond_set = 0x9d;
 2500: 		break;
 2501: 
 2502: 	case SLJIT_C_SIG_GREATER:
 2503: 		cond_set = 0x9f;
 2504: 		break;
 2505: 
 2506: 	case SLJIT_C_SIG_LESS_EQUAL:
 2507: 		cond_set = 0x9e;
 2508: 		break;
 2509: 
 2510: 	case SLJIT_C_OVERFLOW:
 2511: 	case SLJIT_C_MUL_OVERFLOW:
 2512: 		cond_set = 0x90;
 2513: 		break;
 2514: 
 2515: 	case SLJIT_C_NOT_OVERFLOW:
 2516: 	case SLJIT_C_MUL_NOT_OVERFLOW:
 2517: 		cond_set = 0x91;
 2518: 		break;
 2519: 
 2520: 	case SLJIT_C_FLOAT_NAN:
 2521: 		cond_set = 0x9a;
 2522: 		break;
 2523: 
 2524: 	case SLJIT_C_FLOAT_NOT_NAN:
 2525: 		cond_set = 0x9b;
 2526: 		break;
 2527: 	}
 2528: 
 2529: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
       	/* Build the value directly in dst when it is a register and a plain
       	   move was requested; otherwise build it in TMP_REGISTER. */
 2530: 	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 2531: 
 2532: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
 2533: 	FAIL_IF(!buf);
 2534: 	INC_SIZE(4 + 4);
       	/* First instruction: REX prefix + SETcc on the low byte of reg.
       	   A REX prefix is always emitted (0x40 when no extension bit is
       	   needed). */
 2535: 	/* Set low register to conditional flag. */
 2536: 	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
 2537: 	*buf++ = 0x0f;
 2538: 	*buf++ = cond_set;
 2539: 	*buf++ = 0xC0 | reg_lmap[reg];
       	/* Second instruction: REX.W + MOVZX reg, low byte of reg. */
 2540: 	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
 2541: 	*buf++ = 0x0f;
 2542: 	*buf++ = 0xb6;
 2543: 	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];
 2544: 
 2545: 	if (reg == TMP_REGISTER) {
 2546: 		if (op == SLJIT_MOV) {
 2547: 			compiler->mode32 = 0;
 2548: 			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
 2549: 		}
 2550: 		else {
       			/* dst = dst op value.  NOTE(review): skip_checks presumably
       			   makes the nested call skip its argument checks - confirm. */
 2551: #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2552: 			compiler->skip_checks = 1;
 2553: #endif
 2554: 			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
 2555: 		}
 2556: 	}
 2557: #else
 2558: 	if (op == SLJIT_MOV) {
 2559: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
       			/* SETcc and MOVZX are applied directly to dst. */
 2560: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
 2561: 			FAIL_IF(!buf);
 2562: 			INC_SIZE(3 + 3);
 2563: 			/* Set low byte to conditional flag. */
 2564: 			*buf++ = 0x0f;
 2565: 			*buf++ = cond_set;
 2566: 			*buf++ = 0xC0 | reg_map[dst];
 2567: 
 2568: 			*buf++ = 0x0f;
 2569: 			*buf++ = 0xb6;
 2570: 			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
 2571: 		}
 2572: 		else {
       			/* Any other dst: go through al.  SLJIT_TEMPORARY_REG1 is kept
       			   in TMP_REGISTER meanwhile and restored at the end. */
 2573: 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2574: 
 2575: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
 2576: 			FAIL_IF(!buf);
 2577: 			INC_SIZE(3 + 3);
 2578: 			/* Set al to conditional flag. */
 2579: 			*buf++ = 0x0f;
 2580: 			*buf++ = cond_set;
 2581: 			*buf++ = 0xC0;
 2582: 
 2583: 			*buf++ = 0x0f;
 2584: 			*buf++ = 0xb6;
       			/* MOVZX straight into a register dst, otherwise into the
       			   first temporary followed by a store to dst. */
 2585: 			if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS)
 2586: 				*buf = 0xC0 | (reg_map[dst] << 3);
 2587: 			else {
 2588: 				*buf = 0xC0;
 2589: 				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
 2590: 			}
 2591: 
 2592: 			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
 2593: 		}
 2594: 	}
 2595: 	else {
 2596: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
       			/* Copy dst to TMP_REGISTER, then overwrite only the low byte
       			   of dst with the condition; the op2 below combines both. */
 2597: 			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
 2598: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
 2599: 			FAIL_IF(!buf);
 2600: 			INC_SIZE(3);
 2601: 
 2602: 			*buf++ = 0x0f;
 2603: 			*buf++ = cond_set;
 2604: 			*buf++ = 0xC0 | reg_map[dst];
 2605: 		}
 2606: 		else {
 2607: 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
 2608: 
 2609: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
 2610: 			FAIL_IF(!buf);
 2611: 			INC_SIZE(3 + 3 + 1);
 2612: 			/* Set al to conditional flag. */
 2613: 			*buf++ = 0x0f;
 2614: 			*buf++ = cond_set;
 2615: 			*buf++ = 0xC0;
 2616: 
 2617: 			*buf++ = 0x0f;
 2618: 			*buf++ = 0xb6;
 2619: 			*buf++ = 0xC0;
 2620: 
       			/* xchg eax, TMP_REGISTER (0x90 + reg): the value ends up in
       			   TMP_REGISTER and the first temporary is restored. */
 2621: 			*buf++ = 0x90 + reg_map[TMP_REGISTER];
 2622: 		}
 2623: #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
 2624: 		compiler->skip_checks = 1;
 2625: #endif
 2626: 		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
 2627: 	}
 2628: #endif
 2629: 
 2630: 	return SLJIT_SUCCESS;
 2631: }
 2632: 
 /*
  * Emit a load of the constant init_value into dst and return a sljit_const
  * describing it.
  *
  * On x86-64 the value is loaded with emit_load_imm64() into dst when it is a
  * register, otherwise into TMP_REGISTER and then moved to dst.  On x86-32 a
  * single emit_mov() of the immediate is used, with TMP_REGISTER as target
  * when dst is SLJIT_UNUSED.  A two byte (0, 1) record is appended to the
  * instruction buffer right after the load.
  * Returns the constant, or NULL on failure.
  */
 2633: SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
 2634: {
 2635: 	sljit_ub *buf;
 2636: 	struct sljit_const *const_;
 2637: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2638: 	int reg;
 2639: #endif
 2640: 
 2641: 	CHECK_ERROR_PTR();
 2642: 	check_sljit_emit_const(compiler, dst, dstw, init_value);
 2643: 
 2644: 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
 2645: 
 2646: 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 2647: 	PTR_FAIL_IF(!const_);
 2648: 	set_const(const_, compiler);
 2649: 
 2650: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 2651: 	compiler->mode32 = 0;
 2652: 	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
 2653: 
 2654: 	if (emit_load_imm64(compiler, reg, init_value))
 2655: 		return NULL;
 2656: #else
 2657: 	if (dst == SLJIT_UNUSED)
 2658: 		dst = TMP_REGISTER;
 2659: 
 2660: 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
 2661: 		return NULL;
 2662: #endif
 2663: 
       	/* The record follows the instruction that carries the immediate. */
 2664: 	buf = (sljit_ub*)ensure_buf(compiler, 2);
 2665: 	PTR_FAIL_IF(!buf);
 2666: 
 2667: 	*buf++ = 0;
 2668: 	*buf++ = 1;
 2669: 
 2670: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
       	/* The value was built in the temporary: copy it to the real target. */
 2671: 	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
 2672: 		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
 2673: 			return NULL;
 2674: #endif
 2675: 
 2676: 	return const_;
 2677: }
 2678: 
 /*
  * Patch the target of an already generated jump.  addr is the address of
  * the target field inside the generated code.
  *
  * On x86-32 the field receives the distance from the end of the 4 byte
  * field (addr + 4) to new_addr; on x86-64 it receives new_addr itself.
  */
 2679: SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
 2680: {
 2681: #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 2682: 	*(sljit_w*)addr = new_addr - (addr + 4);
 2683: #else
 2684: 	*(sljit_uw*)addr = new_addr;
 2685: #endif
 2686: }
 2687: 
 /* Patch an already generated constant: store new_constant as a sljit_w at
    address addr inside the generated code. */
 2688: SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
 2689: {
 2690: 	*(sljit_w*)addr = new_constant;
 2691: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>