File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre / sljit / sljitNativeX86_64.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:50:25 2012 UTC (12 years, 4 months ago) by misho
Branches: pcre, MAIN
CVS tags: v8_30, HEAD
pcre

    1: /*
    2:  *    Stack-less Just-In-Time compiler
    3:  *
    4:  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
    5:  *
    6:  * Redistribution and use in source and binary forms, with or without modification, are
    7:  * permitted provided that the following conditions are met:
    8:  *
    9:  *   1. Redistributions of source code must retain the above copyright notice, this list of
   10:  *      conditions and the following disclaimer.
   11:  *
   12:  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
   13:  *      of conditions and the following disclaimer in the documentation and/or other materials
   14:  *      provided with the distribution.
   15:  *
   16:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
   17:  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
   19:  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   21:  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   22:  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   23:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   24:  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25:  */
   26: 
   27: /* x86 64-bit arch dependent functions. */
   28: 
   29: static int emit_load_imm64(struct sljit_compiler *compiler, int reg, sljit_w imm)
   30: {
   31: 	sljit_ub *buf;
   32: 
   33: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_w));
   34: 	FAIL_IF(!buf);
   35: 	INC_SIZE(2 + sizeof(sljit_w));
   36: 	*buf++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
   37: 	*buf++ = 0xb8 + (reg_map[reg] & 0x7);
   38: 	*(sljit_w*)buf = imm;
   39: 	return SLJIT_SUCCESS;
   40: }
   41: 
   42: static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type)
   43: {
   44: 	if (type < SLJIT_JUMP) {
   45: 		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
   46: 		*code_ptr++ = 10 + 3;
   47: 	}
   48: 
   49: 	SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first);
   50: 	*code_ptr++ = REX_W | REX_B;
   51: 	*code_ptr++ = 0xb8 + 1;
   52: 	jump->addr = (sljit_uw)code_ptr;
   53: 
   54: 	if (jump->flags & JUMP_LABEL)
   55: 		jump->flags |= PATCH_MD;
   56: 	else
   57: 		*(sljit_w*)code_ptr = jump->u.target;
   58: 
   59: 	code_ptr += sizeof(sljit_w);
   60: 	*code_ptr++ = REX_B;
   61: 	*code_ptr++ = 0xff;
   62: 	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? 0xd1 /* call */ : 0xe1 /* jmp */;
   63: 
   64: 	return code_ptr;
   65: }
   66: 
   67: static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type)
   68: {
   69: 	sljit_w delta = addr - ((sljit_w)code_ptr + 1 + sizeof(sljit_hw));
   70: 
   71: 	if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) {
   72: 		*code_ptr++ = (type == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
   73: 		*(sljit_w*)code_ptr = delta;
   74: 	}
   75: 	else {
   76: 		SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
   77: 		*code_ptr++ = REX_W | REX_B;
   78: 		*code_ptr++ = 0xb8 + 1;
   79: 		*(sljit_w*)code_ptr = addr;
   80: 		code_ptr += sizeof(sljit_w);
   81: 		*code_ptr++ = REX_B;
   82: 		*code_ptr++ = 0xff;
   83: 		*code_ptr++ = (type == 2) ? 0xd1 /* call */ : 0xe1 /* jmp */;
   84: 	}
   85: 
   86: 	return code_ptr;
   87: }
   88: 
/* Emits the function entry sequence.
   - Stores temporaries and saveds in the compiler and clears flags_saved.
   - Pushes the saved registers selected by 'saveds' (on Win64 also
     SLJIT_TEMPORARY_EREG2 when temporaries >= 5 and SLJIT_LOCALS_REG when
     local_size > 0).
   - Copies up to three incoming arguments into SLJIT_SAVED_REG1..3.
   - Rounds local_size so that pushed bytes + locals is a multiple of 16,
     records it in compiler->local_size and emits the stack pointer
     adjustment for it.
   Returns SLJIT_SUCCESS; leaves early through FAIL_IF when a buffer cannot
   be obtained or a nested emitter fails. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
{
	/* size: number of code bytes of the push / argument move sequence.
	   pushed_size: number of stack bytes occupied by the pushes. */
	int size, pushed_size;
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size);

	compiler->temporaries = temporaries;
	compiler->saveds = saveds;
	compiler->flags_saved = 0;

	/* One opcode byte for each pushed saved register. */
	size = saveds;
	/* Including the return address saved by the call instruction. */
	pushed_size = (saveds + 1) * sizeof(sljit_w);
#ifndef _WIN64
	/* Every saved register except the first one needs a REX prefix byte here. */
	if (saveds >= 2)
		size += saveds - 1;
#else
	/* Saving the virtual stack pointer. */
	compiler->has_locals = local_size > 0;
	if (local_size > 0) {
		/* REX prefix + push of SLJIT_LOCALS_REG. */
		size += 2;
		pushed_size += sizeof(sljit_w);
	}
	/* Only the 4th and 5th saved registers need a REX prefix byte here. */
	if (saveds >= 4)
		size += saveds - 3;
	if (temporaries >= 5) {
		/* REX prefix + push of SLJIT_TEMPORARY_EREG2. */
		size += (5 - 4) * 2;
		pushed_size += sizeof(sljit_w);
	}
#endif
	/* Each argument move below is three bytes long. */
	size += args * 3;
	if (size > 0) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);

		INC_SIZE(size);
		/* Pushes run from the highest saved register down to SLJIT_SAVED_REG1. */
		if (saveds >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]);
		}
		if (saveds >= 4) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]);
		}
		if (saveds >= 3) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg);
			*buf++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]);
		}
		if (saveds >= 2) {
#ifndef _WIN64
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg);
			*buf++ = REX_B;
#else
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg);
#endif
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]);
		}
		if (saveds >= 1) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg);
			PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]);
		}
#ifdef _WIN64
		if (temporaries >= 5) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
		}
		if (local_size > 0) {
			SLJIT_COMPILE_ASSERT(reg_map[SLJIT_LOCALS_REG] >= 8, locals_reg_is_hireg);
			*buf++ = REX_B;
			PUSH_REG(reg_lmap[SLJIT_LOCALS_REG]);
		}
#endif

		/* Register to register moves (opcode 0x8b, mod = 3): the low three bits
		   of the last byte select the source, i.e. the register the argument
		   arrives in (7, 6, 2 here; 1, 2 and 0 with REX.B on Win64). */
#ifndef _WIN64
		if (args > 0) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7;
		}
		if (args > 1) {
			*buf++ = REX_W | REX_R;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6;
		}
		if (args > 2) {
			*buf++ = REX_W | REX_R;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2;
		}
#else
		if (args > 0) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1;
		}
		if (args > 1) {
			*buf++ = REX_W;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2;
		}
		if (args > 2) {
			*buf++ = REX_W | REX_B;
			*buf++ = 0x8b;
			*buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0;
		}
#endif
	}

	/* Round up so that pushed_size + local_size is a multiple of 16. */
	local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
#ifdef _WIN64
	/* Four extra machine words are always added to the local area on Win64. */
	local_size += 4 * sizeof(sljit_w);
	compiler->local_size = local_size;
	if (local_size > 1024) {
		/* Allocate the stack for the function itself. */
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!buf);
		INC_SIZE(4);
		/* Stack pointer -= imm8 (opcode 0x83, /5). */
		*buf++ = REX_W;
		*buf++ = 0x83;
		*buf++ = 0xc0 | (5 << 3) | 4;
		/* Pushed size must be divisible by 8. */
		SLJIT_ASSERT(!(pushed_size & 0x7));
		/* NOTE(review): 5 instead of 4 words presumably keeps the stack 16 byte
		   aligned for the call below - confirm. */
		if (pushed_size & 0x8) {
			*buf++ = 5 * sizeof(sljit_w);
			local_size -= 5 * sizeof(sljit_w);
		} else {
			*buf++ = 4 * sizeof(sljit_w);
			local_size -= 4 * sizeof(sljit_w);
		}
		/* The remaining size is passed to sljit_touch_stack in SLJIT_TEMPORARY_REG1;
		   the rest of local_size is subtracted by the common code below. */
		FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack)));
	}
#else
	compiler->local_size = local_size;
	if (local_size > 0) {
#endif
		/* In case of Win64, local_size is always > 4 * sizeof(sljit_w) */
		if (local_size <= 127) {
			/* Stack pointer -= imm8 (4 byte instruction). */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!buf);
			INC_SIZE(4);
			*buf++ = REX_W;
			*buf++ = 0x83;
			*buf++ = 0xc0 | (5 << 3) | 4;
			*buf++ = local_size;
		}
		else {
			/* Stack pointer -= imm32 (7 byte instruction). */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!buf);
			INC_SIZE(7);
			*buf++ = REX_W;
			*buf++ = 0x81;
			*buf++ = 0xc0 | (5 << 3) | 4;
			*(sljit_hw*)buf = local_size;
			buf += sizeof(sljit_hw);
		}
#ifndef _WIN64
	}
#endif

#ifdef _WIN64
	if (compiler->has_locals) {
		/* lea SLJIT_LOCALS_REG, [stack pointer + 4 * sizeof(sljit_w)]
		   (opcode 0x8d, SIB byte 0x24, 8 bit displacement). */
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!buf);
		INC_SIZE(5);
		*buf++ = REX_W | REX_R;
		*buf++ = 0x8d;
		*buf++ = 0x40 | (reg_lmap[SLJIT_LOCALS_REG] << 3) | 0x4;
		*buf++ = 0x24;
		*buf = 4 * sizeof(sljit_w);
	}
#endif

	return SLJIT_SUCCESS;
}
  274: 
  275: SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size)
  276: {
  277: 	int pushed_size;
  278: 
  279: 	CHECK_ERROR_VOID();
  280: 	check_sljit_set_context(compiler, args, temporaries, saveds, local_size);
  281: 
  282: 	compiler->temporaries = temporaries;
  283: 	compiler->saveds = saveds;
  284: 	/* Including the return address saved by the call instruction. */
  285: 	pushed_size = (saveds + 1) * sizeof(sljit_w);
  286: #ifdef _WIN64
  287: 	compiler->has_locals = local_size > 0;
  288: 	if (local_size > 0)
  289: 		pushed_size += sizeof(sljit_w);
  290: 	if (temporaries >= 5)
  291: 		pushed_size += sizeof(sljit_w);
  292: #endif
  293: 	compiler->local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size;
  294: #ifdef _WIN64
  295: 	compiler->local_size += 4 * sizeof(sljit_w);
  296: #endif
  297: }
  298: 
  299: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw)
  300: {
  301: 	int size;
  302: 	sljit_ub *buf;
  303: 
  304: 	CHECK_ERROR();
  305: 	check_sljit_emit_return(compiler, op, src, srcw);
  306: 
  307: 	compiler->flags_saved = 0;
  308: 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
  309: 
  310: 	if (compiler->local_size > 0) {
  311: 		if (compiler->local_size <= 127) {
  312: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
  313: 			FAIL_IF(!buf);
  314: 			INC_SIZE(4);
  315: 			*buf++ = REX_W;
  316: 			*buf++ = 0x83;
  317: 			*buf++ = 0xc0 | (0 << 3) | 4;
  318: 			*buf = compiler->local_size;
  319: 		}
  320: 		else {
  321: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 7);
  322: 			FAIL_IF(!buf);
  323: 			INC_SIZE(7);
  324: 			*buf++ = REX_W;
  325: 			*buf++ = 0x81;
  326: 			*buf++ = 0xc0 | (0 << 3) | 4;
  327: 			*(sljit_hw*)buf = compiler->local_size;
  328: 		}
  329: 	}
  330: 
  331: 	size = 1 + compiler->saveds;
  332: #ifndef _WIN64
  333: 	if (compiler->saveds >= 2)
  334: 		size += compiler->saveds - 1;
  335: #else
  336: 	if (compiler->has_locals)
  337: 		size += 2;
  338: 	if (compiler->saveds >= 4)
  339: 		size += compiler->saveds - 3;
  340: 	if (compiler->temporaries >= 5)
  341: 		size += (5 - 4) * 2;
  342: #endif
  343: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
  344: 	FAIL_IF(!buf);
  345: 
  346: 	INC_SIZE(size);
  347: 
  348: #ifdef _WIN64
  349: 	if (compiler->has_locals) {
  350: 		*buf++ = REX_B;
  351: 		POP_REG(reg_lmap[SLJIT_LOCALS_REG]);
  352: 	}
  353: 	if (compiler->temporaries >= 5) {
  354: 		*buf++ = REX_B;
  355: 		POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
  356: 	}
  357: #endif
  358: 	if (compiler->saveds >= 1)
  359: 		POP_REG(reg_map[SLJIT_SAVED_REG1]);
  360: 	if (compiler->saveds >= 2) {
  361: #ifndef _WIN64
  362: 		*buf++ = REX_B;
  363: #endif
  364: 		POP_REG(reg_lmap[SLJIT_SAVED_REG2]);
  365: 	}
  366: 	if (compiler->saveds >= 3) {
  367: #ifndef _WIN64
  368: 		*buf++ = REX_B;
  369: #endif
  370: 		POP_REG(reg_lmap[SLJIT_SAVED_REG3]);
  371: 	}
  372: 	if (compiler->saveds >= 4) {
  373: 		*buf++ = REX_B;
  374: 		POP_REG(reg_lmap[SLJIT_SAVED_EREG1]);
  375: 	}
  376: 	if (compiler->saveds >= 5) {
  377: 		*buf++ = REX_B;
  378: 		POP_REG(reg_lmap[SLJIT_SAVED_EREG2]);
  379: 	}
  380: 
  381: 	RET();
  382: 	return SLJIT_SUCCESS;
  383: }
  384: 
  385: /* --------------------------------------------------------------------- */
  386: /*  Operators                                                            */
  387: /* --------------------------------------------------------------------- */
  388: 
  389: static int emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_w imm)
  390: {
  391: 	sljit_ub *buf;
  392: 
  393: 	if (rex != 0) {
  394: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_hw));
  395: 		FAIL_IF(!buf);
  396: 		INC_SIZE(2 + sizeof(sljit_hw));
  397: 		*buf++ = rex;
  398: 		*buf++ = opcode;
  399: 		*(sljit_hw*)buf = imm;
  400: 	}
  401: 	else {
  402: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_hw));
  403: 		FAIL_IF(!buf);
  404: 		INC_SIZE(1 + sizeof(sljit_hw));
  405: 		*buf++ = opcode;
  406: 		*(sljit_hw*)buf = imm;
  407: 	}
  408: 	return SLJIT_SUCCESS;
  409: }
  410: 
/* Reserves and partially encodes one instruction.

   size: the low four bits give the number of opcode bytes the caller will
         fill in; the remaining bits are EX86_* flags.
   a, imma: the register or immediate operand (encoded into the reg field of
         the mod r/m byte, or appended as an immediate).
   b, immb: the general operand: a register, or a memory operand whose low
         four bits select the base register, bits 4-7 the index register and
         immb the displacement (or the shift amount of the index register).

   The function writes the optional 0xf2 / 0x66 prefixes, the REX prefix,
   the mod r/m byte, the SIB byte, the displacement and the immediate. For
   EX86_BIN_INS with an immediate and for EX86_SHIFT_INS it also writes the
   first opcode byte itself.

   Returns a pointer to the opcode area (one byte further for shift
   instructions, i.e. to the mod r/m byte), or NULL when the preparatory
   immediate load fails. */
static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size,
	/* The register or immediate operand. */
	int a, sljit_w imma,
	/* The general operand (not immediate). */
	int b, sljit_w immb)
{
	sljit_ub *buf;
	sljit_ub *buf_ptr;
	sljit_ub rex = 0;
	int flags = size & ~0xf;
	int inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
#endif

	/* From here on 'size' is only the number of opcode bytes. */
	size &= 0xf;
	inst_size = size;

	/* A displacement which does not fit into 32 bits is loaded into TMP_REG3,
	   which then becomes the index register (when a base register is present)
	   or the base register of the operand. */
	if ((b & SLJIT_MEM) && !(b & 0xf0) && NOT_HALFWORD(immb)) {
		if (emit_load_imm64(compiler, TMP_REG3, immb))
			return NULL;
		immb = 0;
		if (b & 0xf)
			b |= TMP_REG3 << 4;
		else
			b |= TMP_REG3;
	}

	/* REX.W is used unless mode32 is set or the instruction opts out of it. */
	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
	if (flags & EX86_PREF_F2)
		inst_size++;
#endif
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & 0x0f) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_hw); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & 0x0f] >= 8)
				rex |= REX_B;
			if (immb != 0 && !(b & 0xf0)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_b);
				else
					inst_size += sizeof(sljit_hw);
			}
		}

#ifndef _WIN64
		/* A SLJIT_LOCALS_REG base without index is rewritten to use SLJIT_LOCALS_REG
		   as index as well, which selects the SIB form in the encoder below. */
		if ((b & 0xf) == SLJIT_LOCALS_REG && (b & 0xf0) == 0)
			b |= SLJIT_LOCALS_REG << 4;
#endif

		if ((b & 0xf0) != SLJIT_UNUSED) {
			inst_size += 1; /* SIB byte. */
			if (reg_map[(b >> 4) & 0x0f] >= 8)
				rex |= REX_X;
		}
	}
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
	else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
		rex |= REX_B;
#else
	else if (reg_map[b] >= 8)
		rex |= REX_B;
#endif

	/* Calculate the size of a (the immediate), or its REX bit (a register). */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			/* Binary instructions use the sign extended 8 bit form when possible. */
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			/* The shift amount is masked; a shift by 1 has no immediate byte. */
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_hw);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
		if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
			rex |= REX_R;
#else
		if (reg_map[a] >= 8)
			rex |= REX_R;
#endif
	}

	if (rex)
		inst_size++;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!buf);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
	if (flags & EX86_PREF_F2)
		*buf++ = 0xf2;
#endif
	if (flags & EX86_PREF_66)
		*buf++ = 0x66;
	if (rex)
		*buf++ = rex;
	/* buf now addresses the opcode area, buf_ptr the mod r/m byte after it. */
	buf_ptr = buf + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		/* Opcode 0x83 takes an 8 bit, 0x81 a 32 bit immediate. */
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81;

		/* The reg field of the mod r/m byte starts as zero or as the register a. */
		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
		else if (!(flags & EX86_SSE2))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
#else
		else
			*buf_ptr = reg_lmap[a] << 3;
#endif
	}
	else {
		/* Shift opcodes: 0xd1 shifts by one, 0xc1 by an 8 bit immediate and
		   0xd3 by the register a (asserted above to be SLJIT_PREF_SHIFT_REG). */
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*buf = 0xd1;
			else
				*buf = 0xc1;
		} else
			*buf = 0xd3;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		/* Register operand: mod = 3. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
		*buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
#else
		*buf_ptr++ |= 0xc0 + reg_lmap[b];
#endif
	else if ((b & 0x0f) != SLJIT_UNUSED) {
#ifdef _WIN64
		SLJIT_ASSERT((b & 0xf0) != (SLJIT_LOCALS_REG << 4));
#endif
		if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) {
			/* Base register with an optional displacement: mod = 1 selects an
			   8 bit, mod = 2 a 32 bit displacement. */
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & 0xf0) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & 0x0f];
			else {
				/* r/m = 4 announces a SIB byte. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					*(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_hw);
				}
			}
		}
		else {
			/* Base + index: immb is placed in the two scale bits of the SIB byte. */
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3) | (immb << 6);
		}
	}
	else {
		/* No base register: SIB byte 0x25 followed by a 32 bit displacement. */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		*(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_hw);
	}

	/* Append the immediate; a shift by one has none. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			*(short*)buf_ptr = imma;
		else if (!(flags & EX86_SHIFT_INS))
			*(sljit_hw*)buf_ptr = imma;
	}

	/* The opcode of shift instructions is already written, so their callers
	   get the address of the mod r/m byte instead. */
	return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1);
}
  633: 
  634: /* --------------------------------------------------------------------- */
  635: /*  Call / return instructions                                           */
  636: /* --------------------------------------------------------------------- */
  637: 
  638: static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type)
  639: {
  640: 	sljit_ub *buf;
  641: 
  642: #ifndef _WIN64
  643: 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 6 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
  644: 
  645: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
  646: 	FAIL_IF(!buf);
  647: 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
  648: 	if (type >= SLJIT_CALL3) {
  649: 		*buf++ = REX_W;
  650: 		*buf++ = 0x8b;
  651: 		*buf++ = 0xc0 | (0x2 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
  652: 	}
  653: 	*buf++ = REX_W;
  654: 	*buf++ = 0x8b;
  655: 	*buf++ = 0xc0 | (0x7 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
  656: #else
  657: 	SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 2 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers);
  658: 
  659: 	buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
  660: 	FAIL_IF(!buf);
  661: 	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
  662: 	if (type >= SLJIT_CALL3) {
  663: 		*buf++ = REX_W | REX_R;
  664: 		*buf++ = 0x8b;
  665: 		*buf++ = 0xc0 | (0x0 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3];
  666: 	}
  667: 	*buf++ = REX_W;
  668: 	*buf++ = 0x8b;
  669: 	*buf++ = 0xc0 | (0x1 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1];
  670: #endif
  671: 	return SLJIT_SUCCESS;
  672: }
  673: 
  674: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size)
  675: {
  676: 	sljit_ub *buf;
  677: 
  678: 	CHECK_ERROR();
  679: 	check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size);
  680: 
  681: 	compiler->temporaries = temporaries;
  682: 	compiler->saveds = saveds;
  683: 	compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1);
  684: #ifdef _WIN64
  685: 	compiler->local_size += 4 * sizeof(sljit_w);
  686: #endif
  687: 
  688: 	/* For UNUSED dst. Uncommon, but possible. */
  689: 	if (dst == SLJIT_UNUSED)
  690: 		dst = TMP_REGISTER;
  691: 
  692: 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
  693: 		if (reg_map[dst] < 8) {
  694: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  695: 			FAIL_IF(!buf);
  696: 
  697: 			INC_SIZE(1);
  698: 			POP_REG(reg_lmap[dst]);
  699: 		}
  700: 		else {
  701: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
  702: 			FAIL_IF(!buf);
  703: 
  704: 			INC_SIZE(2);
  705: 			*buf++ = REX_B;
  706: 			POP_REG(reg_lmap[dst]);
  707: 		}
  708: 	}
  709: 	else if (dst & SLJIT_MEM) {
  710: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  711: 		/* REX_W is not necessary (src is not immediate). */
  712: 		compiler->mode32 = 1;
  713: #endif
  714: 		buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  715: 		FAIL_IF(!buf);
  716: 		*buf++ = 0x8f;
  717: 	}
  718: 	return SLJIT_SUCCESS;
  719: }
  720: 
  721: SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
  722: {
  723: 	sljit_ub *buf;
  724: 
  725: 	CHECK_ERROR();
  726: 	check_sljit_emit_fast_return(compiler, src, srcw);
  727: 
  728: 	CHECK_EXTRA_REGS(src, srcw, (void)0);
  729: 
  730: 	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
  731: 		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw));
  732: 		src = TMP_REGISTER;
  733: 	}
  734: 
  735: 	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
  736: 		if (reg_map[src] < 8) {
  737: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1);
  738: 			FAIL_IF(!buf);
  739: 
  740: 			INC_SIZE(1 + 1);
  741: 			PUSH_REG(reg_lmap[src]);
  742: 		}
  743: 		else {
  744: 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1);
  745: 			FAIL_IF(!buf);
  746: 
  747: 			INC_SIZE(2 + 1);
  748: 			*buf++ = REX_B;
  749: 			PUSH_REG(reg_lmap[src]);
  750: 		}
  751: 	}
  752: 	else if (src & SLJIT_MEM) {
  753: #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  754: 		/* REX_W is not necessary (src is not immediate). */
  755: 		compiler->mode32 = 1;
  756: #endif
  757: 		buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
  758: 		FAIL_IF(!buf);
  759: 		*buf++ = 0xff;
  760: 		*buf |= 6 << 3;
  761: 
  762: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
  763: 		FAIL_IF(!buf);
  764: 		INC_SIZE(1);
  765: 	}
  766: 	else {
  767: 		SLJIT_ASSERT(IS_HALFWORD(srcw));
  768: 		/* SLJIT_IMM. */
  769: 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1);
  770: 		FAIL_IF(!buf);
  771: 
  772: 		INC_SIZE(5 + 1);
  773: 		*buf++ = 0x68;
  774: 		*(sljit_hw*)buf = srcw;
  775: 		buf += sizeof(sljit_hw);
  776: 	}
  777: 
  778: 	RET();
  779: 	return SLJIT_SUCCESS;
  780: }
  781: 
  782: 
  783: /* --------------------------------------------------------------------- */
  784: /*  Extend input                                                         */
  785: /* --------------------------------------------------------------------- */
  786: 
  787: static int emit_mov_int(struct sljit_compiler *compiler, int sign,
  788: 	int dst, sljit_w dstw,
  789: 	int src, sljit_w srcw)
  790: {
  791: 	sljit_ub* code;
  792: 	int dst_r;
  793: 
  794: 	compiler->mode32 = 0;
  795: 
  796: 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
  797: 		return SLJIT_SUCCESS; /* Empty instruction. */
  798: 
  799: 	if (src & SLJIT_IMM) {
  800: 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
  801: 			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
  802: 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
  803: 				FAIL_IF(!code);
  804: 				*code = 0xc7;
  805: 				return SLJIT_SUCCESS;
  806: 			}
  807: 			return emit_load_imm64(compiler, dst, srcw);
  808: 		}
  809: 		compiler->mode32 = 1;
  810: 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw);
  811: 		FAIL_IF(!code);
  812: 		*code = 0xc7;
  813: 		compiler->mode32 = 0;
  814: 		return SLJIT_SUCCESS;
  815: 	}
  816: 
  817: 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_SAVED_REG3) ? dst : TMP_REGISTER;
  818: 
  819: 	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_SAVED_REG3))
  820: 		dst_r = src;
  821: 	else {
  822: 		if (sign) {
  823: 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
  824: 			FAIL_IF(!code);
  825: 			*code++ = 0x63;
  826: 		} else {
  827: 			compiler->mode32 = 1;
  828: 			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
  829: 			compiler->mode32 = 0;
  830: 		}
  831: 	}
  832: 
  833: 	if (dst & SLJIT_MEM) {
  834: 		compiler->mode32 = 1;
  835: 		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  836: 		FAIL_IF(!code);
  837: 		*code = 0x89;
  838: 		compiler->mode32 = 0;
  839: 	}
  840: 
  841: 	return SLJIT_SUCCESS;
  842: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>