diff -aruN shujit-0.7.1/ChangeLog shujit/ChangeLog --- shujit-0.7.1/ChangeLog Mon Mar 19 14:41:52 2001 +++ shujit/ChangeLog Mon Jul 2 15:19:35 2001 @@ -1,5 +1,55 @@ $Id$ +[20010702] + +strictfp のために fscale 命令ではなく乗算 (fmul 命令) を使うようにした。 +(compiler.h) + +0.7.2 リリース。 + +[20010629] + +マクロ CAUSE_STACKOVERFLOW と NULLEXC_BY_SIGNAL が define されている場合、 +これまで、NullPointerException が StackOverflowError だと解釈されてしまう +可能性がわずかにあった。この判定を厳密に行うようにした。 +あらゆる FP 演算を強制的に strictfp にした状態で +SPEC JVM98 の _209_db をデータサイズ 10 以上で実行して発覚。 +(signal.c) + +[20010626] + +SSE2 に対応させた。 +configure --enable-sse2 とした場合、浮動小数点数の四則演算他に +x87 命令ではなく SSE2 命令を使うようにした。 +SSE2 で行うようにした処理は次の通り: + - 四則演算 + 関係する内部命令を SSE2 命令に対応させた: + fadd, fsub, fmul, fdiv, dadd, dsub, dmul, ddiv および、 + fld, fld4, fload_fld, faload_fld, dld, dld8, dload_dld, daload_dld, + fst, fst_fstore, fst_fastore, dst, dst_dstore, dst_dastore + また、SSE2 を使う場合は strictfp のための処理をコンパイルしないようにした。 + strictfp の指定が一致した場合のみ inlining できる、という制約を外した。 + fld, fst, dld, dst 系内部命令の SSE2 移行に伴い、f2d, d2f の処理も変更。 + - 平方根 (Math#sqrt()) + - 32bit 整数から FP への変換 (バイトコード命令 i2f, i2d) +SSE2 への移行は保留した処理は次の通り: + - 64bit 整数からの変換 (バイトコード命令 l2f, l2d) + SSE2 だけでは厄介なので、x87 命令で処理。 + - FP から整数への変換 (バイトコード命令 f2i, f2l, d2i, d2l) + 面倒なので保留。 + - FP の比較 (バイトコード命令 fcmpl, fcmpg, dcmpl, dcmpg) + 面倒なので保留。 + +バイトコード命令 fneg, dneg で浮動小数点数の符号を反転するために、 +x87 命令 fchs を使うのは止めた。 +xor 命令で直接最上位ビットを反転させるようにした。 +(code.c, compile.c) + +[20010625] + +configure に --enable-sse2 オプションを追加した。 +(configure.in, config.h.in) + [20010319] 0.7.1 リリース。 diff -aruN shujit-0.7.1/README shujit/README --- shujit-0.7.1/README Mon Mar 19 14:43:58 2001 +++ shujit/README Mon Jul 2 14:11:37 2001 @@ -1,21 +1,21 @@ shuJIT - JIT compiler for Sun JVM/x86 http://www.shudo.net/jit/ - Kazuyuki Shudo + Kazuyuki Shudo * What this is -ShuJIT is a Just-in-Time bytecode compiler (JIT) which -works with Sun Microsystems' Java(tm) Virtual Machine -(JVM) such as JDK and JRE on Intel x86 processors. +ShuJIT is a Java Just-in-Time compiler (JIT) which +works with Sun Microsystems' Classic Virtual Machine +such as JDK and JRE on Intel x86 processors. * Platforms Working on the following platforms is confirmed. - Linux - - Blackdown JDK 1.2.2 FCS, gcc 2.95.2, glibc 2.2.1 - and Linux 2.4.3-pre4 - - JDK 1.1.8v3, gcc 2.95.2, glibc 2.2.1 and Linux 2.4.3-pre4 + - Blackdown JDK 1.2.2 FCS, gcc 2.95.2, glibc 2.2.3 + and Linux 2.4.5-ac22 + - JDK 1.1.8v3, gcc 2.95.2, glibc 2.2.3 and Linux 2.4.5-ac22 - JDK 1.1.7v1a, egcs 1.1.2, libc 5.4.38 and Linux 2.0.35 - FreeBSD @@ -53,11 +53,11 @@ - JDK 1.1.X or 1.2.X (or 1.3 classic VM) shuJIT can work with Sun's classic VM included in JDK. Other JVMs, i.e. IBM JDK and HotSpot VM, are not supported. -- gcc 2.95.2 or EGCS +- gcc 2.95.X or EGCS You can examine the version number of your GCC with -v option. % gcc -v gcc 2.96, which is a snapshot for developers, and gcc 2.7.X - may cause problems. You cannot use them to compile shuJIT. + cause problems. You cannot use those versions to compile shuJIT. - objdump (in GNU binutils) Most Linux distributions and ELF FreeBSD systems have it. - Ruby @@ -100,4 +100,4 @@ SHUDO Kazuyuki/首藤一幸 私をたばねないで あらせいとうの花のように - shudoh@muraoka.info.waseda.ac.jp + shudoh@computer.org diff -aruN shujit-0.7.1/code.c shujit/code.c --- shujit-0.7.1/code.c Sun Mar 11 16:10:16 2001 +++ shujit/code.c Fri Jun 29 16:45:15 2001 @@ -59,21 +59,22 @@ }; // for strictfp -#ifdef STRICT_USE_FSCALE -# ifdef STRICT_FSCALE_USE_FLOAT +#ifndef USE_SSE2 +# ifdef STRICT_USE_FSCALE +# ifdef STRICT_FSCALE_USE_FLOAT // exponents of scales float double_scale_pos = 15360.0f; // 16383 - 1023 float double_scale_neg = -15360.0f; // -(16383 - 1023) float single_scale_pos = 16256.0f; // 16383 - 127 float single_scale_neg = -16256.0f; // -(16383 - 127) -# else +# else // scales in integer int32_t double_scale_pos = 15360; int32_t double_scale_neg = -15360; int32_t single_scale_pos = 16256; int32_t single_scale_neg = -16256; -# endif // STRICT_FSCALE_USE_FLOAT -#else +# endif // STRICT_FSCALE_USE_FLOAT +# else // scales in extended precision floating-point unsigned const char double_scale_pos[10] = { 0, 0, 0, 0, 0, 0, 0, 0x80, 0xff, 0x7b }; // 2^ (16383 - 1023) @@ -83,7 +84,8 @@ { 0, 0, 0, 0, 0, 0, 0, 0x80, 0x7f, 0x7f }; // 2^ (16383 - 127) unsigned const char single_scale_neg[10] = { 0, 0, 0, 0, 0, 0, 0, 0x80, 0x7f, 0x00 }; // 2^ -(16383 - 127) -#endif // STRICT_USE_FSCALE +# endif // STRICT_USE_FSCALE +#endif // !USE_SSE2 #define COMPILEDCODE(DST) \ @@ -616,38 +618,39 @@ #endif // METAVM +#ifndef USE_SSE2 CODE(opc_strict_enter, strict_enter, STANY, STSTA, OPC_NONE) { -#ifdef STRICT_PRELOAD +# ifdef STRICT_PRELOAD // push scales into FPU register -# ifdef STRICT_USE_FSCALE -# ifdef STRICT_FSCALE_USE_FLOAT +# ifdef STRICT_USE_FSCALE +# ifdef STRICT_FSCALE_USE_FLOAT asm("flds %0\n\t" : : "m" (single_scale_neg)); asm("flds %0\n\t" : : "m" (double_scale_neg)); -# else +# else asm("fildl %0\n\t" : : "m" (single_scale_neg)); asm("fildl %0\n\t" : : "m" (double_scale_neg)); -# endif // STRICT_FSCALE_USE_FLOAT -# else // STRICT_USE_FSCALE +# endif // STRICT_FSCALE_USE_FLOAT +# else // STRICT_USE_FSCALE asm("fldt %0\n\t" : : "m" (*single_scale_neg)); asm("fldt %0\n\t" : : "m" (*single_scale_pos)); asm("fldt %0\n\t" : : "m" (*double_scale_neg)); asm("fldt %0\n\t" : : "m" (*double_scale_pos)); -# endif // STRICT_USE_FSCALE -#endif // STRICT_PRELOAD +# endif // STRICT_USE_FSCALE +# endif // STRICT_PRELOAD } CODE(opc_strict_exit, strict_exit, STANY, STSTA, OPC_NONE) { -#ifdef STRICT_PRELOAD +# ifdef STRICT_PRELOAD // pop scales from FPU register -# ifdef STRICT_USE_FSCALE +# ifdef STRICT_USE_FSCALE asm("fcompp"); // is equal to ffreep %st(0) x 2 -# else +# else asm("fcompp\n\tfcompp"); -# endif // STRICT_USE_FSCALE -#endif // STRICT_PRELOAD +# endif // STRICT_USE_FSCALE +# endif // STRICT_PRELOAD } -#ifndef FORCE_DOUBLE_PRECISION +# ifndef FORCE_DOUBLE_PRECISION // save FPU rounding precision CODE(opc_fppc_save, fppc_save, STANY, STSTA, OPC_NONE) { // save FPU control word @@ -661,7 +664,7 @@ } // set FPU rounding precision to single, double, extended -#define CODE_FPPC(PC, CHANGE_CW) \ +# define CODE_FPPC(PC, CHANGE_CW) \ CODE(opc_fppc_##PC, fppc_##PC, STANY, STSTA, OPC_NONE) {\ asm("movw %0,%%ax" : "=m" (preserved_fpucw));\ asm(CHANGE_CW);\ @@ -672,7 +675,8 @@ CODE_FPPC(single, "andl $0xfcff,%eax"); CODE_FPPC(double, "andl $0xfcff,%eax\n\torl $0x0200,%eax"); CODE_FPPC(extended, "orl $0x0300,%eax"); -#endif // ! FORCE_DOUBLE_PRECISION +# endif // !FORCE_DOUBLE_PRECISION +#endif // !USE_SSE2 // throw IllegalAccessError @@ -1003,7 +1007,12 @@ #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_fload_fld, fload_fld, ST0, STSTA, OPC_NONE) { asm("subl $4,%esp\n\t"); // to simulate the true value of %esp +# ifdef USE_SSE2 + asm("movss " STR(CONST) "(%esi),%xmm0"); +# else asm("flds " STR(CONST) "(%esi)"); +# endif + #ifdef RUNTIME_DEBUG if (runtime_debug) { DEBUG_IN; @@ -1075,7 +1084,12 @@ #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_dload_dld, dload_dld, ST0, STSTA, OPC_NONE) { asm("subl $8,%esp\n\t"); // to simulate the true value of %esp +# ifdef USE_SSE2 + asm("movsd " STR(CONST) "(%esi),%xmm0"); +# else asm("fldl " STR(CONST) "(%esi)"); +# endif + #ifdef RUNTIME_DEBUG if (runtime_debug) { DEBUG_IN; @@ -1255,14 +1269,22 @@ CODE(opc_faload_fld, faload_fld, ST2, ST1, OPC_THROW) { METAVM_ALOAD(%edx, %ecx, %edx, "faload_fld_st2", 0); UNHAND(%edx, %eax); +# ifdef USE_SSE2 + asm("movss (%eax,%ecx,4),%xmm0"); +# else asm("flds (%eax,%ecx,4)"); +# endif // omit asm("subl $4,%esp"); asm("faload_fld_st2_done:"); } CODE(opc_faload_fld, faload_fld, ST4, ST1, OPC_THROW) { METAVM_ALOAD(%ecx, %edx, %ecx, "faload_fld_st4", 0); UNHAND(%ecx, %eax); +# ifdef USE_SSE2 + asm("movss (%eax,%edx,4),%xmm0"); +# else asm("flds (%eax,%edx,4)"); +# endif // omit asm("subl $4,%esp"); asm("faload_fld_st4_done:"); } @@ -1333,14 +1355,22 @@ CODE(opc_daload_dld, [ld]aload, ST2, ST2, OPC_THROW) { METAVM_ALOAD2(%edx, %ecx, %edx, %ecx, "daload_dld_st2"); UNHAND(%edx, %eax); +# ifdef USE_SSE2 + asm("movsd (%eax,%ecx,8),%xmm0"); +# else asm("fldl (%eax,%ecx,8)"); +# endif // omit asm("subl $8,%esp"); asm("daload_dld_st2_done:"); } CODE(opc_daload_dld, [ld]aload, ST4, ST2, OPC_THROW) { METAVM_ALOAD2(%ecx, %edx, %ecx, %edx, "daload_dld_st4"); UNHAND(%ecx, %eax); +# ifdef USE_SSE2 + asm("movsd (%eax,%edx,8),%xmm0"); +# else asm("fldl (%eax,%edx,8)"); +# endif // omit asm("subl $8,%esp"); asm("daload_dld_st4_done:"); } @@ -1434,7 +1464,11 @@ #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_fst_fstore, fst_fstore, ST0, ST0, OPC_NONE) { asm("addl $4,%esp"); +# ifdef USE_SSE2 + asm("movss %xmm0," STR(CONST) "(%esi)"); +# else asm("fstps " STR(CONST) "(%esi)"); +# endif } #endif @@ -1462,7 +1496,11 @@ #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_dst_dstore, dst_dstore, ST0, ST0, OPC_NONE) { asm("addl $8,%esp"); // substitution for fill_cache +# ifdef USE_SSE2 + asm("movsd %xmm0," STR(CONST) "(%esi)"); +# else asm("fstpl " STR(CONST) "(%esi)"); +# endif } #endif @@ -1559,7 +1597,11 @@ "popl %edx"); // edx = handle of array ARRAY_CHECK(%edx, %ecx, "fst_fastore"); UNHAND(%edx, %edi); +# ifdef USE_SSE2 + asm("movss %xmm0,(%edi,%ecx,4)"); +# else asm("fstps (%edi,%ecx,4)"); +# endif } #endif // OPTIMIZE_INTERNAL_CODE && !METAVM @@ -1707,11 +1749,15 @@ ARRAY_CHECK(%edi, %eax, "dst_dastore"); asm("movl 12(%esp),%edi"); UNHAND(%edi, %edi); - asm("leal (%edi,%eax,8),%edi\n\t" - "fstpl (%edi)"); + asm("leal (%edi,%eax,8),%edi"); +# ifdef USE_SSE2 + asm("movsd %xmm0,(%edi)"); +# else + asm("fstpl (%edi)"); +# endif asm("addl $16,%esp"); } -#endif OPTIMIZE_INTERNAL_CODE && !METAVM +#endif // OPTIMIZE_INTERNAL_CODE && !METAVM // bastore @@ -2365,79 +2411,119 @@ #endif CODE(opc_fadd, fadd, ST0, ST0, OPC_NONE) { - asm("addl $4,%esp\n\t" - "fadds (%esp)"); + asm("addl $4,%esp"); +#ifdef USE_SSE2 + asm("addss (%esp),%xmm0"); +#else + asm("fadds (%esp)"); +#endif } CODE(opc_fmul, fmul, ST0, ST0, OPC_NONE) { - asm("addl $4,%esp\n\t" - "fmuls (%esp)"); + asm("addl $4,%esp"); +#ifdef USE_SSE2 + asm("mulss (%esp),%xmm0"); +#else + asm("fmuls (%esp)"); +#endif } CODE(opc_fsub, fsub, ST0, ST0, OPC_NONE) { - asm("fsubs (%esp)\n\t" - "addl $4,%esp"); +#ifdef USE_SSE2 + asm("subss (%esp),%xmm0"); +#else + asm("fsubs (%esp)"); +#endif + asm("addl $4,%esp"); } CODE(opc_fdiv, fdiv, ST0, ST0, OPC_NONE) { - asm("fdivs (%esp)\n\t" - "addl $4,%esp"); +#ifdef USE_SSE2 + asm("divss (%esp),%xmm0"); +#else + asm("fdivs (%esp)"); +#endif + asm("addl $4,%esp"); } #ifdef OPTIMIZE_INTERNAL_CODE - CODE(opc_fadd, fadd, ST1, ST0, OPC_NONE) { asm("fadds (%esp)"); } - CODE(opc_fmul, fmul, ST1, ST0, OPC_NONE) { asm("fmuls (%esp)"); } + CODE(opc_fadd, fadd, ST1, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("addss (%esp),%xmm0"); +# else + asm("fadds (%esp)"); +# endif + } + + CODE(opc_fmul, fmul, ST1, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("mulss (%esp),%xmm0"); +# else + asm("fmuls (%esp)"); +# endif + } #endif -#ifdef STRICT_USE_FSCALE -# ifdef STRICT_PRELOAD -# define ARITH_FLOAT_SCALE_PREPARE asm("fld %st(1)") -# else -# ifdef STRICT_FSCALE_USE_FLOAT -# define ARITH_FLOAT_SCALE_PREPARE \ - asm("flds %0\n\t" : : "m" (single_scale_neg)) +#ifndef USE_SSE2 +# ifdef STRICT_USE_FSCALE +# ifdef STRICT_PRELOAD +# define ARITH_FLOAT_SCALE_PREPARE asm("fld %st(1)") # else -# define ARITH_FLOAT_SCALE_PREPARE \ +# ifdef STRICT_FSCALE_USE_FLOAT +# define ARITH_FLOAT_SCALE_PREPARE \ + asm("flds %0\n\t" : : "m" (single_scale_neg)) +# else +# define ARITH_FLOAT_SCALE_PREPARE \ asm("fildl %0\n\t" : : "m" (single_scale_neg)) -# endif // STRICT_FSCALE_USE_FLOAT -# endif // STRICT_PRELOAD -# define ARITH_FLOAT_SCALE_DOWN asm("fscale") -# define ARITH_FLOAT_SCALE_UP asm("fxch\n\t"\ +# endif // STRICT_FSCALE_USE_FLOAT +# endif // STRICT_PRELOAD +# define ARITH_FLOAT_SCALE_DOWN asm("fscale") +# define ARITH_FLOAT_SCALE_UP asm("fxch\n\t"\ "fchs\n\t"\ "fxch\n\t"\ "fscale") -# define ARITH_FLOAT_SCALE_SETTLE asm("ffreep %st(0)") -#else // STRICT_USE_FSCALE -# define ARITH_FLOAT_SCALE_PREPARE -# ifdef STRICT_PRELOAD -# define ARITH_FLOAT_SCALE_DOWN asm("fmul %st(4)") -# define ARITH_FLOAT_SCALE_UP asm("fmul %st(3)") -# else -# define ARITH_FLOAT_SCALE_DOWN \ +# define ARITH_FLOAT_SCALE_SETTLE asm("ffreep %st(0)") +# else // STRICT_USE_FSCALE +# define ARITH_FLOAT_SCALE_PREPARE +# ifdef STRICT_PRELOAD +# define ARITH_FLOAT_SCALE_DOWN asm("fmul %st(4)") +# define ARITH_FLOAT_SCALE_UP asm("fmul %st(3)") +# else +# define ARITH_FLOAT_SCALE_DOWN \ asm("fldt %0\n\t"\ "fmulp" : : "m" (*single_scale_neg) : "edx","ecx","esi") -# define ARITH_FLOAT_SCALE_UP \ +# define ARITH_FLOAT_SCALE_UP \ asm("fldt %0\n\t"\ "fmulp" : : "m" (*single_scale_pos) : "edx","ecx","esi") -# endif // STRICT_PRELOAD -# define ARITH_FLOAT_SCALE_SETTLE -#endif // STRICT_USE_FSCALE +# endif // STRICT_PRELOAD +# define ARITH_FLOAT_SCALE_SETTLE +# endif // STRICT_USE_FSCALE CODE(opc_strict_fprep, strict_fprep, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_SCALE_PREPARE; } +#endif // !USE_SSE2 CODE(opc_fld4, fld4, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_DEBUG1; +#ifdef USE_SSE2 + asm("movss 4(%esp),%xmm0"); +#else asm("flds 4(%esp)"); +#endif } CODE(opc_fld, fld, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_DEBUG1; +#ifdef USE_SSE2 + asm("movss (%esp),%xmm0"); +#else asm("flds (%esp)"); +#endif } +#ifndef USE_SSE2 CODE(opc_strict_fscdown, strict_fscdown, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_SCALE_DOWN; } @@ -2445,15 +2531,22 @@ CODE(opc_strict_fscup, strict_fscup, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_SCALE_UP; } +#endif // !USE_SSE2 CODE(opc_fst, fst, STANY, STSTA, OPC_NONE) { +#ifdef USE_SSE2 + asm("movss %xmm0,(%esp)"); +#else asm("fstps (%esp)"); +#endif ARITH_FLOAT_DEBUG2; } +#ifndef USE_SSE2 CODE(opc_strict_fsettle, strict_fsettle, STANY, STSTA, OPC_NONE) { ARITH_FLOAT_SCALE_SETTLE; } +#endif // dadd, dsub, dmul, ddiv @@ -2491,78 +2584,118 @@ #endif CODE(opc_dadd, dadd, ST0, ST0, OPC_NONE) { - asm("addl $8,%esp\n\t" - "faddl (%esp)"); + asm("addl $8,%esp"); +#ifdef USE_SSE2 + asm("addsd (%esp),%xmm0"); +#else + asm("faddl (%esp)"); +#endif } CODE(opc_dmul, dmul, ST0, ST0, OPC_NONE) { - asm("addl $8,%esp\n\t" - "fmull (%esp)"); + asm("addl $8,%esp"); +#ifdef USE_SSE2 + asm("mulsd (%esp),%xmm0"); +#else + asm("fmull (%esp)"); +#endif } CODE(opc_dsub, dsub, ST0, ST0, OPC_NONE) { - asm("fsubl (%esp)\n\t" - "addl $8,%esp"); +#ifdef USE_SSE2 + asm("subsd (%esp),%xmm0"); +#else + asm("fsubl (%esp)"); +#endif + asm("addl $8,%esp"); } CODE(opc_ddiv, ddiv, ST0, ST0, OPC_NONE) { - asm("fdivl (%esp)\n\t" - "addl $8,%esp"); +#ifdef USE_SSE2 + asm("divsd (%esp),%xmm0"); +#else + asm("fdivl (%esp)"); +#endif + asm("addl $8,%esp"); } #ifdef OPTIMIZE_INTERNAL_CODE - CODE(opc_dadd, dadd, ST2, ST0, OPC_NONE) { asm("faddl (%esp)"); } - CODE(opc_dmul, dmul, ST2, ST0, OPC_NONE) { asm("fmull (%esp)"); } + CODE(opc_dadd, dadd, ST2, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("addsd (%esp),%xmm0"); +# else + asm("faddl (%esp)"); +# endif + } + + CODE(opc_dmul, dmul, ST2, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("mulsd (%esp),%xmm0"); +# else + asm("fmull (%esp)"); +# endif + } #endif -#ifdef STRICT_USE_FSCALE -# ifdef STRICT_PRELOAD -# define ARITH_DOUBLE_SCALE_PREPARE asm("fld %st(0)") -# else -# ifdef STRICT_FSCALE_USE_FLOAT -# define ARITH_DOUBLE_SCALE_PREPARE \ - asm("flds %0\n\t" : : "m" (double_scale_neg)) +#ifndef USE_SSE2 +# ifdef STRICT_USE_FSCALE +# ifdef STRICT_PRELOAD +# define ARITH_DOUBLE_SCALE_PREPARE asm("fld %st(0)") # else -# define ARITH_DOUBLE_SCALE_PREPARE \ - asm("fildl %0\n\t" : : "m" (double_scale_neg)) -# endif // STRICT_FSCALE_USE_FLOAT -# endif // STRICT_PRELOAD -# define ARITH_DOUBLE_SCALE_DOWN asm("fscale") -# define ARITH_DOUBLE_SCALE_UP asm("fxch\n\t"\ +# ifdef STRICT_FSCALE_USE_FLOAT +# define ARITH_DOUBLE_SCALE_PREPARE \ + asm("flds %0\n\t" : : "m" (double_scale_neg) : "edx","ecx","esi") +# else +# define ARITH_DOUBLE_SCALE_PREPARE \ + asm("fildl %0\n\t" : : "m" (double_scale_neg) : "edx","ecx","esi") +# endif // STRICT_FSCALE_USE_FLOAT +# endif // STRICT_PRELOAD +# define ARITH_DOUBLE_SCALE_DOWN asm("fscale") +# define ARITH_DOUBLE_SCALE_UP asm("fxch\n\t"\ "fchs\n\t"\ "fxch\n\t"\ "fscale") -# define ARITH_DOUBLE_SCALE_SETTLE asm("ffreep %st(0)") -#else -# define ARITH_DOUBLE_SCALE_PREPARE -# ifdef STRICT_PRELOAD -# define ARITH_DOUBLE_SCALE_DOWN asm("fmul %st(2)") -# define ARITH_DOUBLE_SCALE_UP asm("fmul %st(1)") +# define ARITH_DOUBLE_SCALE_SETTLE asm("ffreep %st(0)") # else -# define ARITH_DOUBLE_SCALE_DOWN \ +# define ARITH_DOUBLE_SCALE_PREPARE +# ifdef STRICT_PRELOAD +# define ARITH_DOUBLE_SCALE_DOWN asm("fmul %st(2)") +# define ARITH_DOUBLE_SCALE_UP asm("fmul %st(1)") +# else +# define ARITH_DOUBLE_SCALE_DOWN \ asm("fldt %0\n\t"\ "fmulp" : : "m" (*double_scale_neg) : "edx","ecx","esi") -# define ARITH_DOUBLE_SCALE_UP \ +# define ARITH_DOUBLE_SCALE_UP \ asm("fldt %0\n\t"\ "fmulp" : : "m" (*double_scale_pos) : "edx","ecx","esi") -# endif // STRICT_PRELOAD -# define ARITH_DOUBLE_SCALE_SETTLE -#endif // STRICT_USE_FSCALE +# endif // STRICT_PRELOAD +# define ARITH_DOUBLE_SCALE_SETTLE +# endif // STRICT_USE_FSCALE CODE(opc_strict_dprep, strict_dprep, STANY, STSTA, OPC_NONE) { ARITH_DOUBLE_SCALE_PREPARE; } +#endif // !USE_SSE2 CODE(opc_dld8, dld8, STANY, STSTA, OPC_NONE) { ARITH_DOUBLE_DEBUG1; +#ifdef USE_SSE2 + asm("movsd 8(%esp),%xmm0"); +#else asm("fldl 8(%esp)"); +#endif } CODE(opc_dld, dld, STANY, STSTA, OPC_NONE) { +#ifdef USE_SSE2 + asm("movsd (%esp),%xmm0"); +#else asm("fldl (%esp)"); +#endif } +#ifndef USE_SSE2 CODE(opc_strict_dscdown, strict_dscdown, STANY, STSTA, OPC_NONE) { ARITH_DOUBLE_SCALE_DOWN; } @@ -2570,15 +2703,22 @@ CODE(opc_strict_dscup, strict_dscup, STANY, STSTA, OPC_NONE) { ARITH_DOUBLE_SCALE_UP; } +#endif // !USE_SSE2 CODE(opc_dst, dst, STANY, STSTA, OPC_NONE) { +#ifdef USE_SSE2 + asm("movsd %xmm0,(%esp)"); +#else asm("fstpl (%esp)"); +#endif ARITH_DOUBLE_DEBUG2; } +#ifndef USE_SSE2 CODE(opc_strict_dsettle, strict_dsettle, STANY, STSTA, OPC_NONE) { ARITH_DOUBLE_SCALE_SETTLE; } +#endif // frem @@ -2676,29 +2816,24 @@ // fneg - // compile: flush_cache, fld, fneg, fst - CODE(opc_fneg, fneg, ST0, ST0, OPC_NONE) { - asm("fchs"); +#define CODE_FNEG(VOP, STATE, TGT) \ + CODE(opc_##VOP, VOP, STATE, STSTA, OPC_NONE) {\ + asm("xorl $0x80000000," STR(TGT));\ } -#ifdef OPTIMIZE_INTERNAL_CODE - CODE(opc_fneg, fneg, ST1, ST0, OPC_NONE) { - asm("subl $4,%esp\n\t" - "fchs"); - } -#endif + + CODE_FNEG(fneg, ST0, (%esp)); + CODE_FNEG(fneg, ST1, %edx); + CODE_FNEG(fneg, ST2, %ecx); + CODE_FNEG(fneg, ST3, %ecx); + CODE_FNEG(fneg, ST4, %edx); // dneg - // flush_cache, dld, dneg, dst - CODE(opc_dneg, dneg, ST0, ST0, OPC_NONE) { - asm("fchs"); - } -#ifdef OPTIMIZE_INTERNAL_CODE - CODE(opc_dneg, dneg, ST2, ST0, OPC_NONE) { - asm("subl $8,%esp\n\t" - "fchs"); - } -#endif + CODE_FNEG(dneg, ST0, 4(%esp)); + CODE_FNEG(dneg, ST1, (%esp)); + CODE_FNEG(dneg, ST2, %edx); + CODE_FNEG(dneg, ST3, (%esp)); + CODE_FNEG(dneg, ST4, %ecx); // ishl @@ -2883,15 +3018,23 @@ // i2f // compile: flush_cache, i2f, fst CODE(opc_i2f, i2f, ST0, ST0, OPC_NONE) { +#ifdef USE_SSE2 + asm("cvtsi2ss (%esp),%xmm0"); +#else asm("fildl (%esp)"); +#endif } // i2d // compile: flush_cache, i2d, dst CODE(opc_i2d, i2d, ST0, ST0, OPC_NONE) { - asm("fildl (%esp)\n\t" - "subl $4,%esp"); +#ifdef USE_SSE2 + asm("cvtsi2sd (%esp),%xmm0"); +#else + asm("fildl (%esp)"); +#endif + asm("subl $4,%esp"); } @@ -2911,17 +3054,23 @@ // l2f - // compile: flush_cache, l2f, fst + // compile: flush_cache, l2f, (fst if USE_SSE2 is not defined) CODE(opc_l2f, l2f, ST0, ST0, OPC_NONE) { asm("fildll (%esp)\n\t" "addl $4,%esp"); +#ifdef USE_SSE2 + asm("fstps (%esp)"); // opc_fst (!USE_SSE2) +#endif } // l2d - // compile: flush_cache, l2d, dst + // compile: flush_cache, l2d, (dst if USE_SSE2 is not defined) CODE(opc_l2d, l2d, ST0, ST0, OPC_NONE) { asm("fildll (%esp)"); +#ifdef USE_SSE2 + asm("fstpl (%esp)"); // opc_dst (!USE_SSE2) +#endif } @@ -3046,10 +3195,16 @@ // f2d // compile: flush_cache, fld, f2d, dst CODE(opc_f2d, f2d, ST0, ST0, OPC_NONE) { +#ifdef USE_SSE2 + asm("cvtss2sd %xmm0,%xmm0"); +#endif asm("subl $4,%esp"); } #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_f2d, f2d, ST1, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("cvtss2sd %xmm0,%xmm0"); +# endif asm("subl $8,%esp"); } #endif @@ -3058,10 +3213,16 @@ // d2f // compile: flush_cache, dld, d2f, fst CODE(opc_d2f, d2f, ST0, ST0, OPC_NONE) { +#ifdef USE_SSE2 + asm("cvtsd2ss %xmm0,%xmm0"); +#endif asm("addl $4,%esp"); } #ifdef OPTIMIZE_INTERNAL_CODE CODE(opc_d2f, d2f, ST2, ST0, OPC_NONE) { +# ifdef USE_SSE2 + asm("cvtsd2ss %xmm0,%xmm0"); +# endif asm("subl $4,%esp"); } #endif @@ -4189,7 +4350,7 @@ asm("inv_metavm_inv_local:"); } -#endif METAVM +#endif // METAVM CODE(opc_inv_spe_obj, inv_spe_obj, STANY, STSTA, OPC_SIGNAL) { @@ -5053,20 +5214,20 @@ MONITOR_DEBUG; #define CODE_MONITOR(vop, FUNCNAME, METAVM_FUNCNAME) \ - CODE(opc_##vop, ##vop, ST0, ST0, OPC_SIGNAL) {\ + CODE(opc_##vop, vop, ST0, ST0, OPC_SIGNAL) {\ asm("popl %edx"); /* now state 1 */\ MONITOR(%edx, FUNCNAME, METAVM_FUNCNAME, #vop "_st0", 0);\ }\ - CODE(opc_##vop, ##vop, ST1, ST0, OPC_SIGNAL) {\ + CODE(opc_##vop, vop, ST1, ST0, OPC_SIGNAL) {\ MONITOR(%edx, FUNCNAME, METAVM_FUNCNAME, #vop "_st1", 0);\ }\ - CODE(opc_##vop, ##vop, ST2, ST1, OPC_SIGNAL) {\ + CODE(opc_##vop, vop, ST2, ST1, OPC_SIGNAL) {\ MONITOR(%ecx, FUNCNAME, METAVM_FUNCNAME, #vop "_st2", 1);\ }\ - CODE(opc_##vop, ##vop, ST3, ST0, OPC_SIGNAL) {\ + CODE(opc_##vop, vop, ST3, ST0, OPC_SIGNAL) {\ MONITOR(%ecx, FUNCNAME, METAVM_FUNCNAME, #vop "_st3", 0);\ }\ - CODE(opc_##vop, ##vop, ST4, ST3, OPC_SIGNAL) {\ + CODE(opc_##vop, vop, ST4, ST3, OPC_SIGNAL) {\ MONITOR(%edx, FUNCNAME, METAVM_FUNCNAME, #vop "_st4", 3);\ } @@ -5299,7 +5460,14 @@ asm("call " SYMBOL(FUNC) "@PLT\n\t"\ "fstpl (%esp)") -#define JMATH_SQRT_ST0 JMATH_DIRECT_ST0("fsqrt") +#ifdef USE_SSE2 +# define JMATH_SQRT_ST0 \ + asm("movsd (%esp),%xmm0\n\t"\ + "sqrtsd %xmm0,%xmm0\n\t"\ + "movsd %xmm0,(%esp)"); +#else +# define JMATH_SQRT_ST0 JMATH_DIRECT_ST0("fsqrt") +#endif // USE_SSE2 #if 0 #define JMATH_SIN_ST0 JMATH_DIRECT_ST0("fsin") #define JMATH_COS_ST0 JMATH_DIRECT_ST0("fcos") diff -aruN shujit-0.7.1/compile.c shujit/compile.c --- shujit-0.7.1/compile.c Sun Mar 18 18:06:42 2001 +++ shujit/compile.c Tue Jun 26 19:28:25 2001 @@ -510,17 +510,19 @@ } #endif // METAVM +#ifndef USE_SSE2 if (((mb->fb.access & ACC_STRICT) && !OPT_SETQ(OPT_IGNSTRICTFP)) || OPT_SETQ(OPT_FRCSTRICTFP)) { -#ifndef FORCE_DOUBLE_PRECISION +# ifndef FORCE_DOUBLE_PRECISION if (!is_fpupc_double) { processAnOpcode(cc, opc_fppc_save, -1); processAnOpcode(cc, opc_fppc_double, -1); } -#endif // ! FORCE_DOUBLE_PRECISION +# endif // !FORCE_DOUBLE_PRECISION processAnOpcode(cc, opc_strict_enter, -1); } +#endif // !USE_SSE2 if ((mb->fb.access & ACC_SYNCHRONIZED) && !OPT_SETQ(OPT_IGNLOCK)) { @@ -584,6 +586,7 @@ { int opcode_fld = ((opcode == opc_fmul) ? opc_fld : opc_fld4); +#ifndef USE_SSE2 if (OPT_SETQ(OPT_FRCSTRICTFP) || (!OPT_SETQ(OPT_IGNSTRICTFP) && (mb->fb.access & ACC_STRICT))) { processAnOpcode(cc, opc_flush_cache, byteoff); @@ -595,7 +598,9 @@ processAnOpcode(cc, opc_fst, byteoff); processAnOpcode(cc, opc_strict_fsettle, byteoff); } - else { + else +#endif // !USE_SSE2 + { processAnOpcode(cc, opc_flush_cache, byteoff); processAnOpcode(cc, opcode_fld, byteoff); byteinc = processAnOpcode(cc, opcode, byteoff); @@ -619,6 +624,7 @@ { int opcode_dld = ((opcode == opc_dmul) ? opc_dld : opc_dld8); +#ifndef USE_SSE2 if (OPT_SETQ(OPT_FRCSTRICTFP) || (!OPT_SETQ(OPT_IGNSTRICTFP) && (mb->fb.access & ACC_STRICT))) { processAnOpcode(cc, opc_flush_cache, byteoff); @@ -630,7 +636,9 @@ processAnOpcode(cc, opc_dst, byteoff); processAnOpcode(cc, opc_strict_dsettle, byteoff); } - else { + else +#endif // !USE_SSE2 + { processAnOpcode(cc, opc_flush_cache, byteoff); processAnOpcode(cc, opcode_dld, byteoff); byteinc = processAnOpcode(cc, opcode, byteoff); @@ -640,7 +648,10 @@ } break; - case opc_i2f: case opc_l2f: case opc_frem: + case opc_i2f: case opc_frem: +#ifndef USE_SSE2 + case opc_l2f: +#endif processAnOpcode(cc, opc_flush_cache, byteoff); byteinc = processAnOpcode(cc, opcode, byteoff); processAnOpcode(cc, opc_fst, byteoff); @@ -654,28 +665,24 @@ byteoff += byteinc; break; - case opc_fneg: - processAnOpcode(cc, opc_flush_cache, byteoff); - processAnOpcode(cc, opc_fld, byteoff); - byteinc = processAnOpcode(cc, opcode, byteoff); - processAnOpcode(cc, opc_fst, byteoff); - byteoff += byteinc; - break; - - case opc_dneg: + case opc_i2d: +#ifndef USE_SSE2 + case opc_l2d: +#endif processAnOpcode(cc, opc_flush_cache, byteoff); - processAnOpcode(cc, opc_dld, byteoff); byteinc = processAnOpcode(cc, opcode, byteoff); processAnOpcode(cc, opc_dst, byteoff); byteoff += byteinc; break; - case opc_i2d: case opc_l2d: +#ifdef USE_SSE2 + case opc_l2f: + case opc_l2d: processAnOpcode(cc, opc_flush_cache, byteoff); byteinc = processAnOpcode(cc, opcode, byteoff); - processAnOpcode(cc, opc_dst, byteoff); byteoff += byteinc; break; +#endif case opc_f2d: processAnOpcode(cc, opc_flush_cache, byteoff); @@ -1112,16 +1119,18 @@ processAnOpcode(cc, opc_sync_obj_exit, INT_MAX); } +#ifndef USE_SSE2 if (((mb->fb.access & ACC_STRICT) && !OPT_SETQ(OPT_IGNSTRICTFP)) || OPT_SETQ(OPT_FRCSTRICTFP)) { -#ifndef FORCE_DOUBLE_PRECISION +# ifndef FORCE_DOUBLE_PRECISION if (!is_fpupc_double) { processAnOpcode(cc, opc_fppc_restore, INT_MAX); } -#endif // ! FORCE_DOUBLE_PRECISION +# endif // ! FORCE_DOUBLE_PRECISION processAnOpcode(cc, opc_strict_exit, INT_MAX); } +#endif // !USE_SSE2 processAnOpcode(cc, opc_methodtail, INT_MAX); diff -aruN shujit-0.7.1/compiler.h shujit/compiler.h --- shujit-0.7.1/compiler.h Sun Mar 18 18:08:44 2001 +++ shujit/compiler.h Mon Jul 2 15:18:17 2001 @@ -122,6 +122,8 @@ # else # define STACKOVERFLOW_MARGIN 3400 # endif +# else +# define STACKOVERFLOW_MARGIN 200 # endif #endif @@ -466,13 +468,13 @@ // in signal.c #if (defined(EXC_BY_SIGNAL) || defined(GET_SIGCONTEXT)) && defined(SEARCH_SIGCONTEXT) extern int sc_nest; -#endif; +#endif // in code.c // for the precise floating-point semantics, i.e. strictfp #undef STRICT_PRELOAD // It has a preload to preload scales into FPU register yet. -#define STRICT_USE_FSCALE +#undef STRICT_USE_FSCALE #define STRICT_FSCALE_USE_FLOAT #if 0 diff -aruN shujit-0.7.1/config.h.in shujit/config.h.in --- shujit-0.7.1/config.h.in Thu Aug 17 23:43:14 2000 +++ shujit/config.h.in Mon Jun 25 21:44:45 2001 @@ -26,6 +26,9 @@ // defined if DB library for code DB is gdbm #undef GDBM +// defined if use SSE2 instructions instead of x87 +#undef USE_SSE2 + // defined if shuJIT supports thread migration #undef MOBA diff -aruN shujit-0.7.1/configure shujit/configure --- shujit-0.7.1/configure Wed Jan 10 14:47:26 2001 +++ shujit/configure Mon Jun 25 21:44:57 2001 @@ -14,6 +14,9 @@ ac_help="$ac_help --disable-codedb disable code DB " ac_help="$ac_help + --enable-sse2 + use SSE2 instructions instead of x87 " +ac_help="$ac_help --enable-moba support thread migration " ac_help="$ac_help @@ -585,7 +588,7 @@ fi echo $ac_n "checking host system type""... $ac_c" 1>&6 -echo "configure:589: checking host system type" >&5 +echo "configure:592: checking host system type" >&5 host_alias=$host case "$host_alias" in @@ -606,7 +609,7 @@ echo "$ac_t""$host" 1>&6 echo $ac_n "checking target system type""... $ac_c" 1>&6 -echo "configure:610: checking target system type" >&5 +echo "configure:613: checking target system type" >&5 target_alias=$target case "$target_alias" in @@ -624,7 +627,7 @@ echo "$ac_t""$target" 1>&6 echo $ac_n "checking build system type""... $ac_c" 1>&6 -echo "configure:628: checking build system type" >&5 +echo "configure:631: checking build system type" >&5 build_alias=$build case "$build_alias" in @@ -657,7 +660,7 @@ # Extract the first word of "libgdbm.so", so it can be a program name with args. set dummy libgdbm.so; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:661: checking for $ac_word" >&5 +echo "configure:664: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_ac_libgdbm'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -694,7 +697,7 @@ # Extract the first word of "libndbm.so", so it can be a program name with args. set dummy libndbm.so; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:698: checking for $ac_word" >&5 +echo "configure:701: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_ac_libndbm'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -737,6 +740,21 @@ +# Check whether --enable-sse2 or --disable-sse2 was given. +if test "${enable_sse2+set}" = set; then + enableval="$enable_sse2" + + ac_sse2=yes + cat >> confdefs.h <<\EOF +#define USE_SSE2 1 +EOF + + +fi + + + + # Check whether --enable-moba or --disable-moba was given. if test "${enable_moba+set}" = set; then enableval="$enable_moba" @@ -788,7 +806,7 @@ echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:792: checking whether ${MAKE-make} sets \${MAKE}" >&5 +echo "configure:810: checking whether ${MAKE-make} sets \${MAKE}" >&5 set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 @@ -815,7 +833,7 @@ fi echo $ac_n "checking C compiler""... $ac_c" 1>&6 -echo "configure:819: checking C compiler" >&5 +echo "configure:837: checking C compiler" >&5 ac_cc=no if test -n "$CC"; then if test -f "$CC"; then @@ -824,7 +842,7 @@ # Extract the first word of "$CC", so it can be a program name with args. set dummy $CC; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:828: checking for $ac_word" >&5 +echo "configure:846: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_cc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -865,7 +883,7 @@ # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:869: checking for $ac_word" >&5 +echo "configure:887: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_cc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -919,7 +937,7 @@ # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:923: checking for $ac_word" >&5 +echo "configure:941: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_objdump'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -962,7 +980,7 @@ # Extract the first word of "ruby", so it can be a program name with args. set dummy ruby; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:966: checking for $ac_word" >&5 +echo "configure:984: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_ruby'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1002,7 +1020,7 @@ # Extract the first word of "ci", so it can be a program name with args. set dummy ci; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1006: checking for $ac_word" >&5 +echo "configure:1024: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_ci'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1039,7 +1057,7 @@ # Extract the first word of "co", so it can be a program name with args. set dummy co; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1043: checking for $ac_word" >&5 +echo "configure:1061: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_co'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1076,7 +1094,7 @@ # Extract the first word of "mv", so it can be a program name with args. set dummy mv; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1080: checking for $ac_word" >&5 +echo "configure:1098: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_mv'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1113,7 +1131,7 @@ # Extract the first word of "rm", so it can be a program name with args. set dummy rm; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1117: checking for $ac_word" >&5 +echo "configure:1135: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_rm'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1150,7 +1168,7 @@ # Extract the first word of "wc", so it can be a program name with args. set dummy wc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1154: checking for $ac_word" >&5 +echo "configure:1172: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_wc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1187,7 +1205,7 @@ # Extract the first word of "etags", so it can be a program name with args. set dummy etags; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1191: checking for $ac_word" >&5 +echo "configure:1209: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_etags'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1225,7 +1243,7 @@ # Extract the first word of "which", so it can be a program name with args. set dummy which; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1229: checking for $ac_word" >&5 +echo "configure:1247: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_which'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1263,7 +1281,7 @@ # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1267: checking for $ac_word" >&5 +echo "configure:1285: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_grep'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1302,7 +1320,7 @@ # Extract the first word of "sed", so it can be a program name with args. set dummy sed; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1306: checking for $ac_word" >&5 +echo "configure:1324: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_sed'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1340,7 +1358,7 @@ # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1344: checking for $ac_word" >&5 +echo "configure:1362: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_AWK'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1372,7 +1390,7 @@ echo $ac_n "checking version of gcc""... $ac_c" 1>&6 -echo "configure:1376: checking version of gcc" >&5 +echo "configure:1394: checking version of gcc" >&5 ac_ccver_string=`$ac_cc -v 2>&1 | $ac_grep 'gcc version' | $AWK '{print $3}' | $ac_sed 's/.*-//'` case "$ac_ccver_string" in [3-9].*|2.9*) @@ -1399,7 +1417,7 @@ echo $ac_n "checking install path of JDK""... $ac_c" 1>&6 -echo "configure:1403: checking install path of JDK" >&5 +echo "configure:1421: checking install path of JDK" >&5 # Check whether --with-jdk or --without-jdk was given. if test "${with_jdk+set}" = set; then withval="$with_jdk" @@ -1417,7 +1435,7 @@ # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1421: checking for $ac_word" >&5 +echo "configure:1439: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_java'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1460,7 +1478,7 @@ # Extract the first word of "javac", so it can be a program name with args. set dummy javac; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1464: checking for $ac_word" >&5 +echo "configure:1482: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_javac'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1497,7 +1515,7 @@ # Extract the first word of "javah", so it can be a program name with args. set dummy javah; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1501: checking for $ac_word" >&5 +echo "configure:1519: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_javah'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1534,7 +1552,7 @@ # Extract the first word of "jar", so it can be a program name with args. set dummy jar; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1538: checking for $ac_word" >&5 +echo "configure:1556: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_jar'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1571,7 +1589,7 @@ # Extract the first word of "javadoc", so it can be a program name with args. set dummy javadoc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1575: checking for $ac_word" >&5 +echo "configure:1593: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ac_javadoc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else @@ -1607,7 +1625,7 @@ echo $ac_n "checking version of JDK""... $ac_c" 1>&6 -echo "configure:1611: checking version of JDK" >&5 +echo "configure:1629: checking version of JDK" >&5 ac_jver_string=`$ac_java -Djava.compiler= -version 2>&1` case "$ac_jver_string" in *\"1.1*) @@ -1640,7 +1658,7 @@ echo $ac_n "checking asm or C, which version of interpreter is used""... $ac_c" 1>&6 -echo "configure:1644: checking asm or C, which version of interpreter is used" >&5 +echo "configure:1662: checking asm or C, which version of interpreter is used" >&5 # Check whether --enable-c-interpreter or --disable-c-interpreter was given. if test "${enable_c_interpreter+set}" = set; then enableval="$enable_c_interpreter" @@ -1680,9 +1698,9 @@ echo $ac_n "checking version of libc""... $ac_c" 1>&6 -echo "configure:1684: checking version of libc" >&5 +echo "configure:1702: checking version of libc" >&5 echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1686: checking how to run the C preprocessor" >&5 +echo "configure:1704: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= @@ -1697,13 +1715,13 @@ # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1707: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1725: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1714,13 +1732,13 @@ rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1724: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1742: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1731,13 +1749,13 @@ rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1741: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } +{ (eval echo configure:1759: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : @@ -1762,7 +1780,7 @@ echo "$ac_t""$CPP" 1>&6 cat > conftest.$ac_ext < @@ -1789,7 +1807,7 @@ echo $ac_n "checking type of an argument of monitorEnter()""... $ac_c" 1>&6 -echo "configure:1793: checking type of an argument of monitorEnter()" >&5 +echo "configure:1811: checking type of an argument of monitorEnter()" >&5 ac_monitor_t=`$ac_grep monitorEnter $ac_jincdir/monitor.h | $ac_sed 's/(/ /' | $ac_sed "s/.* \(.*\));.*/\1/"` echo "$ac_t""$ac_monitor_t" 1>&6 cat >> confdefs.h < header exists""... $ac_c" 1>&6 -echo "configure:1802: checking whether the header exists" >&5 +echo "configure:1820: checking whether the header exists" >&5 if test -f "/usr/include/asm/ucontext.h"; then echo "$ac_t""exists" 1>&6 cat >> confdefs.h <<\EOF @@ -1810,7 +1828,7 @@ fi echo $ac_n "checking whether the JDK_HOME/include/green_threads directory exists""... $ac_c" 1>&6 -echo "configure:1814: checking whether the JDK_HOME/include/green_threads directory exists" >&5 +echo "configure:1832: checking whether the JDK_HOME/include/green_threads directory exists" >&5 if test -d "$ac_jincdir/green_threads"; then echo "$ac_t""exists" 1>&6 cat >> confdefs.h <<\EOF @@ -1974,6 +1992,7 @@ s%@ac_libgdbm@%$ac_libgdbm%g s%@ac_libndbm@%$ac_libndbm%g s%@ac_codedb@%$ac_codedb%g +s%@ac_sse2@%$ac_sse2%g s%@ac_moba@%$ac_moba%g s%@ac_metavm@%$ac_metavm%g s%@ac_metavm_no_array@%$ac_metavm_no_array%g diff -aruN shujit-0.7.1/configure.in shujit/configure.in --- shujit-0.7.1/configure.in Wed Jan 10 14:46:22 2001 +++ shujit/configure.in Mon Jun 25 21:44:27 2001 @@ -30,6 +30,17 @@ AC_SUBST(ac_libgdbm) +AC_ARG_ENABLE(sse2, + [ --enable-sse2 + use SSE2 instructions instead of x87 ], + [ + ac_sse2=yes + AC_DEFINE(USE_SSE2) + ], + []) +AC_SUBST(ac_sse2) + + AC_ARG_ENABLE(moba, [ --enable-moba support thread migration ], diff -aruN shujit-0.7.1/optimize.c shujit/optimize.c --- shujit-0.7.1/optimize.c Sun Mar 18 18:04:30 2001 +++ shujit/optimize.c Tue Jun 26 19:29:28 2001 @@ -439,9 +439,12 @@ fflush(stdout); } #endif - if ((inlined_info->inlineability >= INLINE_MAY) && - ((mb->fb.access & ACC_STRICT) == - (method->fb.access & ACC_STRICT))) { + if ((inlined_info->inlineability >= INLINE_MAY) +#ifndef USE_SSE2 + && ((mb->fb.access & ACC_STRICT) == + (method->fb.access & ACC_STRICT)) +#endif // USE_SSE2 + ) { // do inining pcentry *inlined_pctable; int inlined_pctablelen; diff -aruN shujit-0.7.1/runtime.c shujit/runtime.c --- shujit-0.7.1/runtime.c Sun Mar 11 16:12:14 2001 +++ shujit/runtime.c Tue Jun 26 16:42:13 2001 @@ -271,11 +271,10 @@ #ifdef RUNTIME_DEBUG if (runtime_debug) { asm("pushal\n\t" - "pushl (%ecx)\n\t" "pushl %ecx"); - PUSH_CONSTSTR(" obj: %x, (obj): %x\n"); + PUSH_CONSTSTR(" obj: %x\n"); asm("call printf@PLT\n\t" - "addl $12,%esp"); + "addl $8,%esp"); asm("popal"); } #endif @@ -667,7 +666,7 @@ asm("popal\n\taddl $200,%esp"); } asm( -#endif RUNTIME_DEBUG +#endif // RUNTIME_DEBUG "jmp nmiret_done\n\t" "nmiret_fp64:\n\t" "fstpl (%%esi)\n\t" diff -aruN shujit-0.7.1/signal.c shujit/signal.c --- shujit-0.7.1/signal.c Sun Mar 11 16:12:20 2001 +++ shujit/signal.c Fri Jun 29 17:55:21 2001 @@ -317,7 +317,9 @@ if (!exceptionOccurred(ee)) { # ifdef CAUSE_STACKOVERFLOW unsigned char *pc = (unsigned char *)SIGCONTEXT(sc, eip); - if ((pc[0] == 0x8b) && (pc[1] == 0x8d)) { + if ((pc[0] == 0x8b) && (pc[1] == 0x8d) && + ((pc[2] + (pc[3] << 8) + (pc[4] << 16) + (pc[5] << 24)) + == -STACKOVERFLOW_MARGIN)) { // movl -STACKOVERFLOW_MARGIN(%ebp),%ecx SignalError(NULL, JAVAPKG "StackOverflowError", 0); } diff -aruN shujit-0.7.1/txt/benchmark-P4 shujit/txt/benchmark-P4 --- shujit-0.7.1/txt/benchmark-P4 Thu Jan 1 09:00:00 1970 +++ shujit/txt/benchmark-P4 Tue Jun 26 19:02:01 2001 @@ -0,0 +1,23 @@ +Pentium 4 / 1.5GHz + pen4.muraoka.info.waseda.ac.jp + +[Linpack Benchmark -- Java Version] + + 500 x 500, JDK 1.2.2 FCS, green threads + +Client, 1.3.1 154.96 0.54 +Server, 14beta 133.493 0.63 +Server, 1.3.1 133.069 0.63 +Client, 14beta 126.445 0.66 +IBM 1.3.0 51.243 1.64 +shu 010626 SSE2 39.083 2.15 +shu 010626 x87 37.21 2.26 +TYA 1.7v2 33.547 2.5 +Inprise 1.2.15 26.313 3.19 +OpenJIT 1.1.15 18.494 4.53 +sunwjit 8.059 10.4 +interpreter 8.025 10.45 + +環境: + glibc-2.2.2-10, gcc-2.95.2-4k10, gcc-java-2.95.2-4k10, gcc-libgcj-2.95.2-4k10 +GCJ のコード, IBM JDK 1.1.8 がハングアップ。 diff -aruN shujit-0.7.1/txt/memo shujit/txt/memo --- shujit-0.7.1/txt/memo Sun Mar 11 16:09:07 2001 +++ shujit/txt/memo Mon Jul 2 15:20:35 2001 @@ -1,4 +1,6 @@ Todo + - SSE2 対応の性能テスト + Math.sqrt(), 四則演算 - 特定メソッドの inlining。 profiling (javac, Linpack 等) に基づいて。 System#arraycopy, Object#getClass, Object#hashCode diff -aruN shujit-0.7.1/txt/quick shujit/txt/quick --- shujit-0.7.1/txt/quick Fri Apr 23 15:47:30 1999 +++ shujit/txt/quick Wed Jun 27 10:51:49 2001 @@ -1,44 +1,72 @@ -_quick 命令への書き換え +_quick 疑似命令への変換 -[quickInvocation() in interpreter.c] +[quickFieldAccess() in interpreter.c] -if invokestatic - -> invokestatic_quick -else if invokevirtual - if private - -> invokenonvirtual_quck - else if (offset < 256) && !UseLosslessQuickOpcodes - -> invokevirtual_quick - else - -> invokevirtual_quick_w -else if invokespecial - if index が直接対象メソッドを指している - -> invokenonvirtual_quick - else - -> invokesuper_quick +getfield + if (offset >= 256) || UseLosslessQuickOpcodes + getfield_quick_w + else if (long か double) + getfield2_quick + else + getfield_quick +putfield getfield と同じ選択方法 + putfield_quick + putfield2_quick + putfield_quick_w + +[quickStaticAccess() in interpreter.c] + +getstatic + if (long か double) + getstatic2_quick + else + getstatic_quick +putstatic getstatic と同じ選択方法 + putstatic_quick + putstatic2_quick -[makeReturnResult() in inline.c (called by MethodInlininig())] +[quickInvocation() in interpreter.c] -invokeignored_quick -..... +invokespecial + if (同一クラスに対する呼び出し) + invokenonvirtual_quick + else + invokesuper_quick +invokestatic + invokestatic_quick +invokevirtual + if private + invokenonvirtual_quick + else if (offset >= 256) || UseLosslessQuickOpcodes + invokevirtual_quick_w + else if java.lang.Object のメソッドである + invokevirtualobject_quick + else + invokevirtual_quick + +[makeReturnResult() in inline.c] + +invoke*? + if 呼び出し不要 + invokeignored_quick [executeJava()] ldc - ldc_quick + ldc_quick ldc_w - ldc_w_quick + ldc_w_quick ldc2_w - ldc2_w_quick + ldc2_w_quick invokeinterface - invokeinterface_quick + invokeinterface_quick instanceof - instanceof_quick + instanceof_quick checkcast - checkcast_quick + checkcast_quick new - new_quick + new_quick anewarray - anewarray_quick + anewarray_quick multianewarray - multianewarray_quick + multianewarray_quick