diff options
author | Mark Loeser <halcy0n@gentoo.org> | 2005-09-05 21:41:17 +0000 |
---|---|---|
committer | Mark Loeser <halcy0n@gentoo.org> | 2005-09-05 21:41:17 +0000 |
commit | 9ad2547b321d118d5261359c3295e589aa687468 (patch) | |
tree | 036f3a619a1a89c252adb7c93ef82850a56ce1ed /games-emulation/openmsx/files | |
parent | ~amd64 (diff) | |
download | gentoo-2-9ad2547b321d118d5261359c3295e589aa687468.tar.gz gentoo-2-9ad2547b321d118d5261359c3295e589aa687468.tar.bz2 gentoo-2-9ad2547b321d118d5261359c3295e589aa687468.zip |
Adding upstream patch to fix compilation with GCC4. Some slight changes were made.
(Portage version: 2.0.51.22-r2)
Diffstat (limited to 'games-emulation/openmsx/files')
-rw-r--r-- | games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch | 758 |
1 files changed, 758 insertions, 0 deletions
diff --git a/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch b/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch new file mode 100644 index 000000000000..f217f744d204 --- /dev/null +++ b/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch @@ -0,0 +1,758 @@ +diff -ur openmsx-0.5.2-orig/src/sound/Y8950.cc openmsx-0.5.2/src/sound/Y8950.cc +--- openmsx-0.5.2-orig/src/sound/Y8950.cc 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/sound/Y8950.cc 2005-09-03 22:31:27.000000000 -0400 +@@ -18,6 +18,22 @@ + + namespace openmsx { + ++static const double PI = 3.14159265358979; ++static const double EG_STEP = 0.1875; ++static const double SL_STEP = 3.0; ++static const double TL_STEP = 0.75; ++static const double DB_STEP = 0.1875; ++ ++// PM speed(Hz) and depth(cent) ++static const double PM_SPEED = 6.4; ++static const double PM_DEPTH = 13.75 / 2; ++static const double PM_DEPTH2 = 13.75; ++ ++// AM speed(Hz) and depth(dB) ++static const double AM_SPEED = 3.7; ++static const double AM_DEPTH = 1.0; ++static const double AM_DEPTH2 = 4.8; ++ + short Y8950::dB2LinTab[(2*DB_MUTE)*2]; + int Y8950::Slot::sintable[PG_WIDTH]; + int Y8950::Slot::tllTable[16][8][1<<TL_BITS][4]; +diff -ur openmsx-0.5.2-orig/src/sound/Y8950.hh openmsx-0.5.2/src/sound/Y8950.hh +--- openmsx-0.5.2-orig/src/sound/Y8950.hh 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/sound/Y8950.hh 2005-09-03 22:31:27.000000000 -0400 +@@ -115,11 +115,9 @@ + + + // Dynamic range of envelope +- static const double EG_STEP = 0.1875; + static const int EG_BITS = 9; + static const int EG_MUTE = 1<<EG_BITS; + // Dynamic range of sustine level +- static const double SL_STEP = 3.0; + static const int SL_BITS = 4; + static const int SL_MUTE = 1<<SL_BITS; + // Size of Sintable ( 1 -- 18 can be used, but 7 -- 14 recommended.) 
+@@ -133,7 +131,6 @@ + static const int EG_DP_BITS = 23; + static const int EG_DP_WIDTH = 1<<EG_DP_BITS; + // Dynamic range of total level +- static const double TL_STEP = 0.75; + static const int TL_BITS = 6; + static const int TL_MUTE = 1<<TL_BITS; + +@@ -193,7 +190,6 @@ + // Definition of envelope mode + enum { ATTACK,DECAY,SUSHOLD,SUSTINE,RELEASE,FINISH }; + // Dynamic range +- static const double DB_STEP = 0.1875; + static const int DB_BITS = 9; + static const int DB_MUTE = 1<<DB_BITS; + // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200) +@@ -264,15 +260,6 @@ + Slot *slot[18]; + + static const int CLK_FREQ = 3579545; +- static const double PI = 3.14159265358979; +- // PM speed(Hz) and depth(cent) +- static const double PM_SPEED = 6.4; +- static const double PM_DEPTH = (13.75/2); +- static const double PM_DEPTH2 = 13.75; +- // AM speed(Hz) and depth(dB) +- static const double AM_SPEED = 3.7; +- static const double AM_DEPTH = 1.0; +- static const double AM_DEPTH2 = 4.8; + // Bits for liner value + static const int DB2LIN_AMP_BITS = 11; + static const int SLOT_AMP_BITS = DB2LIN_AMP_BITS; +diff -ur openmsx-0.5.2-orig/src/sound/YM2413.cc openmsx-0.5.2/src/sound/YM2413.cc +--- openmsx-0.5.2-orig/src/sound/YM2413.cc 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/sound/YM2413.cc 2005-09-03 22:31:27.000000000 -0400 +@@ -20,6 +20,18 @@ + + static const int CLOCK_FREQ = 3579545; + static const double PI = 3.14159265358979323846; ++static const double DB_STEP = 48.0 / (1 << 8); // 48 / (1 << DB_BITS) ++static const double EG_STEP = 0.375; ++static const double TL_STEP = 0.75; ++static const double SL_STEP = 3.0; ++ ++// PM speed(Hz) and depth(cent) ++static const double PM_SPEED = 6.4; ++static const double PM_DEPTH = 13.75; ++ ++// AM speed(Hz) and depth(dB) ++static const double AM_SPEED = 3.6413; ++static const double AM_DEPTH = 4.875; + + int YM2413::pmtable[PM_PG_WIDTH]; + int YM2413::amtable[AM_PG_WIDTH]; +diff -ur 
openmsx-0.5.2-orig/src/sound/YM2413.hh openmsx-0.5.2/src/sound/YM2413.hh +--- openmsx-0.5.2-orig/src/sound/YM2413.hh 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/sound/YM2413.hh 2005-09-03 22:31:27.000000000 -0400 +@@ -11,13 +11,14 @@ + namespace openmsx { + + class EmuTime; ++class MSXMotherBoard; + + class YM2413 : public YM2413Core, private SoundDevice, private Debuggable + { + struct Patch { + Patch(); + Patch(int n, const byte* data); +- ++ + bool AM, PM, EG; + byte KR; // 0-1 + byte ML; // 0-15 +@@ -174,21 +175,17 @@ + + // Dynamic range (Accuracy of sin table) + static const int DB_BITS = 8; +- static const double DB_STEP = 48.0 / (1 << DB_BITS); + static const int DB_MUTE = 1 << DB_BITS; + + // Dynamic range of envelope +- static const double EG_STEP = 0.375; + static const int EG_BITS = 7; + static const int EG_MUTE = 1 << EG_BITS; + + // Dynamic range of total level +- static const double TL_STEP = 0.75; + static const int TL_BITS = 6; + static const int TL_MUTE = 1 << TL_BITS; + + // Dynamic range of sustine level +- static const double SL_STEP = 3.0; + static const int SL_BITS = 4; + static const int SL_MUTE = 1 << SL_BITS; + +@@ -210,18 +207,10 @@ + static const int AM_DP_BITS = 16; + static const int AM_DP_WIDTH = 1 << AM_DP_BITS; + +- // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200) ++ // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200) + static const int PM_AMP_BITS = 8; + static const int PM_AMP = 1 << PM_AMP_BITS; + +- // PM speed(Hz) and depth(cent) +- static const double PM_SPEED = 6.4; +- static const double PM_DEPTH = 13.75; +- +- // AM speed(Hz) and depth(dB) +- static const double AM_SPEED = 3.6413; +- static const double AM_DEPTH = 4.875; +- + int maxVolume; + + // Register +Only in openmsx-0.5.2-orig/src/sound: YM2413.hh.orig +diff -ur openmsx-0.5.2-orig/src/video/Scale2xScaler.cc openmsx-0.5.2/src/video/Scale2xScaler.cc +--- openmsx-0.5.2-orig/src/video/Scale2xScaler.cc 2005-09-03 
22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/video/Scale2xScaler.cc 2005-09-03 22:31:27.000000000 -0400 +@@ -132,8 +132,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -254,8 +256,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -364,8 +368,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -570,8 +576,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -702,8 +710,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -805,8 +815,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -922,8 +934,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +@@ -1037,8 +1051,10 @@ + , "r" (src2) // 2 + , "r" (dst) // 3 + : "eax" ++ #ifdef __MMX__ + , "mm0", "mm1", "mm2", "mm3" + , "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + }; +diff -ur openmsx-0.5.2-orig/src/video/Scaler.cc openmsx-0.5.2/src/video/Scaler.cc +--- openmsx-0.5.2-orig/src/video/Scaler.cc 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/video/Scaler.cc 2005-09-03 22:40:18.000000000 -0400 +@@ -86,8 +86,11 @@ + , "r" (pOut) // 1 + , "r" (nBytes) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3" , ++ 
"mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -126,8 +129,11 @@ + , "r" (pOut) // 1 + , "r" (nBytes) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3", ++ "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -195,8 +201,11 @@ + , "r" (pOut) // 1 + , "r" (width) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3", ++ "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -244,8 +253,11 @@ + , "r" (pOut) // 1 + , "r" (width) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3", ++ "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -293,8 +305,11 @@ + , "r" (pOut) // 1 + , "r" (width) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3", ++ "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -342,8 +357,11 @@ + , "r" (pOut) // 1 + , "r" (width) // 2 + , "r" (0) // 3 +- : "mm0", "mm1", "mm2", "mm3" +- , "mm4", "mm5", "mm6", "mm7" ++ #ifdef __MMX__ ++ : ++ "mm0", "mm1", "mm2", "mm3", ++ "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -396,7 +414,10 @@ + , "rm" (col32) // 1 + , "r" (width * sizeof(Pixel)) // 2 + , "r" (0) // 3 +- : "mm0" ++ #ifdef __MMX__ ++ : ++ "mm0" ++ #endif + ); + return; + } +@@ -429,7 +450,10 @@ + , "rm" (col32) // 1 + , "r" (width * sizeof(Pixel)) // 2 + , "r" (0) // 3 +- : "mm0" ++ #ifdef __MMX__ ++ : ++ "mm0" ++ #endif + ); + return; + } +diff -ur openmsx-0.5.2-orig/src/video/SimpleScaler.cc openmsx-0.5.2/src/video/SimpleScaler.cc +--- openmsx-0.5.2-orig/src/video/SimpleScaler.cc 2005-09-03 22:39:10.000000000 -0400 ++++ openmsx-0.5.2/src/video/SimpleScaler.cc 2005-09-03 22:31:27.000000000 -0400 +@@ -260,13 +260,13 @@ + "punpcklwd %%mm6, %%mm6;" + 
"punpckldq %%mm6, %%mm6;" // mm6 = c2 + "pxor %%mm7, %%mm7;" +- +- "movd (%0,%%eax,4), %%mm0;" ++ ++ "movd (%0,%%eax), %%mm0;" + "punpcklbw %%mm7, %%mm0;" // p0 = pIn[0] + "movq %%mm0, %%mm2;" + "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1) + "movq %%mm2, %%mm3;" // f1 = f0 +- ++ + ".p2align 4,,15;" + "1:" + "pmullw %%mm6, %%mm0;" +@@ -274,31 +274,31 @@ + "paddw %%mm3, %%mm0;" + "psrlw $8, %%mm0;" // f1 + tmp + +- "movd 4(%0,%%eax,4), %%mm1;" ++ "movd 4(%0,%%eax), %%mm1;" + "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1] + "movq %%mm1, %%mm3;" + "pmullw %%mm5, %%mm3;" // f1 = multiply(p1, c1) + "paddw %%mm3, %%mm4;" + "psrlw $8, %%mm4;" // f1 + tmp + "packuswb %%mm4, %%mm0;" +- "movq %%mm0, (%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = .. ++ "movq %%mm0, (%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = .. + + "pmullw %%mm6, %%mm1;" + "movq %%mm1, %%mm4;" // tmp = multiply(p1, c2) + "paddw %%mm2, %%mm1;" + "psrlw $8, %%mm1;" // f0 + tmp + +- "movd 8(%0,%%eax,4), %%mm0;" ++ "movd 8(%0,%%eax), %%mm0;" + "punpcklbw %%mm7, %%mm0;" // p0 = pIn[x + 2] + "movq %%mm0, %%mm2;" + "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1) + "paddw %%mm2, %%mm4;" + "psrlw $8, %%mm4;" // f0 + tmp + "packuswb %%mm4, %%mm1;" +- "movq %%mm1, 8(%1,%%eax,8);" // pOut[2*x+2] = .. pOut[2*x+3] = .. ++ "movq %%mm1, 8(%1,%%eax,2);" // pOut[2*x+2] = .. pOut[2*x+3] = .. + +- "addl $2, %%eax;" +- "cmpl $318, %%eax;" ++ "addl $8, %%eax;" ++ "cmpl $1272, %%eax;" + "jl 1b;" + + "pmullw %%mm6, %%mm0;" +@@ -306,23 +306,23 @@ + "paddw %%mm3, %%mm0;" + "psrlw $8, %%mm0;" // f1 + tmp + +- "movd 4(%0,%%eax,4), %%mm1;" ++ "movd 4(%0,%%eax), %%mm1;" + "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1] + "movq %%mm1, %%mm3;" + "pmullw %%mm5, %%mm3;" // f1 = multiply(p1, c1) + "paddw %%mm3, %%mm4;" + "psrlw $8, %%mm4;" // f1 + tmp + "packuswb %%mm4, %%mm0;" +- "movq %%mm0, (%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = .. +- ++ "movq %%mm0, (%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = .. 
++ + "movq %%mm1, %%mm4;" + "pmullw %%mm6, %%mm1;" // tmp = multiply(p1, c2) + "paddw %%mm2, %%mm1;" + "psrlw $8, %%mm1;" // f0 + tmp + + "packuswb %%mm4, %%mm1;" +- "movq %%mm1, 8(%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = .. +- ++ "movq %%mm1, 8(%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = .. ++ + "emms;" + + : // no output +@@ -330,17 +330,19 @@ + , "r" (pOut) // 1 + , "r" (c1) // 2 + , "r" (c2) // 3 +- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" +- , "eax" ++ : "eax" ++ #ifdef __MMX__ ++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } + #endif +- ++ + // non-MMX routine, both 16bpp and 32bpp + mult1.setFactor(c1); + mult2.setFactor(c2); +- ++ + Pixel p0 = pIn[0]; + Pixel p1; + unsigned f0 = mult1.mul32(p0); +@@ -428,16 +430,16 @@ + "punpcklwd %%mm6, %%mm6;" + "punpckldq %%mm6, %%mm6;" // mm6 = c2 + "pxor %%mm7, %%mm7;" +- +- "movd (%0,%%eax,4), %%mm0;" ++ ++ "movd (%0,%%eax), %%mm0;" + "punpcklbw %%mm7, %%mm0;" // p0 = pIn[0] + "movq %%mm0, %%mm2;" + "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1) + "movq %%mm2, %%mm3;" // f1 = f0 +- ++ + ".p2align 4,,15;" + "1:" +- "movd 4(%0,%%eax,4), %%mm1;" ++ "movd 4(%0,%%eax), %%mm1;" + "pxor %%mm7, %%mm7;" + "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1] + "movq %%mm0, %%mm4;" +@@ -449,7 +451,7 @@ + "psrlw $8, %%mm4;" // f0 + t + t0 + "movq %%mm0, %%mm2;" // f0 = t0 + +- "movd 8(%0,%%eax,4), %%mm0;" ++ "movd 8(%0,%%eax), %%mm0;" + "punpcklbw %%mm7, %%mm0;" + "movq %%mm1, %%mm7;" + "pmullw %%mm6, %%mm7;" // t = multiply(p1, c2) +@@ -460,13 +462,13 @@ + "psrlw $8, %%mm7;" // f1 + t + t1 + "movq %%mm1, %%mm3;" // f1 = t1 + "packuswb %%mm7, %%mm4;" +- "movq %%mm4, (%1,%%eax,4);" // pOut[x] = .. pOut[x+1] = .. +- +- "addl $2, %%eax;" +- "cmpl $638, %%eax;" ++ "movq %%mm4, (%1,%%eax);" // pOut[x] = .. pOut[x+1] = .. 
++ ++ "addl $8, %%eax;" ++ "cmpl $2552, %%eax;" + "jl 1b;" + +- "movd 4(%0,%%eax,4), %%mm1;" ++ "movd 4(%0,%%eax), %%mm1;" + "pxor %%mm7, %%mm7;" + "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1] + "movq %%mm0, %%mm4;" +@@ -482,8 +484,8 @@ + "paddw %%mm0, %%mm1;" + "psrlw $8, %%mm1;" // f1 + t + t1 + "packuswb %%mm1, %%mm4;" +- "movq %%mm4, (%1,%%eax,4);" // pOut[x] = .. pOut[x+1] = .. +- ++ "movq %%mm4, (%1,%%eax);" // pOut[x] = .. pOut[x+1] = .. ++ + "emms;" + + : // no output +@@ -491,8 +493,10 @@ + , "r" (pOut) // 1 + , "r" (c1) // 2 + , "r" (c2) // 3 +- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" +- , "eax" ++ : "eax" ++ #ifdef __MMX__ ++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -547,8 +551,8 @@ + "pshufw $0, %%mm6, %%mm6;" + ".p2align 4,,15;" + "1:" +- "movq (%0,%%eax,4), %%mm0;" +- "pavgb (%1,%%eax,4), %%mm0;" ++ "movq (%0,%%eax), %%mm0;" ++ "pavgb (%1,%%eax), %%mm0;" + "movq %%mm0, %%mm4;" + "punpcklbw %%mm7, %%mm0;" + "punpckhbw %%mm7, %%mm4;" +@@ -556,8 +560,8 @@ + "pmulhuw %%mm6, %%mm4;" + "packuswb %%mm4, %%mm0;" + +- "movq 8(%0,%%eax,4), %%mm1;" +- "pavgb 8(%1,%%eax,4), %%mm1;" ++ "movq 8(%0,%%eax), %%mm1;" ++ "pavgb 8(%1,%%eax), %%mm1;" + "movq %%mm1, %%mm5;" + "punpcklbw %%mm7, %%mm1;" + "punpckhbw %%mm7, %%mm5;" +@@ -565,8 +569,8 @@ + "pmulhuw %%mm6, %%mm5;" + "packuswb %%mm5, %%mm1;" + +- "movq 16(%0,%%eax,4), %%mm2;" +- "pavgb 16(%1,%%eax,4), %%mm2;" ++ "movq 16(%0,%%eax), %%mm2;" ++ "pavgb 16(%1,%%eax), %%mm2;" + "movq %%mm2, %%mm4;" + "punpcklbw %%mm7, %%mm2;" + "punpckhbw %%mm7, %%mm4;" +@@ -574,8 +578,8 @@ + "pmulhuw %%mm6, %%mm4;" + "packuswb %%mm4, %%mm2;" + +- "movq 24(%0,%%eax,4), %%mm3;" +- "pavgb 24(%1,%%eax,4), %%mm3;" ++ "movq 24(%0,%%eax), %%mm3;" ++ "pavgb 24(%1,%%eax), %%mm3;" + "movq %%mm3, %%mm5;" + "punpcklbw %%mm7, %%mm3;" + "punpckhbw %%mm7, %%mm5;" +@@ -583,24 +587,26 @@ + "pmulhuw %%mm6, %%mm5;" + "packuswb %%mm5, %%mm3;" + +- "movntq %%mm0, (%2,%%eax,4);" +- "movntq 
%%mm1, 8(%2,%%eax,4);" +- "movntq %%mm2, 16(%2,%%eax,4);" +- "movntq %%mm3, 24(%2,%%eax,4);" +- +- "addl $8, %%eax;" +- "cmpl $640, %%eax;" ++ "movntq %%mm0, (%2,%%eax);" ++ "movntq %%mm1, 8(%2,%%eax);" ++ "movntq %%mm2, 16(%2,%%eax);" ++ "movntq %%mm3, 24(%2,%%eax);" ++ ++ "addl $32, %%eax;" ++ "cmpl $2560, %%eax;" + "jl 1b;" +- ++ + "emms;" +- ++ + : // no output + : "r" (src1) // 0 + , "r" (src2) // 1 + , "r" (dst) // 2 + , "r" (alpha << 8) // 3 +- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" +- , "eax" ++ : "eax" ++ #ifdef __MMX__ ++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" ++ #endif + ); + return; + +@@ -610,15 +616,14 @@ + "movd %3, %%mm6;" + "pxor %%mm7, %%mm7;" + "punpcklwd %%mm6, %%mm6;" +- "punpckldq %%mm6, %%mm6;" +- + "xorl %%eax, %%eax;" ++ "punpckldq %%mm6, %%mm6;" + ".p2align 4,,15;" + "1:" + // load +- "movq (%0,%%eax,4), %%mm0;" ++ "movq (%0,%%eax), %%mm0;" + "movq %%mm0, %%mm1;" +- "movq (%1,%%eax,4), %%mm2;" ++ "movq (%1,%%eax), %%mm2;" + "movq %%mm2, %%mm3;" + // unpack + "punpcklbw %%mm7, %%mm0;" +@@ -634,21 +639,23 @@ + // pack + "packuswb %%mm1, %%mm0;" + // store +- "movq %%mm0, (%2,%%eax,4);" +- +- "addl $2, %%eax;" +- "cmpl $640, %%eax;" ++ "movq %%mm0, (%2,%%eax);" ++ ++ "addl $8, %%eax;" ++ "cmpl $2560, %%eax;" + "jl 1b;" +- ++ + "emms;" +- ++ + : // no output + : "r" (src1) // 0 + , "r" (src2) // 1 + , "r" (dst) // 2 + , "r" (alpha << 7) // 3 +- : "mm0", "mm1", "mm2", "mm3", "mm6", "mm7" +- , "eax" ++ : "eax" ++ #ifdef __MMX__ ++ , "mm0", "mm1", "mm2", "mm3", "mm6", "mm7" ++ #endif + ); + return; + } +@@ -659,17 +666,17 @@ + darkener.setFactor(alpha); + word* table = darkener.getTable(); + Pixel mask = ~blender.getMask(); +- ++ + asm ( + "movd %4, %%mm7;" + "xorl %%ecx, %%ecx;" + "pshufw $0, %%mm7, %%mm7;" +- ++ + ".p2align 4,,15;" +- "1:" "movq (%0,%%ecx,2), %%mm0;" +- "movq 8(%0,%%ecx,2), %%mm1;" +- "movq (%1,%%ecx,2), %%mm2;" +- "movq 8(%1,%%ecx,2), %%mm3;" ++ "1:" "movq (%0,%%ecx), %%mm0;" ++ "movq 
8(%0,%%ecx), %%mm1;" ++ "movq (%1,%%ecx), %%mm2;" ++ "movq 8(%1,%%ecx), %%mm3;" + + "movq %%mm7, %%mm4;" + "movq %%mm7, %%mm5;" +@@ -683,14 +690,14 @@ + "pavgw %%mm3, %%mm1;" + "paddw %%mm4, %%mm0;" + "paddw %%mm5, %%mm1;" +- ++ + "pextrw $0, %%mm0, %%eax;" + "movw (%2,%%eax,2), %%ax;" + "pinsrw $0, %%eax, %%mm0;" + "pextrw $0, %%mm1, %%eax;" + "movw (%2,%%eax,2), %%ax;" + "pinsrw $0, %%eax, %%mm1;" +- ++ + "pextrw $1, %%mm0, %%eax;" + "movw (%2,%%eax,2), %%ax;" + "pinsrw $1, %%eax, %%mm0;" +@@ -711,12 +718,12 @@ + "pextrw $3, %%mm1, %%eax;" + "movw (%2,%%eax,2), %%ax;" + "pinsrw $3, %%eax, %%mm1;" +- +- "movntq %%mm0, (%3,%%ecx,2);" +- "movntq %%mm1, 8(%3,%%ecx,2);" + +- "addl $8, %%ecx;" +- "cmpl $640, %%ecx;" ++ "movntq %%mm0, (%3,%%ecx);" ++ "movntq %%mm1, 8(%3,%%ecx);" ++ ++ "addl $16, %%ecx;" ++ "cmpl $1280, %%ecx;" + "jl 1b;" + "emms;" + : // no output +@@ -725,16 +732,18 @@ + , "r" (table) // 2 + , "r" (dst) // 3 + , "m" (mask) // 4 +- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm7" +- , "eax", "ecx" ++ : "eax", "ecx" ++ #ifdef __MMX__ ++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm7" ++ #endif + ); + return; + } + // MMX routine 16bpp is missing, but it's difficult to write because + // of the missing "pextrw" and "pinsrw" instructions +- ++ + #endif +- ++ + // non-MMX routine, both 16bpp and 32bpp + for (unsigned x = 0; x < 640; ++x) { + dst[x] = mult1.multiply( |