summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Loeser <halcy0n@gentoo.org>2005-09-05 21:41:17 +0000
committerMark Loeser <halcy0n@gentoo.org>2005-09-05 21:41:17 +0000
commit9ad2547b321d118d5261359c3295e589aa687468 (patch)
tree036f3a619a1a89c252adb7c93ef82850a56ce1ed /games-emulation/openmsx/files
parent~amd64 (diff)
downloadgentoo-2-9ad2547b321d118d5261359c3295e589aa687468.tar.gz
gentoo-2-9ad2547b321d118d5261359c3295e589aa687468.tar.bz2
gentoo-2-9ad2547b321d118d5261359c3295e589aa687468.zip
Adding upstream patch to fix compilation with GCC4. Some slight changes were
(Portage version: 2.0.51.22-r2)
Diffstat (limited to 'games-emulation/openmsx/files')
-rw-r--r--games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch758
1 files changed, 758 insertions, 0 deletions
diff --git a/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch b/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch
new file mode 100644
index 000000000000..f217f744d204
--- /dev/null
+++ b/games-emulation/openmsx/files/openmsx-0.5.2-gcc4.patch
@@ -0,0 +1,758 @@
+diff -ur openmsx-0.5.2-orig/src/sound/Y8950.cc openmsx-0.5.2/src/sound/Y8950.cc
+--- openmsx-0.5.2-orig/src/sound/Y8950.cc 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/sound/Y8950.cc 2005-09-03 22:31:27.000000000 -0400
+@@ -18,6 +18,22 @@
+
+ namespace openmsx {
+
++static const double PI = 3.14159265358979;
++static const double EG_STEP = 0.1875;
++static const double SL_STEP = 3.0;
++static const double TL_STEP = 0.75;
++static const double DB_STEP = 0.1875;
++
++// PM speed(Hz) and depth(cent)
++static const double PM_SPEED = 6.4;
++static const double PM_DEPTH = 13.75 / 2;
++static const double PM_DEPTH2 = 13.75;
++
++// AM speed(Hz) and depth(dB)
++static const double AM_SPEED = 3.7;
++static const double AM_DEPTH = 1.0;
++static const double AM_DEPTH2 = 4.8;
++
+ short Y8950::dB2LinTab[(2*DB_MUTE)*2];
+ int Y8950::Slot::sintable[PG_WIDTH];
+ int Y8950::Slot::tllTable[16][8][1<<TL_BITS][4];
+diff -ur openmsx-0.5.2-orig/src/sound/Y8950.hh openmsx-0.5.2/src/sound/Y8950.hh
+--- openmsx-0.5.2-orig/src/sound/Y8950.hh 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/sound/Y8950.hh 2005-09-03 22:31:27.000000000 -0400
+@@ -115,11 +115,9 @@
+
+
+ // Dynamic range of envelope
+- static const double EG_STEP = 0.1875;
+ static const int EG_BITS = 9;
+ static const int EG_MUTE = 1<<EG_BITS;
+ // Dynamic range of sustine level
+- static const double SL_STEP = 3.0;
+ static const int SL_BITS = 4;
+ static const int SL_MUTE = 1<<SL_BITS;
+ // Size of Sintable ( 1 -- 18 can be used, but 7 -- 14 recommended.)
+@@ -133,7 +131,6 @@
+ static const int EG_DP_BITS = 23;
+ static const int EG_DP_WIDTH = 1<<EG_DP_BITS;
+ // Dynamic range of total level
+- static const double TL_STEP = 0.75;
+ static const int TL_BITS = 6;
+ static const int TL_MUTE = 1<<TL_BITS;
+
+@@ -193,7 +190,6 @@
+ // Definition of envelope mode
+ enum { ATTACK,DECAY,SUSHOLD,SUSTINE,RELEASE,FINISH };
+ // Dynamic range
+- static const double DB_STEP = 0.1875;
+ static const int DB_BITS = 9;
+ static const int DB_MUTE = 1<<DB_BITS;
+ // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200)
+@@ -264,15 +260,6 @@
+ Slot *slot[18];
+
+ static const int CLK_FREQ = 3579545;
+- static const double PI = 3.14159265358979;
+- // PM speed(Hz) and depth(cent)
+- static const double PM_SPEED = 6.4;
+- static const double PM_DEPTH = (13.75/2);
+- static const double PM_DEPTH2 = 13.75;
+- // AM speed(Hz) and depth(dB)
+- static const double AM_SPEED = 3.7;
+- static const double AM_DEPTH = 1.0;
+- static const double AM_DEPTH2 = 4.8;
+ // Bits for liner value
+ static const int DB2LIN_AMP_BITS = 11;
+ static const int SLOT_AMP_BITS = DB2LIN_AMP_BITS;
+diff -ur openmsx-0.5.2-orig/src/sound/YM2413.cc openmsx-0.5.2/src/sound/YM2413.cc
+--- openmsx-0.5.2-orig/src/sound/YM2413.cc 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/sound/YM2413.cc 2005-09-03 22:31:27.000000000 -0400
+@@ -20,6 +20,18 @@
+
+ static const int CLOCK_FREQ = 3579545;
+ static const double PI = 3.14159265358979323846;
++static const double DB_STEP = 48.0 / (1 << 8); // 48 / (1 << DB_BITS)
++static const double EG_STEP = 0.375;
++static const double TL_STEP = 0.75;
++static const double SL_STEP = 3.0;
++
++// PM speed(Hz) and depth(cent)
++static const double PM_SPEED = 6.4;
++static const double PM_DEPTH = 13.75;
++
++// AM speed(Hz) and depth(dB)
++static const double AM_SPEED = 3.6413;
++static const double AM_DEPTH = 4.875;
+
+ int YM2413::pmtable[PM_PG_WIDTH];
+ int YM2413::amtable[AM_PG_WIDTH];
+diff -ur openmsx-0.5.2-orig/src/sound/YM2413.hh openmsx-0.5.2/src/sound/YM2413.hh
+--- openmsx-0.5.2-orig/src/sound/YM2413.hh 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/sound/YM2413.hh 2005-09-03 22:31:27.000000000 -0400
+@@ -11,13 +11,14 @@
+ namespace openmsx {
+
+ class EmuTime;
++class MSXMotherBoard;
+
+ class YM2413 : public YM2413Core, private SoundDevice, private Debuggable
+ {
+ struct Patch {
+ Patch();
+ Patch(int n, const byte* data);
+-
++
+ bool AM, PM, EG;
+ byte KR; // 0-1
+ byte ML; // 0-15
+@@ -174,21 +175,17 @@
+
+ // Dynamic range (Accuracy of sin table)
+ static const int DB_BITS = 8;
+- static const double DB_STEP = 48.0 / (1 << DB_BITS);
+ static const int DB_MUTE = 1 << DB_BITS;
+
+ // Dynamic range of envelope
+- static const double EG_STEP = 0.375;
+ static const int EG_BITS = 7;
+ static const int EG_MUTE = 1 << EG_BITS;
+
+ // Dynamic range of total level
+- static const double TL_STEP = 0.75;
+ static const int TL_BITS = 6;
+ static const int TL_MUTE = 1 << TL_BITS;
+
+ // Dynamic range of sustine level
+- static const double SL_STEP = 3.0;
+ static const int SL_BITS = 4;
+ static const int SL_MUTE = 1 << SL_BITS;
+
+@@ -210,18 +207,10 @@
+ static const int AM_DP_BITS = 16;
+ static const int AM_DP_WIDTH = 1 << AM_DP_BITS;
+
+- // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200)
++ // PM table is calcurated by PM_AMP * pow(2,PM_DEPTH*sin(x)/1200)
+ static const int PM_AMP_BITS = 8;
+ static const int PM_AMP = 1 << PM_AMP_BITS;
+
+- // PM speed(Hz) and depth(cent)
+- static const double PM_SPEED = 6.4;
+- static const double PM_DEPTH = 13.75;
+-
+- // AM speed(Hz) and depth(dB)
+- static const double AM_SPEED = 3.6413;
+- static const double AM_DEPTH = 4.875;
+-
+ int maxVolume;
+
+ // Register
+Only in openmsx-0.5.2-orig/src/sound: YM2413.hh.orig
+diff -ur openmsx-0.5.2-orig/src/video/Scale2xScaler.cc openmsx-0.5.2/src/video/Scale2xScaler.cc
+--- openmsx-0.5.2-orig/src/video/Scale2xScaler.cc 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/video/Scale2xScaler.cc 2005-09-03 22:31:27.000000000 -0400
+@@ -132,8 +132,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -254,8 +256,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -364,8 +368,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -570,8 +576,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -702,8 +710,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -805,8 +815,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -922,8 +934,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+@@ -1037,8 +1051,10 @@
+ , "r" (src2) // 2
+ , "r" (dst) // 3
+ : "eax"
++ #ifdef __MMX__
+ , "mm0", "mm1", "mm2", "mm3"
+ , "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ };
+diff -ur openmsx-0.5.2-orig/src/video/Scaler.cc openmsx-0.5.2/src/video/Scaler.cc
+--- openmsx-0.5.2-orig/src/video/Scaler.cc 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/video/Scaler.cc 2005-09-03 22:40:18.000000000 -0400
+@@ -86,8 +86,11 @@
+ , "r" (pOut) // 1
+ , "r" (nBytes) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3" ,
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -126,8 +129,11 @@
+ , "r" (pOut) // 1
+ , "r" (nBytes) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3",
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -195,8 +201,11 @@
+ , "r" (pOut) // 1
+ , "r" (width) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3",
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -244,8 +253,11 @@
+ , "r" (pOut) // 1
+ , "r" (width) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3",
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -293,8 +305,11 @@
+ , "r" (pOut) // 1
+ , "r" (width) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3",
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -342,8 +357,11 @@
+ , "r" (pOut) // 1
+ , "r" (width) // 2
+ , "r" (0) // 3
+- : "mm0", "mm1", "mm2", "mm3"
+- , "mm4", "mm5", "mm6", "mm7"
++ #ifdef __MMX__
++ :
++ "mm0", "mm1", "mm2", "mm3",
++ "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -396,7 +414,10 @@
+ , "rm" (col32) // 1
+ , "r" (width * sizeof(Pixel)) // 2
+ , "r" (0) // 3
+- : "mm0"
++ #ifdef __MMX__
++ :
++ "mm0"
++ #endif
+ );
+ return;
+ }
+@@ -429,7 +450,10 @@
+ , "rm" (col32) // 1
+ , "r" (width * sizeof(Pixel)) // 2
+ , "r" (0) // 3
+- : "mm0"
++ #ifdef __MMX__
++ :
++ "mm0"
++ #endif
+ );
+ return;
+ }
+diff -ur openmsx-0.5.2-orig/src/video/SimpleScaler.cc openmsx-0.5.2/src/video/SimpleScaler.cc
+--- openmsx-0.5.2-orig/src/video/SimpleScaler.cc 2005-09-03 22:39:10.000000000 -0400
++++ openmsx-0.5.2/src/video/SimpleScaler.cc 2005-09-03 22:31:27.000000000 -0400
+@@ -260,13 +260,13 @@
+ "punpcklwd %%mm6, %%mm6;"
+ "punpckldq %%mm6, %%mm6;" // mm6 = c2
+ "pxor %%mm7, %%mm7;"
+-
+- "movd (%0,%%eax,4), %%mm0;"
++
++ "movd (%0,%%eax), %%mm0;"
+ "punpcklbw %%mm7, %%mm0;" // p0 = pIn[0]
+ "movq %%mm0, %%mm2;"
+ "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1)
+ "movq %%mm2, %%mm3;" // f1 = f0
+-
++
+ ".p2align 4,,15;"
+ "1:"
+ "pmullw %%mm6, %%mm0;"
+@@ -274,31 +274,31 @@
+ "paddw %%mm3, %%mm0;"
+ "psrlw $8, %%mm0;" // f1 + tmp
+
+- "movd 4(%0,%%eax,4), %%mm1;"
++ "movd 4(%0,%%eax), %%mm1;"
+ "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1]
+ "movq %%mm1, %%mm3;"
+ "pmullw %%mm5, %%mm3;" // f1 = multiply(p1, c1)
+ "paddw %%mm3, %%mm4;"
+ "psrlw $8, %%mm4;" // f1 + tmp
+ "packuswb %%mm4, %%mm0;"
+- "movq %%mm0, (%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
++ "movq %%mm0, (%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
+
+ "pmullw %%mm6, %%mm1;"
+ "movq %%mm1, %%mm4;" // tmp = multiply(p1, c2)
+ "paddw %%mm2, %%mm1;"
+ "psrlw $8, %%mm1;" // f0 + tmp
+
+- "movd 8(%0,%%eax,4), %%mm0;"
++ "movd 8(%0,%%eax), %%mm0;"
+ "punpcklbw %%mm7, %%mm0;" // p0 = pIn[x + 2]
+ "movq %%mm0, %%mm2;"
+ "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1)
+ "paddw %%mm2, %%mm4;"
+ "psrlw $8, %%mm4;" // f0 + tmp
+ "packuswb %%mm4, %%mm1;"
+- "movq %%mm1, 8(%1,%%eax,8);" // pOut[2*x+2] = .. pOut[2*x+3] = ..
++ "movq %%mm1, 8(%1,%%eax,2);" // pOut[2*x+2] = .. pOut[2*x+3] = ..
+
+- "addl $2, %%eax;"
+- "cmpl $318, %%eax;"
++ "addl $8, %%eax;"
++ "cmpl $1272, %%eax;"
+ "jl 1b;"
+
+ "pmullw %%mm6, %%mm0;"
+@@ -306,23 +306,23 @@
+ "paddw %%mm3, %%mm0;"
+ "psrlw $8, %%mm0;" // f1 + tmp
+
+- "movd 4(%0,%%eax,4), %%mm1;"
++ "movd 4(%0,%%eax), %%mm1;"
+ "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1]
+ "movq %%mm1, %%mm3;"
+ "pmullw %%mm5, %%mm3;" // f1 = multiply(p1, c1)
+ "paddw %%mm3, %%mm4;"
+ "psrlw $8, %%mm4;" // f1 + tmp
+ "packuswb %%mm4, %%mm0;"
+- "movq %%mm0, (%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
+-
++ "movq %%mm0, (%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
++
+ "movq %%mm1, %%mm4;"
+ "pmullw %%mm6, %%mm1;" // tmp = multiply(p1, c2)
+ "paddw %%mm2, %%mm1;"
+ "psrlw $8, %%mm1;" // f0 + tmp
+
+ "packuswb %%mm4, %%mm1;"
+- "movq %%mm1, 8(%1,%%eax,8);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
+-
++ "movq %%mm1, 8(%1,%%eax,2);" // pOut[2*x+0] = .. pOut[2*x+1] = ..
++
+ "emms;"
+
+ : // no output
+@@ -330,17 +330,19 @@
+ , "r" (pOut) // 1
+ , "r" (c1) // 2
+ , "r" (c2) // 3
+- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
+- , "eax"
++ : "eax"
++ #ifdef __MMX__
++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+ #endif
+-
++
+ // non-MMX routine, both 16bpp and 32bpp
+ mult1.setFactor(c1);
+ mult2.setFactor(c2);
+-
++
+ Pixel p0 = pIn[0];
+ Pixel p1;
+ unsigned f0 = mult1.mul32(p0);
+@@ -428,16 +430,16 @@
+ "punpcklwd %%mm6, %%mm6;"
+ "punpckldq %%mm6, %%mm6;" // mm6 = c2
+ "pxor %%mm7, %%mm7;"
+-
+- "movd (%0,%%eax,4), %%mm0;"
++
++ "movd (%0,%%eax), %%mm0;"
+ "punpcklbw %%mm7, %%mm0;" // p0 = pIn[0]
+ "movq %%mm0, %%mm2;"
+ "pmullw %%mm5, %%mm2;" // f0 = multiply(p0, c1)
+ "movq %%mm2, %%mm3;" // f1 = f0
+-
++
+ ".p2align 4,,15;"
+ "1:"
+- "movd 4(%0,%%eax,4), %%mm1;"
++ "movd 4(%0,%%eax), %%mm1;"
+ "pxor %%mm7, %%mm7;"
+ "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1]
+ "movq %%mm0, %%mm4;"
+@@ -449,7 +451,7 @@
+ "psrlw $8, %%mm4;" // f0 + t + t0
+ "movq %%mm0, %%mm2;" // f0 = t0
+
+- "movd 8(%0,%%eax,4), %%mm0;"
++ "movd 8(%0,%%eax), %%mm0;"
+ "punpcklbw %%mm7, %%mm0;"
+ "movq %%mm1, %%mm7;"
+ "pmullw %%mm6, %%mm7;" // t = multiply(p1, c2)
+@@ -460,13 +462,13 @@
+ "psrlw $8, %%mm7;" // f1 + t + t1
+ "movq %%mm1, %%mm3;" // f1 = t1
+ "packuswb %%mm7, %%mm4;"
+- "movq %%mm4, (%1,%%eax,4);" // pOut[x] = .. pOut[x+1] = ..
+-
+- "addl $2, %%eax;"
+- "cmpl $638, %%eax;"
++ "movq %%mm4, (%1,%%eax);" // pOut[x] = .. pOut[x+1] = ..
++
++ "addl $8, %%eax;"
++ "cmpl $2552, %%eax;"
+ "jl 1b;"
+
+- "movd 4(%0,%%eax,4), %%mm1;"
++ "movd 4(%0,%%eax), %%mm1;"
+ "pxor %%mm7, %%mm7;"
+ "punpcklbw %%mm7, %%mm1;" // p1 = pIn[x + 1]
+ "movq %%mm0, %%mm4;"
+@@ -482,8 +484,8 @@
+ "paddw %%mm0, %%mm1;"
+ "psrlw $8, %%mm1;" // f1 + t + t1
+ "packuswb %%mm1, %%mm4;"
+- "movq %%mm4, (%1,%%eax,4);" // pOut[x] = .. pOut[x+1] = ..
+-
++ "movq %%mm4, (%1,%%eax);" // pOut[x] = .. pOut[x+1] = ..
++
+ "emms;"
+
+ : // no output
+@@ -491,8 +493,10 @@
+ , "r" (pOut) // 1
+ , "r" (c1) // 2
+ , "r" (c2) // 3
+- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
+- , "eax"
++ : "eax"
++ #ifdef __MMX__
++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -547,8 +551,8 @@
+ "pshufw $0, %%mm6, %%mm6;"
+ ".p2align 4,,15;"
+ "1:"
+- "movq (%0,%%eax,4), %%mm0;"
+- "pavgb (%1,%%eax,4), %%mm0;"
++ "movq (%0,%%eax), %%mm0;"
++ "pavgb (%1,%%eax), %%mm0;"
+ "movq %%mm0, %%mm4;"
+ "punpcklbw %%mm7, %%mm0;"
+ "punpckhbw %%mm7, %%mm4;"
+@@ -556,8 +560,8 @@
+ "pmulhuw %%mm6, %%mm4;"
+ "packuswb %%mm4, %%mm0;"
+
+- "movq 8(%0,%%eax,4), %%mm1;"
+- "pavgb 8(%1,%%eax,4), %%mm1;"
++ "movq 8(%0,%%eax), %%mm1;"
++ "pavgb 8(%1,%%eax), %%mm1;"
+ "movq %%mm1, %%mm5;"
+ "punpcklbw %%mm7, %%mm1;"
+ "punpckhbw %%mm7, %%mm5;"
+@@ -565,8 +569,8 @@
+ "pmulhuw %%mm6, %%mm5;"
+ "packuswb %%mm5, %%mm1;"
+
+- "movq 16(%0,%%eax,4), %%mm2;"
+- "pavgb 16(%1,%%eax,4), %%mm2;"
++ "movq 16(%0,%%eax), %%mm2;"
++ "pavgb 16(%1,%%eax), %%mm2;"
+ "movq %%mm2, %%mm4;"
+ "punpcklbw %%mm7, %%mm2;"
+ "punpckhbw %%mm7, %%mm4;"
+@@ -574,8 +578,8 @@
+ "pmulhuw %%mm6, %%mm4;"
+ "packuswb %%mm4, %%mm2;"
+
+- "movq 24(%0,%%eax,4), %%mm3;"
+- "pavgb 24(%1,%%eax,4), %%mm3;"
++ "movq 24(%0,%%eax), %%mm3;"
++ "pavgb 24(%1,%%eax), %%mm3;"
+ "movq %%mm3, %%mm5;"
+ "punpcklbw %%mm7, %%mm3;"
+ "punpckhbw %%mm7, %%mm5;"
+@@ -583,24 +587,26 @@
+ "pmulhuw %%mm6, %%mm5;"
+ "packuswb %%mm5, %%mm3;"
+
+- "movntq %%mm0, (%2,%%eax,4);"
+- "movntq %%mm1, 8(%2,%%eax,4);"
+- "movntq %%mm2, 16(%2,%%eax,4);"
+- "movntq %%mm3, 24(%2,%%eax,4);"
+-
+- "addl $8, %%eax;"
+- "cmpl $640, %%eax;"
++ "movntq %%mm0, (%2,%%eax);"
++ "movntq %%mm1, 8(%2,%%eax);"
++ "movntq %%mm2, 16(%2,%%eax);"
++ "movntq %%mm3, 24(%2,%%eax);"
++
++ "addl $32, %%eax;"
++ "cmpl $2560, %%eax;"
+ "jl 1b;"
+-
++
+ "emms;"
+-
++
+ : // no output
+ : "r" (src1) // 0
+ , "r" (src2) // 1
+ , "r" (dst) // 2
+ , "r" (alpha << 8) // 3
+- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
+- , "eax"
++ : "eax"
++ #ifdef __MMX__
++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
++ #endif
+ );
+ return;
+
+@@ -610,15 +616,14 @@
+ "movd %3, %%mm6;"
+ "pxor %%mm7, %%mm7;"
+ "punpcklwd %%mm6, %%mm6;"
+- "punpckldq %%mm6, %%mm6;"
+-
+ "xorl %%eax, %%eax;"
++ "punpckldq %%mm6, %%mm6;"
+ ".p2align 4,,15;"
+ "1:"
+ // load
+- "movq (%0,%%eax,4), %%mm0;"
++ "movq (%0,%%eax), %%mm0;"
+ "movq %%mm0, %%mm1;"
+- "movq (%1,%%eax,4), %%mm2;"
++ "movq (%1,%%eax), %%mm2;"
+ "movq %%mm2, %%mm3;"
+ // unpack
+ "punpcklbw %%mm7, %%mm0;"
+@@ -634,21 +639,23 @@
+ // pack
+ "packuswb %%mm1, %%mm0;"
+ // store
+- "movq %%mm0, (%2,%%eax,4);"
+-
+- "addl $2, %%eax;"
+- "cmpl $640, %%eax;"
++ "movq %%mm0, (%2,%%eax);"
++
++ "addl $8, %%eax;"
++ "cmpl $2560, %%eax;"
+ "jl 1b;"
+-
++
+ "emms;"
+-
++
+ : // no output
+ : "r" (src1) // 0
+ , "r" (src2) // 1
+ , "r" (dst) // 2
+ , "r" (alpha << 7) // 3
+- : "mm0", "mm1", "mm2", "mm3", "mm6", "mm7"
+- , "eax"
++ : "eax"
++ #ifdef __MMX__
++ , "mm0", "mm1", "mm2", "mm3", "mm6", "mm7"
++ #endif
+ );
+ return;
+ }
+@@ -659,17 +666,17 @@
+ darkener.setFactor(alpha);
+ word* table = darkener.getTable();
+ Pixel mask = ~blender.getMask();
+-
++
+ asm (
+ "movd %4, %%mm7;"
+ "xorl %%ecx, %%ecx;"
+ "pshufw $0, %%mm7, %%mm7;"
+-
++
+ ".p2align 4,,15;"
+- "1:" "movq (%0,%%ecx,2), %%mm0;"
+- "movq 8(%0,%%ecx,2), %%mm1;"
+- "movq (%1,%%ecx,2), %%mm2;"
+- "movq 8(%1,%%ecx,2), %%mm3;"
++ "1:" "movq (%0,%%ecx), %%mm0;"
++ "movq 8(%0,%%ecx), %%mm1;"
++ "movq (%1,%%ecx), %%mm2;"
++ "movq 8(%1,%%ecx), %%mm3;"
+
+ "movq %%mm7, %%mm4;"
+ "movq %%mm7, %%mm5;"
+@@ -683,14 +690,14 @@
+ "pavgw %%mm3, %%mm1;"
+ "paddw %%mm4, %%mm0;"
+ "paddw %%mm5, %%mm1;"
+-
++
+ "pextrw $0, %%mm0, %%eax;"
+ "movw (%2,%%eax,2), %%ax;"
+ "pinsrw $0, %%eax, %%mm0;"
+ "pextrw $0, %%mm1, %%eax;"
+ "movw (%2,%%eax,2), %%ax;"
+ "pinsrw $0, %%eax, %%mm1;"
+-
++
+ "pextrw $1, %%mm0, %%eax;"
+ "movw (%2,%%eax,2), %%ax;"
+ "pinsrw $1, %%eax, %%mm0;"
+@@ -711,12 +718,12 @@
+ "pextrw $3, %%mm1, %%eax;"
+ "movw (%2,%%eax,2), %%ax;"
+ "pinsrw $3, %%eax, %%mm1;"
+-
+- "movntq %%mm0, (%3,%%ecx,2);"
+- "movntq %%mm1, 8(%3,%%ecx,2);"
+
+- "addl $8, %%ecx;"
+- "cmpl $640, %%ecx;"
++ "movntq %%mm0, (%3,%%ecx);"
++ "movntq %%mm1, 8(%3,%%ecx);"
++
++ "addl $16, %%ecx;"
++ "cmpl $1280, %%ecx;"
+ "jl 1b;"
+ "emms;"
+ : // no output
+@@ -725,16 +732,18 @@
+ , "r" (table) // 2
+ , "r" (dst) // 3
+ , "m" (mask) // 4
+- : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm7"
+- , "eax", "ecx"
++ : "eax", "ecx"
++ #ifdef __MMX__
++ , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm7"
++ #endif
+ );
+ return;
+ }
+ // MMX routine 16bpp is missing, but it's difficult to write because
+ // of the missing "pextrw" and "pinsrw" instructions
+-
++
+ #endif
+-
++
+ // non-MMX routine, both 16bpp and 32bpp
+ for (unsigned x = 0; x < 640; ++x) {
+ dst[x] = mult1.multiply(