51 # include <mmintrin.h>
53 # include <SDL_cpuinfo.h>
/*
 * SWAP_32(x): reverse the byte order of a 32-bit value.
 * The argument is expanded four times, so it must be free of side effects.
 * NOTE(review): the two outer shifts are not masked, so the result is a clean
 * byte swap only when x is an unsigned 32-bit quantity - confirm at call sites.
 */
61 #define SWAP_32(x) (((x) >> 24) | (((x) & 0x00ff0000) >> 8) | (((x) & 0x0000ff00) << 8) | ((x) << 24))
/*
 * File-local flag, initialised to 1. Other lines of this listing compare it
 * against 0 and reassign it to 0 or 1. Presumably it gates the MMX code
 * paths - confirm against the functions that read it.
 */
68 static int SDL_imageFilterUseMMX = 1;
83 if (SDL_imageFilterUseMMX == 0) {
95 SDL_imageFilterUseMMX = 0;
103 SDL_imageFilterUseMMX = 1;
118 static int SDL_imageFilterAddMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
145 __m64 *mSrc1 = (__m64*)Src1;
146 __m64 *mSrc2 = (__m64*)Src2;
147 __m64 *mDest = (__m64*)Dest;
149 for (i = 0; i < SrcLength/8; i++) {
150 *mDest = _m_paddusb(*mSrc1, *mSrc2);
173 int SDL_imageFilterAdd(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int length)
175 unsigned int i, istart;
176 unsigned char *cursrc1, *cursrc2, *curdst;
180 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
188 SDL_imageFilterAddMMX(Src1, Src2, Dest, length);
191 if ((length & 7) > 0) {
193 istart = length & 0xfffffff8;
194 cursrc1 = &Src1[istart];
195 cursrc2 = &Src2[istart];
196 curdst = &Dest[istart];
210 for (i = istart; i < length; i++) {
211 result = (int) *cursrc1 + (
int) *cursrc2;
214 *curdst = (
unsigned char) result;
235 static int SDL_imageFilterMeanMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength,
273 __m64 *mSrc1 = (__m64*)Src1;
274 __m64 *mSrc2 = (__m64*)Src2;
275 __m64 *mDest = (__m64*)Dest;
276 __m64 *mMask = (__m64*)Mask;
278 for (i = 0; i < SrcLength/8; i++) {
281 mm1 = _m_psrlwi(mm1, 1);
282 mm2 = _m_psrlwi(mm2, 1);
283 mm1 = _m_pand(mm1, *mMask);
284 mm2 = _m_pand(mm2, *mMask);
285 *mDest = _m_paddusb(mm1, mm2);
308 int SDL_imageFilterMean(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int length)
310 static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
311 unsigned int i, istart;
312 unsigned char *cursrc1, *cursrc2, *curdst;
316 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
323 SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask);
326 if ((length & 7) > 0) {
328 istart = length & 0xfffffff8;
329 cursrc1 = &Src1[istart];
330 cursrc2 = &Src2[istart];
331 curdst = &Dest[istart];
345 for (i = istart; i < length; i++) {
346 result = (int) *cursrc1 / 2 + (
int) *cursrc2 / 2;
347 *curdst = (
unsigned char) result;
367 static int SDL_imageFilterSubMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
394 __m64 *mSrc1 = (__m64*)Src1;
395 __m64 *mSrc2 = (__m64*)Src2;
396 __m64 *mDest = (__m64*)Dest;
398 for (i = 0; i < SrcLength/8; i++) {
399 *mDest = _m_psubusb(*mSrc1, *mSrc2);
422 int SDL_imageFilterSub(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int length)
424 unsigned int i, istart;
425 unsigned char *cursrc1, *cursrc2, *curdst;
429 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
436 SDL_imageFilterSubMMX(Src1, Src2, Dest, length);
439 if ((length & 7) > 0) {
441 istart = length & 0xfffffff8;
442 cursrc1 = &Src1[istart];
443 cursrc2 = &Src2[istart];
444 curdst = &Dest[istart];
458 for (i = istart; i < length; i++) {
459 result = (int) *cursrc1 - (
int) *cursrc2;
462 *curdst = (
unsigned char) result;
482 static int SDL_imageFilterAbsDiffMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
512 __m64 *mSrc1 = (__m64*)Src1;
513 __m64 *mSrc2 = (__m64*)Src2;
514 __m64 *mDest = (__m64*)Dest;
516 for (i = 0; i < SrcLength/8; i++) {
517 __m64 mm1 = _m_psubusb(*mSrc2, *mSrc1);
518 __m64 mm2 = _m_psubusb(*mSrc1, *mSrc2);
519 *mDest = _m_por(mm1, mm2);
544 unsigned int i, istart;
545 unsigned char *cursrc1, *cursrc2, *curdst;
549 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
556 SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length);
559 if ((length & 7) > 0) {
561 istart = length & 0xfffffff8;
562 cursrc1 = &Src1[istart];
563 cursrc2 = &Src2[istart];
564 curdst = &Dest[istart];
578 for (i = istart; i < length; i++) {
579 result = abs((
int) *cursrc1 - (
int) *cursrc2);
580 *curdst = (
unsigned char) result;
600 static int SDL_imageFilterMultMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
687 __m64 *mSrc1 = (__m64*)Src1;
688 __m64 *mSrc2 = (__m64*)Src2;
689 __m64 *mDest = (__m64*)Dest;
690 __m64 mm0 = _m_from_int(0);
692 for (i = 0; i < SrcLength/8; i++) {
693 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
694 mm1 = _m_punpcklbw(*mSrc1, mm0);
695 mm2 = _m_punpckhbw(*mSrc1, mm0);
696 mm3 = _m_punpcklbw(*mSrc2, mm0);
697 mm4 = _m_punpckhbw(*mSrc2, mm0);
698 mm1 = _m_pmullw(mm1, mm3);
699 mm2 = _m_pmullw(mm2, mm4);
700 mm5 = _m_psrawi(mm1, 15);
701 mm6 = _m_psrawi(mm2, 15);
702 mm1 = _m_pxor(mm1, mm5);
703 mm2 = _m_pxor(mm2, mm6);
704 mm1 = _m_psubsw(mm1, mm5);
705 mm2 = _m_psubsw(mm2, mm6);
706 *mDest = _m_packuswb(mm1, mm2);
729 int SDL_imageFilterMult(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int length)
731 unsigned int i, istart;
732 unsigned char *cursrc1, *cursrc2, *curdst;
736 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
743 SDL_imageFilterMultMMX(Src1, Src2, Dest, length);
746 if ((length & 7) > 0) {
748 istart = length & 0xfffffff8;
749 cursrc1 = &Src1[istart];
750 cursrc2 = &Src2[istart];
751 curdst = &Dest[istart];
765 for (i = istart; i < length; i++) {
769 result = (int) *cursrc1 * (
int) *cursrc2;
772 *curdst = (
unsigned char) result;
821 "1:mov (%%edx), %%al \n\t"
823 "mov %%al, (%%edi) \n\t"
828 # elif defined(__x86_64__)
829 "1:mov (%%rdx), %%al \n\t"
831 "mov %%al, (%%rdi) \n\t"
864 unsigned int i, istart;
865 unsigned char *cursrc1, *cursrc2, *curdst;
868 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
879 if ((length & 7) > 0) {
881 istart = length & 0xfffffff8;
882 cursrc1 = &Src1[istart];
883 cursrc2 = &Src2[istart];
884 curdst = &Dest[istart];
902 for (i = istart; i < length; i++) {
903 *curdst = (int)*cursrc1 * (
int)*cursrc2;
923 static int SDL_imageFilterMultDivby2MMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
962 __m64 *mSrc1 = (__m64*)Src1;
963 __m64 *mSrc2 = (__m64*)Src2;
964 __m64 *mDest = (__m64*)Dest;
965 __m64 mm0 = _m_from_int(0);
967 for (i = 0; i < SrcLength/8; i++) {
968 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
969 mm1 = _m_punpcklbw(*mSrc1, mm0);
970 mm2 = _m_punpckhbw(*mSrc1, mm0);
971 mm3 = _m_punpcklbw(*mSrc2, mm0);
972 mm4 = _m_punpckhbw(*mSrc2, mm0);
973 mm1 = _m_psrlwi(mm1, 1);
974 mm2 = _m_psrlwi(mm2, 1);
975 mm1 = _m_pmullw(mm1, mm3);
976 mm2 = _m_pmullw(mm2, mm4);
977 *mDest = _m_packuswb(mm1, mm2);
1002 unsigned int i, istart;
1003 unsigned char *cursrc1, *cursrc2, *curdst;
1007 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
1014 SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
1017 if ((length & 7) > 0) {
1019 istart = length & 0xfffffff8;
1020 cursrc1 = &Src1[istart];
1021 cursrc2 = &Src2[istart];
1022 curdst = &Dest[istart];
1036 for (i = istart; i < length; i++) {
1037 result = ((int) *cursrc1 / 2) * (int) *cursrc2;
1040 *curdst = (
unsigned char) result;
1060 static int SDL_imageFilterMultDivby4MMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
1101 __m64 *mSrc1 = (__m64*)Src1;
1102 __m64 *mSrc2 = (__m64*)Src2;
1103 __m64 *mDest = (__m64*)Dest;
1104 __m64 mm0 = _m_from_int(0);
1106 for (i = 0; i < SrcLength/8; i++) {
1107 __m64 mm1, mm2, mm3, mm4, mm5, mm6;
1108 mm1 = _m_punpcklbw(*mSrc1, mm0);
1109 mm2 = _m_punpckhbw(*mSrc1, mm0);
1110 mm3 = _m_punpcklbw(*mSrc2, mm0);
1111 mm4 = _m_punpckhbw(*mSrc2, mm0);
1112 mm1 = _m_psrlwi(mm1, 1);
1113 mm2 = _m_psrlwi(mm2, 1);
1114 mm3 = _m_psrlwi(mm3, 1);
1115 mm4 = _m_psrlwi(mm4, 1);
1116 mm1 = _m_pmullw(mm1, mm3);
1117 mm2 = _m_pmullw(mm2, mm4);
1118 *mDest = _m_packuswb(mm1, mm2);
1143 unsigned int i, istart;
1144 unsigned char *cursrc1, *cursrc2, *curdst;
1148 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
1155 SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);
1158 if ((length & 7) > 0) {
1160 istart = length & 0xfffffff8;
1161 cursrc1 = &Src1[istart];
1162 cursrc2 = &Src2[istart];
1163 curdst = &Dest[istart];
1177 for (i = istart; i < length; i++) {
1178 result = ((int) *cursrc1 / 2) * ((int) *cursrc2 / 2);
1181 *curdst = (
unsigned char) result;
1201 static int SDL_imageFilterBitAndMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
1250 __m64 *mSrc1 = (__m64*)Src1;
1251 __m64 *mSrc2 = (__m64*)Src2;
1252 __m64 *mDest = (__m64*)Dest;
1254 for (i = 0; i < SrcLength/8; i++) {
1255 *mDest = _m_pand(*mSrc1, *mSrc2);
1280 unsigned int i, istart;
1281 unsigned char *cursrc1, *cursrc2, *curdst;
1284 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
1293 SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);
1296 if ((length & 7) > 0) {
1299 istart = length & 0xfffffff8;
1300 cursrc1 = &Src1[istart];
1301 cursrc2 = &Src2[istart];
1302 curdst = &Dest[istart];
1316 for (i = istart; i < length; i++) {
1317 *curdst = (*cursrc1) & (*cursrc2);
1337 static int SDL_imageFilterBitOrMMX(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
1364 __m64 *mSrc1 = (__m64*)Src1;
1365 __m64 *mSrc2 = (__m64*)Src2;
1366 __m64 *mDest = (__m64*)Dest;
1368 for (i = 0; i < SrcLength/8; i++) {
1369 *mDest = _m_por(*mSrc1, *mSrc2);
1394 unsigned int i, istart;
1395 unsigned char *cursrc1, *cursrc2, *curdst;
1398 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
1406 SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
1409 if ((length & 7) > 0) {
1411 istart = length & 0xfffffff8;
1412 cursrc1 = &Src1[istart];
1413 cursrc2 = &Src2[istart];
1414 curdst = &Dest[istart];
1428 for (i = istart; i < length; i++) {
1429 *curdst = *cursrc1 | *cursrc2;
1448 static int SDL_imageFilterDivASM(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int SrcLength)
1487 "1: mov (%%esi), %%bl \n\t"
1490 "movb $255, (%%edi) \n\t"
1492 "2: xor %%ah, %%ah \n\t"
1493 "mov (%%edx), %%al \n\t"
1495 "mov %%al, (%%edi) \n\t"
1508 # elif defined(__x86_64__)
1510 "1: mov (%%rsi), %%bl \n\t"
1513 "movb $255, (%%rdi) \n\t"
1515 "2: xor %%ah, %%ah \n\t"
1516 "mov (%%rdx), %%al \n\t"
1518 "mov %%al, (%%rdi) \n\t"
1529 :
"memory",
"rax",
"rbx"
1549 int SDL_imageFilterDiv(
unsigned char *Src1,
unsigned char *Src2,
unsigned char *Dest,
unsigned int length)
1551 unsigned int i, istart;
1552 unsigned char *cursrc1, *cursrc2, *curdst;
1555 if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
1563 SDL_imageFilterDivASM(Src1, Src2, Dest, length);
1591 for (i = istart; i < length; i++) {
1592 if (*cursrc2 == 0) {
1595 *curdst = (int)*cursrc1 / (
int)*cursrc2;
1617 static int SDL_imageFilterBitNegationMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength)
1643 __m64 *mSrc1 = (__m64*)Src1;
1644 __m64 *mDest = (__m64*)Dest;
1646 mm1 = _m_pcmpeqb(mm1, mm1);
1648 for (i = 0; i < SrcLength/8; i++) {
1649 *mDest = _m_pxor(*mSrc1, mm1);
1673 unsigned int i, istart;
1674 unsigned char *cursrc1, *curdst;
1677 if ((Src1 == NULL) || (Dest == NULL))
1684 SDL_imageFilterBitNegationMMX(Src1, Dest, length);
1687 if ((length & 7) > 0) {
1689 istart = length & 0xfffffff8;
1690 cursrc1 = &Src1[istart];
1691 curdst = &Dest[istart];
1704 for (i = istart; i < length; i++) {
1705 *curdst = ~(*cursrc1);
1724 static int SDL_imageFilterAddByteMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char C)
1758 __m64 *mSrc1 = (__m64*)Src1;
1759 __m64 *mDest = (__m64*)Dest;
1763 __m64 mm1 = _m_from_int(i);
1764 __m64 mm2 = _m_from_int(i);
1765 mm1 = _m_punpckldq(mm1, mm2);
1767 for (i = 0; i < SrcLength/8; i++) {
1768 *mDest = _m_paddusb(*mSrc1, mm1);
/*
 * Excerpt of the wrapper around SDL_imageFilterAddByteMMX (presumably
 * SDL_imageFilterAddByte; the signature and several statements are missing
 * from this listing). Visible steps: NULL check on both buffers, a plain
 * copy path, the MMX helper call, and a byte loop over the (length & 7)
 * bytes that follow the last full group of 8.
 */
1793 unsigned int i, istart;
1795 unsigned char *cursrc1, *curdest;
1799 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
1806 memcpy(Dest, Src1, length);
1813 SDL_imageFilterAddByteMMX(Src1, Dest, length, C);
1816 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
1818 istart = length & 0xfffffff8;
1819 cursrc1 = &Src1[istart];
1820 curdest = &Dest[istart];
1834 for (i = istart; i < length; i++) {
1835 result = (int) *cursrc1 + iC;
1838 *curdest = (
unsigned char) result;
1857 static int SDL_imageFilterAddUintMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned int C,
unsigned int D)
1888 __m64 *mSrc1 = (__m64*)Src1;
1889 __m64 *mDest = (__m64*)Dest;
1891 __m64 mm1 = _m_from_int(C);
1892 __m64 mm2 = _m_from_int(C);
1893 mm1 = _m_punpckldq(mm1, mm2);
1896 for (i = 0; i < SrcLength/8; i++) {
1897 *mDest = _m_paddusb(*mSrc1, mm1);
/*
 * Excerpt of the wrapper around SDL_imageFilterAddUintMMX (presumably
 * SDL_imageFilterAddUint; the signature and several statements are missing
 * from this listing). Visible steps: NULL check, a plain copy path, the MMX
 * helper call, and a scalar tail over the bytes past the last group of 8.
 */
1921 unsigned int i, j, istart, D;
1923 unsigned char *cursrc1;
1924 unsigned char *curdest;
1928 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
1935 memcpy(Dest, Src1, length);
1943 SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);
1946 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
1948 istart = length & 0xfffffff8;
1949 cursrc1 = &Src1[istart];
1950 curdest = &Dest[istart];
/* Split C into its four bytes, least-significant byte in iC[0]. */
1963 iC[3] = (int) ((C >> 24) & 0xff);
1964 iC[2] = (int) ((C >> 16) & 0xff);
1965 iC[1] = (int) ((C >> 8) & 0xff);
1966 iC[0] = (int) ((C >> 0) & 0xff);
/* Tail advances in groups of four, pairing source bytes with iC[0..3]. */
1967 for (i = istart; i < length; i += 4) {
1968 for (j = 0; j < 4; j++) {
1970 result = (int) *cursrc1 + iC[j];
/* Saturate at the top of the byte range. */
1971 if (result > 255) result = 255;
1972 *curdest = (
unsigned char) result;
1993 static int SDL_imageFilterAddByteToHalfMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char C,
1994 unsigned char *Mask)
2032 __m64 *mSrc1 = (__m64*)Src1;
2033 __m64 *mDest = (__m64*)Dest;
2034 __m64 *mMask = (__m64*)Mask;
2038 __m64 mm1 = _m_from_int(i);
2039 __m64 mm2 = _m_from_int(i);
2040 mm1 = _m_punpckldq(mm1, mm2);
2042 for (i = 0; i < SrcLength/8; i++) {
2043 __m64 mm2 = _m_psrlwi(*mSrc1, 1);
2044 mm2 = _m_pand(mm2, *mMask);
2046 *mDest = _m_paddusb(mm1, mm2);
2070 static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
2071 unsigned int i, istart;
2073 unsigned char *cursrc1;
2074 unsigned char *curdest;
2078 if ((Src1 == NULL) || (Dest == NULL))
2086 SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
2089 if ((length & 7) > 0) {
2091 istart = length & 0xfffffff8;
2092 cursrc1 = &Src1[istart];
2093 curdest = &Dest[istart];
2107 for (i = istart; i < length; i++) {
2108 result = (int) (*cursrc1 / 2) + iC;
2111 *curdest = (
unsigned char) result;
2164 __m64 *mSrc1 = (__m64*)Src1;
2165 __m64 *mDest = (__m64*)Dest;
2169 __m64 mm1 = _m_from_int(i);
2170 __m64 mm2 = _m_from_int(i);
2171 mm1 = _m_punpckldq(mm1, mm2);
2173 for (i = 0; i < SrcLength/8; i++) {
2174 *mDest = _m_psubusb(*mSrc1, mm1);
/*
 * Excerpt of a byte-subtraction wrapper whose signature is missing from this
 * listing (presumably SDL_imageFilterSubByte, judging by the `- iC` tail).
 * Visible steps: NULL check on both buffers, a plain copy path, and a byte
 * loop over the (length & 7) bytes that follow the last full group of 8.
 */
2198 unsigned int i, istart;
2200 unsigned char *cursrc1;
2201 unsigned char *curdest;
2205 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
2212 memcpy(Dest, Src1, length);
2222 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2224 istart = length & 0xfffffff8;
2225 cursrc1 = &Src1[istart];
2226 curdest = &Dest[istart];
2240 for (i = istart; i < length; i++) {
2241 result = (int) *cursrc1 - iC;
2244 *curdest = (
unsigned char) result;
2263 static int SDL_imageFilterSubUintMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned int C,
unsigned int D)
2294 __m64 *mSrc1 = (__m64*)Src1;
2295 __m64 *mDest = (__m64*)Dest;
2297 __m64 mm1 = _m_from_int(C);
2298 __m64 mm2 = _m_from_int(C);
2299 mm1 = _m_punpckldq(mm1, mm2);
2302 for (i = 0; i < SrcLength/8; i++) {
2303 *mDest = _m_psubusb(*mSrc1, mm1);
/*
 * Excerpt of the wrapper around SDL_imageFilterSubUintMMX (presumably
 * SDL_imageFilterSubUint; the signature and several statements are missing
 * from this listing). Visible steps: NULL check, a plain copy path, the MMX
 * helper call, and a scalar tail over the bytes past the last group of 8.
 */
2327 unsigned int i, j, istart, D;
2329 unsigned char *cursrc1;
2330 unsigned char *curdest;
2334 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
2341 memcpy(Dest, Src1, length);
2349 SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);
2352 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2354 istart = length & 0xfffffff8;
2355 cursrc1 = &Src1[istart];
2356 curdest = &Dest[istart];
/* Split C into its four bytes, least-significant byte in iC[0]. */
2369 iC[3] = (int) ((C >> 24) & 0xff);
2370 iC[2] = (int) ((C >> 16) & 0xff);
2371 iC[1] = (int) ((C >> 8) & 0xff);
2372 iC[0] = (int) ((C >> 0) & 0xff);
/* Tail advances in groups of four, pairing source bytes with iC[0..3]. */
2373 for (i = istart; i < length; i += 4) {
2374 for (j = 0; j < 4; j++) {
2376 result = (int) *cursrc1 - iC[j];
/* Saturate at the bottom of the byte range. */
2377 if (result < 0) result = 0;
2378 *curdest = (
unsigned char) result;
2399 static int SDL_imageFilterShiftRightMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N,
2400 unsigned char *Mask)
2440 __m64 *mSrc1 = (__m64*)Src1;
2441 __m64 *mDest = (__m64*)Dest;
2442 __m64 *mMask = (__m64*)Mask;
2445 mm1 = _m_pcmpeqb(mm1, mm1);
2447 for (i = 0; i < N; i++) {
2448 mm1 = _m_psrlwi(mm1, 1);
2449 mm1 = _m_pand(mm1, *mMask);
2452 for (i = 0; i < SrcLength/8; i++) {
2453 __m64 mm0 = _m_psrlwi(*mSrc1, N);
2454 *mDest = _m_pand(mm0, mm1);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftRightMMX (presumably
 * SDL_imageFilterShiftRight; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, a plain copy path,
 * the MMX helper call, and a byte loop over the (length & 7) trailing bytes.
 */
/* Per-byte 0x7F mask handed to the MMX helper. */
2478 static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
2479 unsigned int i, istart;
2480 unsigned char *cursrc1;
2481 unsigned char *curdest;
2484 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
2496 memcpy(Dest, Src1, length);
2503 SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);
2506 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2508 istart = length & 0xfffffff8;
2509 cursrc1 = &Src1[istart];
2510 curdest = &Dest[istart];
2523 for (i = istart; i < length; i++) {
2524 *curdest = (
unsigned char) *cursrc1 >> N;
2543 static int SDL_imageFilterShiftRightUintMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N)
2568 __m64 *mSrc1 = (__m64*)Src1;
2569 __m64 *mDest = (__m64*)Dest;
2571 for (i = 0; i < SrcLength/8; i++) {
2572 *mDest = _m_psrldi(*mSrc1, N);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftRightUintMMX (presumably
 * SDL_imageFilterShiftRightUint; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, a plain copy path,
 * the MMX helper call, and a tail that shifts 32-bit words.
 */
2596 unsigned int i, istart;
2597 unsigned char *cursrc1, *curdest;
2598 unsigned int *icursrc1, *icurdest;
2599 unsigned int result;
2602 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
2613 memcpy(Dest, Src1, length);
2619 SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);
2622 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2624 istart = length & 0xfffffff8;
2625 cursrc1 = &Src1[istart];
2626 curdest = &Dest[istart];
/* NOTE(review): the byte pointers are reinterpreted as unsigned int
 * pointers; nothing visible here establishes their alignment - confirm. */
2639 icursrc1=(
unsigned int *)cursrc1;
2640 icurdest=(
unsigned int *)curdest;
2641 for (i = istart; i < length; i += 4) {
2643 result = ((
unsigned int)*icursrc1 >> N);
2664 static int SDL_imageFilterMultByByteMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char C)
2731 __m64 *mSrc1 = (__m64*)Src1;
2732 __m64 *mDest = (__m64*)Dest;
2733 __m64 mm0 = _m_from_int(0);
2737 __m64 mm1 = _m_from_int(i);
2738 __m64 mm2 = _m_from_int(i);
2739 mm1 = _m_punpckldq(mm1, mm2);
2743 for (i = 0; i < SrcLength/8; i++) {
2745 mm3 = _m_punpcklbw(*mSrc1, mm0);
2746 mm4 = _m_punpckhbw(*mSrc1, mm0);
2747 mm3 = _m_pmullw(mm3, mm1);
2748 mm4 = _m_pmullw(mm4, mm1);
2749 *mDest = _m_packuswb(mm3, mm4);
2754 for (i = 0; i < SrcLength/8; i++) {
2755 __m64 mm3, mm4, mm5, mm6;
2756 mm3 = _m_punpcklbw(*mSrc1, mm0);
2757 mm4 = _m_punpckhbw(*mSrc1, mm0);
2758 mm3 = _m_pmullw(mm3, mm1);
2759 mm4 = _m_pmullw(mm4, mm1);
2761 mm5 = _m_psrawi(mm3, 15);
2762 mm6 = _m_psrawi(mm4, 15);
2763 mm3 = _m_pxor(mm3, mm5);
2764 mm4 = _m_pxor(mm4, mm6);
2765 mm3 = _m_psubsw(mm3, mm5);
2766 mm4 = _m_psubsw(mm4, mm6);
2767 *mDest = _m_packuswb(mm3, mm4);
/*
 * Excerpt of the wrapper around SDL_imageFilterMultByByteMMX (presumably
 * SDL_imageFilterMultByByte; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, a plain copy path,
 * the MMX helper call, and a byte loop over the (length & 7) trailing bytes.
 */
2792 unsigned int i, istart;
2794 unsigned char *cursrc1;
2795 unsigned char *curdest;
2799 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
2806 memcpy(Dest, Src1, length);
2812 SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);
2815 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2817 istart = length & 0xfffffff8;
2818 cursrc1 = &Src1[istart];
2819 curdest = &Dest[istart];
2833 for (i = istart; i < length; i++) {
2834 result = (int) *cursrc1 * iC;
2837 *curdest = (
unsigned char) result;
2857 static int SDL_imageFilterShiftRightAndMultByByteMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N,
2903 __m64 *mSrc1 = (__m64*)Src1;
2904 __m64 *mDest = (__m64*)Dest;
2905 __m64 mm0 = _m_from_int(0);
2909 __m64 mm1 = _m_from_int(i);
2910 __m64 mm2 = _m_from_int(i);
2911 mm1 = _m_punpckldq(mm1, mm2);
2912 for (i = 0; i < SrcLength/8; i++) {
2913 __m64 mm3, mm4, mm5, mm6;
2914 mm3 = _m_punpcklbw(*mSrc1, mm0);
2915 mm4 = _m_punpckhbw(*mSrc1, mm0);
2916 mm3 = _m_psrlwi(mm3, N);
2917 mm4 = _m_psrlwi(mm4, N);
2918 mm3 = _m_pmullw(mm3, mm1);
2919 mm4 = _m_pmullw(mm4, mm1);
2920 *mDest = _m_packuswb(mm3, mm4);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftRightAndMultByByteMMX
 * (presumably SDL_imageFilterShiftRightAndMultByByte; the signature and
 * several statements are missing from this listing). Visible steps: NULL
 * check, identity shortcut, the MMX helper call, and a byte loop over the
 * (length & 7) trailing bytes.
 */
2946 unsigned int i, istart;
2948 unsigned char *cursrc1;
2949 unsigned char *curdest;
2953 if ((Src1 == NULL) || (Dest == NULL))
/* Identity case (shift by 0, multiply by 1): Dest is a straight copy of
 * Src1. memcpy() takes the destination first, then the source. */
2964 if ((N == 0) && (C == 1)) {
2965 memcpy(Dest, Src1, length);
2971 SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C);
2974 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
2976 istart = length & 0xfffffff8;
2977 cursrc1 = &Src1[istart];
2978 curdest = &Dest[istart];
2992 for (i = istart; i < length; i++) {
2993 result = (int) (*cursrc1 >> N) * iC;
2996 *curdest = (
unsigned char) result;
3016 static int SDL_imageFilterShiftLeftByteMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N,
3017 unsigned char *Mask)
3057 __m64 *mSrc1 = (__m64*)Src1;
3058 __m64 *mDest = (__m64*)Dest;
3059 __m64 *mMask = (__m64*)Mask;
3062 mm1 = _m_pcmpeqb(mm1, mm1);
3064 for (i = 0; i < N; i++) {
3065 mm1 = _m_psllwi(mm1, 1);
3066 mm1 = _m_pand(mm1, *mMask);
3069 for (i = 0; i < SrcLength/8; i++) {
3070 __m64 mm0 = _m_psllwi(*mSrc1, N);
3071 *mDest = _m_pand(mm0, mm1);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftLeftByteMMX (presumably
 * SDL_imageFilterShiftLeftByte; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, a plain copy path,
 * the MMX helper call, and a byte loop over the (length & 7) trailing bytes.
 */
/* Per-byte 0xFE mask handed to the MMX helper. */
3095 static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
3096 unsigned int i, istart;
3097 unsigned char *cursrc1, *curdest;
3101 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
3112 memcpy(Dest, Src1, length);
3118 SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask);
3121 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
3123 istart = length & 0xfffffff8;
3124 cursrc1 = &Src1[istart];
3125 curdest = &Dest[istart];
3138 for (i = istart; i < length; i++) {
/* Bits shifted past bit 7 are discarded rather than saturated. */
3139 result = ((int) *cursrc1 << N) & 0xff;
3140 *curdest = (
unsigned char) result;
3159 static int SDL_imageFilterShiftLeftUintMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N)
3184 __m64 *mSrc1 = (__m64*)Src1;
3185 __m64 *mDest = (__m64*)Dest;
3187 for (i = 0; i < SrcLength/8; i++) {
3188 *mDest = _m_pslldi(*mSrc1, N);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftLeftUintMMX (presumably
 * SDL_imageFilterShiftLeftUint; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, a plain copy path,
 * the MMX helper call, and a tail that shifts 32-bit words.
 */
3212 unsigned int i, istart;
3213 unsigned char *cursrc1, *curdest;
3214 unsigned int *icursrc1, *icurdest;
3215 unsigned int result;
3218 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
3229 memcpy(Dest, Src1, length);
3235 SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N);
3238 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
3240 istart = length & 0xfffffff8;
3241 cursrc1 = &Src1[istart];
3242 curdest = &Dest[istart];
/* NOTE(review): the byte pointers are reinterpreted as unsigned int
 * pointers; nothing visible here establishes their alignment - confirm. */
3255 icursrc1=(
unsigned int *)cursrc1;
3256 icurdest=(
unsigned int *)curdest;
3257 for (i = istart; i < length; i += 4) {
3259 result = ((
unsigned int)*icursrc1 << N);
3280 static int SDL_imageFilterShiftLeftMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char N)
3341 __m64 *mSrc1 = (__m64*)Src1;
3342 __m64 *mDest = (__m64*)Dest;
3343 __m64 mm0 = _m_from_int(0);
3346 for (i = 0; i < SrcLength/8; i++) {
3348 mm3 = _m_punpcklbw(*mSrc1, mm0);
3349 mm4 = _m_punpckhbw(*mSrc1, mm0);
3350 mm3 = _m_psllwi(mm3, N);
3351 mm4 = _m_psllwi(mm4, N);
3352 *mDest = _m_packuswb(mm3, mm4);
3357 for (i = 0; i < SrcLength/8; i++) {
3358 __m64 mm3, mm4, mm5, mm6;
3359 mm3 = _m_punpcklbw(*mSrc1, mm0);
3360 mm4 = _m_punpckhbw(*mSrc1, mm0);
3361 mm3 = _m_psllwi(mm3, N);
3362 mm4 = _m_psllwi(mm4, N);
3364 mm5 = _m_psrawi(mm3, 15);
3365 mm6 = _m_psrawi(mm4, 15);
3366 mm3 = _m_pxor(mm3, mm5);
3367 mm4 = _m_pxor(mm4, mm6);
3368 mm3 = _m_psubsw(mm3, mm5);
3369 mm4 = _m_psubsw(mm4, mm6);
3370 *mDest = _m_packuswb(mm3, mm4);
/*
 * Excerpt of the wrapper around SDL_imageFilterShiftLeftMMX (presumably
 * SDL_imageFilterShiftLeft; the signature and several statements are missing
 * from this listing). Visible steps: NULL check, a plain copy path, the MMX
 * helper call, and a byte loop over the (length & 7) trailing bytes.
 */
3395 unsigned int i, istart;
3396 unsigned char *cursrc1, *curdest;
3400 if ((Src1 == NULL) || (Dest == NULL))
/* Plain copy path (its guard is not shown in this excerpt): Dest receives
 * Src1. memcpy() takes the destination first, then the source. */
3411 memcpy(Dest, Src1, length);
3417 SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N);
3420 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
3422 istart = length & 0xfffffff8;
3423 cursrc1 = &Src1[istart];
3424 curdest = &Dest[istart];
3437 for (i = istart; i < length; i++) {
3438 result = (int) *cursrc1 << N;
3441 *curdest = (
unsigned char) result;
/*
 * MMX helper excerpt (braces and several statements are missing from this
 * listing). For every 8-byte group it writes 0xFF to each Dest byte whose
 * Src1 byte reaches 255 under saturating addition of mm2, and 0x00 otherwise.
 * The threshold bytes come from `i`, whose assignment is not shown here
 * (presumably T replicated into all four bytes - confirm).
 */
3460 static int SDL_imageFilterBinarizeUsingThresholdMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char T)
3498 __m64 *mSrc1 = (__m64*)Src1;
3499 __m64 *mDest = (__m64*)Dest;
/* All-ones constants, built by comparing two zero registers for equality so
 * that no uninitialised __m64 is read inside its own initialiser. */
3501 __m64 mm1 = _m_pcmpeqb(_m_from_int(0), _m_from_int(0));
3502 __m64 mm2 = _m_pcmpeqb(_m_from_int(0), _m_from_int(0));
3505 __m64 mm3 = _m_from_int(i);
3506 __m64 mm4 = _m_from_int(i);
/* Duplicate the low 32 bits into both halves of the 64-bit register. */
3507 mm3 = _m_punpckldq(mm3, mm4);
/* mm2 = 0xFF - mm3 in every byte (saturating subtract from all-ones). */
3508 mm2 = _m_psubusb(mm2, mm3);
3510 for (i = 0; i < SrcLength/8; i++) {
3511 __m64 mm0 = _m_paddusb(*mSrc1, mm2);
/* Bytes that saturated to 0xFF compare equal to mm1 and become 0xFF. */
3512 *mDest = _m_pcmpeqb(mm0, mm1);
3536 unsigned int i, istart;
3537 unsigned char *cursrc1;
3538 unsigned char *curdest;
3541 if ((Src1 == NULL) || (Dest == NULL))
3548 memset(Dest, 255, length);
3554 SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);
3557 if ((length & 7) > 0) {
3559 istart = length & 0xfffffff8;
3560 cursrc1 = &Src1[istart];
3561 curdest = &Dest[istart];
3574 for (i = istart; i < length; i++) {
3575 *curdest = (
unsigned char)(((
unsigned char)*cursrc1 >= T) ? 255 : 0);
/*
 * MMX helper excerpt (the end of the signature, the braces and several
 * declarations are missing from this listing). Using saturating byte
 * arithmetic it clamps every Src1 byte into [Tmin, Tmax] (for Tmin <= Tmax),
 * eight bytes per iteration:
 *   mm1 = 255 - Tmax, mm5 = Tmin, mm7 = Tmin + (255 - Tmax)
 *   Dest = ((Src1 + mm1) - mm7) + mm5
 */
3595 static int SDL_imageFilterClipToRangeMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
unsigned char Tmin,
3645 __m64 *mSrc1 = (__m64*)Src1;
3646 __m64 *mDest = (__m64*)Dest;
/* All-ones constant, built by comparing two zero registers for equality so
 * that no uninitialised __m64 is read inside its own initialiser. */
3647 __m64 mm1 = _m_pcmpeqb(_m_from_int(0), _m_from_int(0));
/* Fill four bytes of i with Tmax, then widen to all eight bytes of mm3. */
3651 memset(&i, Tmax, 4);
3652 mm3 = _m_from_int(i);
3653 mm4 = _m_from_int(i);
3654 mm3 = _m_punpckldq(mm3, mm4);
/* mm1 = 0xFF - Tmax in every byte. */
3655 mm1 = _m_psubusb(mm1, mm3);
/* Same replication for Tmin, ending up in mm5. */
3659 memset(&i, Tmin, 4);
3660 mm5 = _m_from_int(i);
3661 mm4 = _m_from_int(i);
3662 mm5 = _m_punpckldq(mm5, mm4);
3663 mm7 = _m_paddusb(mm5, mm1);
3664 for (i = 0; i < SrcLength/8; i++) {
/* Push values above Tmax into saturation at 0xFF. */
3666 mm0 = _m_paddusb(*mSrc1, mm1);
/* Pull values below Tmin into saturation at 0. */
3667 mm0 = _m_psubusb(mm0, mm7);
/* Shift the result back into the [Tmin, Tmax] window. */
3668 *mDest = _m_paddusb(mm0, mm5);
/*
 * Excerpt of the wrapper around SDL_imageFilterClipToRangeMMX (presumably
 * SDL_imageFilterClipToRange; the signature and several statements are
 * missing from this listing). Visible steps: NULL check, identity shortcut,
 * the MMX helper call, and a byte loop that clamps the (length & 7)
 * trailing bytes against Tmin / Tmax.
 */
3694 unsigned int i, istart;
3695 unsigned char *cursrc1;
3696 unsigned char *curdest;
3699 if ((Src1 == NULL) || (Dest == NULL))
/* Identity case: a [0, 255] window covers every unsigned char value, so
 * nothing is clipped and Dest is a straight copy of Src1. The shortcut is
 * valid only for that full range. memcpy() takes the destination first. */
3705 if ((Tmin == 0) && (Tmax == 255)) {
3706 memcpy(Dest, Src1, length);
3712 SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax);
3715 if ((length & 7) > 0) {
/* length rounded down to a multiple of 8: first index of the scalar tail. */
3717 istart = length & 0xfffffff8;
3718 cursrc1 = &Src1[istart];
3719 curdest = &Dest[istart];
3732 for (i = istart; i < length; i++) {
3733 if (*cursrc1 < Tmin) {
3735 }
else if (*cursrc1 > Tmax) {
/* In-range bytes pass through unchanged. */
3738 *curdest = *cursrc1;
3761 static int SDL_imageFilterNormalizeLinearMMX(
unsigned char *Src1,
unsigned char *Dest,
unsigned int SrcLength,
int Cmin,
int Cmax,
3769 mov ax, WORD PTR Nmax
3770 mov bx, WORD PTR Cmax
3771 sub ax, WORD PTR Nmin
3772 sub bx, WORD PTR Cmin
3787 mov ax, WORD PTR Cmin
3795 mov ax, WORD PTR Nmin
3839 __m64 *mSrc1 = (__m64*)Src1;
3840 __m64 *mDest = (__m64*)Dest;
3841 __m64 mm0, mm1, mm2, mm3;
3845 unsigned short a = Nmax - Nmin;
3846 unsigned short b = Cmax - Cmin;
3853 mm0 = _m_from_int(i);
3854 mm1 = _m_from_int(i);
3855 mm0 = _m_punpckldq(mm0, mm1);
3857 i = (Cmin<<16)|(
short)Cmin;
3858 mm1 = _m_from_int(i);
3859 mm2 = _m_from_int(i);
3860 mm1 = _m_punpckldq(mm1, mm2);
3862 i = (Nmin<<16)|(
short)Nmin;
3863 mm2 = _m_from_int(i);
3864 mm3 = _m_from_int(i);
3865 mm2 = _m_punpckldq(mm2, mm3);
3866 __m64 mm7 = _m_from_int(0);
3867 for (i = 0; i < SrcLength/8; i++) {
3868 __m64 mm3, mm4, mm5, mm6;
3869 mm3 = _m_punpcklbw(*mSrc1, mm7);
3870 mm4 = _m_punpckhbw(*mSrc1, mm7);
3871 mm3 = _m_psubusb(mm3, mm1);
3872 mm4 = _m_psubusb(mm4, mm1);
3873 mm3 = _m_pmullw(mm3, mm0);
3874 mm4 = _m_pmullw(mm4, mm0);
3875 mm3 = _m_paddusb(mm3, mm2);
3876 mm4 = _m_paddusb(mm4, mm2);
3878 mm5 = _m_psrawi(mm3, 15);
3879 mm6 = _m_psrawi(mm4, 15);
3880 mm3 = _m_pxor(mm3, mm5);
3881 mm4 = _m_pxor(mm4, mm6);
3882 mm3 = _m_psubsw(mm3, mm5);
3883 mm4 = _m_psubsw(mm4, mm6);
3884 *mDest = _m_packuswb(mm3, mm4);
3912 unsigned int i, istart;
3913 unsigned char *cursrc;
3914 unsigned char *curdest;
3919 if ((Src == NULL) || (Dest == NULL))
3926 SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
3929 if ((length & 7) > 0) {
3931 istart = length & 0xfffffff8;
3932 cursrc = &Src[istart];
3933 curdest = &Dest[istart];
3951 for (i = istart; i < length; i++) {
3952 result = factor * ((int) (*cursrc) - Cmin) + Nmin;
3955 *curdest = (
unsigned char) result;
3981 signed short *Kernel,
unsigned char Divisor)
3984 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
3987 if ((columns < 3) || (rows < 3) || (Divisor == 0))
3992 #if defined(USE_MMX) && defined(i386)
4071 (
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
4072 "xor %%ebx, %%ebx \n\t"
4074 "mov %4, %%edx \n\t"
4075 "movq (%%edx), %%mm5 \n\t"
4076 "add $8, %%edx \n\t"
4077 "movq (%%edx), %%mm6 \n\t"
4078 "add $8, %%edx \n\t"
4079 "movq (%%edx), %%mm7 \n\t"
4081 "mov %3, %%eax \n\t"
4082 "mov %1, %%esi \n\t"
4083 "mov %0, %%edi \n\t"
4084 "add %%eax, %%edi \n\t"
4086 "mov %2, %%edx \n\t"
4087 "sub $2, %%edx \n\t"
4089 ".L10320: \n\t" "mov %%eax, %%ecx \n\t"
4090 "sub $2, %%ecx \n\t"
4094 "movq (%%esi), %%mm1 \n\t"
4095 "add %%eax, %%esi \n\t"
4096 "movq (%%esi), %%mm2 \n\t"
4097 "add %%eax, %%esi \n\t"
4098 "movq (%%esi), %%mm3 \n\t"
4099 "punpcklbw %%mm0, %%mm1 \n\t"
4100 "punpcklbw %%mm0, %%mm2 \n\t"
4101 "punpcklbw %%mm0, %%mm3 \n\t"
4102 "pmullw %%mm5, %%mm1 \n\t"
4103 "pmullw %%mm6, %%mm2 \n\t"
4104 "pmullw %%mm7, %%mm3 \n\t"
4105 "paddsw %%mm2, %%mm1 \n\t"
4106 "paddsw %%mm3, %%mm1 \n\t"
4107 "movq %%mm1, %%mm2 \n\t"
4108 "psrlq $32, %%mm1 \n\t"
4109 "paddsw %%mm2, %%mm1 \n\t"
4110 "movq %%mm1, %%mm3 \n\t"
4111 "psrlq $16, %%mm1 \n\t"
4112 "paddsw %%mm3, %%mm1 \n\t"
4114 "movd %%eax, %%mm2 \n\t"
4115 "movd %%edx, %%mm3 \n\t"
4116 "movd %%mm1, %%eax \n\t"
4117 "psraw $15, %%mm1 \n\t"
4118 "movd %%mm1, %%edx \n\t"
4120 "movd %%eax, %%mm1 \n\t"
4121 "packuswb %%mm0, %%mm1 \n\t"
4122 "movd %%mm1, %%eax \n\t"
4123 "mov %%al, (%%edi) \n\t"
4124 "movd %%mm3, %%edx \n\t"
4125 "movd %%mm2, %%eax \n\t"
4127 "sub %%eax, %%esi \n\t"
4128 "sub %%eax, %%esi \n\t"
4134 "add $2, %%esi \n\t"
4135 "add $2, %%edi \n\t"
4140 "popa \n\t":
"=m" (Dest)
4171 signed short *Kernel,
unsigned char Divisor)
4174 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
4177 if ((columns < 5) || (rows < 5) || (Divisor == 0))
4182 #if defined(USE_MMX) && defined(i386)
4318 (
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
4319 "xor %%ebx, %%ebx \n\t"
4321 "movd %%ebx, %%mm5 \n\t"
4322 "mov %4, %%edx \n\t"
4323 "mov %1, %%esi \n\t"
4324 "mov %0, %%edi \n\t"
4325 "add $2, %%edi \n\t"
4326 "mov %3, %%eax \n\t"
4327 "shl $1, %%eax \n\t"
4328 "add %%eax, %%edi \n\t"
4329 "shr $1, %%eax \n\t"
4330 "mov %2, %%ebx \n\t"
4331 "sub $4, %%ebx \n\t"
4333 ".L10330: \n\t" "mov %%eax, %%ecx \n\t"
4334 "sub $4, %%ecx \n\t"
4336 ".L10332: \n\t" "pxor %%mm7, %%mm7 \n\t"
4337 "movd %%esi, %%mm6 \n\t"
4339 "movq (%%esi), %%mm1 \n\t"
4340 "movq %%mm1, %%mm2 \n\t"
4341 "add %%eax, %%esi \n\t"
4342 "movq (%%edx), %%mm3 \n\t"
4343 "add $8, %%edx \n\t"
4344 "movq (%%edx), %%mm4 \n\t"
4345 "add $8, %%edx \n\t"
4346 "punpcklbw %%mm0, %%mm1 \n\t"
4347 "punpckhbw %%mm0, %%mm2 \n\t"
4348 "pmullw %%mm3, %%mm1 \n\t"
4349 "pmullw %%mm4, %%mm2 \n\t"
4350 "paddsw %%mm2, %%mm1 \n\t"
4351 "paddsw %%mm1, %%mm7 \n\t"
4353 "movq (%%esi), %%mm1 \n\t"
4354 "movq %%mm1, %%mm2 \n\t"
4355 "add %%eax, %%esi \n\t"
4356 "movq (%%edx), %%mm3 \n\t"
4357 "add $8, %%edx \n\t"
4358 "movq (%%edx), %%mm4 \n\t"
4359 "add $8, %%edx \n\t"
4360 "punpcklbw %%mm0, %%mm1 \n\t"
4361 "punpckhbw %%mm0, %%mm2 \n\t"
4362 "pmullw %%mm3, %%mm1 \n\t"
4363 "pmullw %%mm4, %%mm2 \n\t"
4364 "paddsw %%mm2, %%mm1 \n\t"
4365 "paddsw %%mm1, %%mm7 \n\t"
4367 "movq (%%esi), %%mm1 \n\t"
4368 "movq %%mm1, %%mm2 \n\t"
4369 "add %%eax, %%esi \n\t"
4370 "movq (%%edx), %%mm3 \n\t"
4371 "add $8, %%edx \n\t"
4372 "movq (%%edx), %%mm4 \n\t"
4373 "add $8, %%edx \n\t"
4374 "punpcklbw %%mm0, %%mm1 \n\t"
4375 "punpckhbw %%mm0, %%mm2 \n\t"
4376 "pmullw %%mm3, %%mm1 \n\t"
4377 "pmullw %%mm4, %%mm2 \n\t"
4378 "paddsw %%mm2, %%mm1 \n\t"
4379 "paddsw %%mm1, %%mm7 \n\t"
4381 "movq (%%esi), %%mm1 \n\t"
4382 "movq %%mm1, %%mm2 \n\t"
4383 "add %%eax, %%esi \n\t"
4384 "movq (%%edx), %%mm3 \n\t"
4385 "add $8, %%edx \n\t"
4386 "movq (%%edx), %%mm4 \n\t"
4387 "add $8, %%edx \n\t"
4388 "punpcklbw %%mm0, %%mm1 \n\t"
4389 "punpckhbw %%mm0, %%mm2 \n\t"
4390 "pmullw %%mm3, %%mm1 \n\t"
4391 "pmullw %%mm4, %%mm2 \n\t"
4392 "paddsw %%mm2, %%mm1 \n\t"
4393 "paddsw %%mm1, %%mm7 \n\t"
4395 "movq (%%esi), %%mm1 \n\t"
4396 "movq %%mm1, %%mm2 \n\t"
4397 "movq (%%edx), %%mm3 \n\t"
4398 "add $8, %%edx \n\t"
4399 "movq (%%edx), %%mm4 \n\t"
4400 "punpcklbw %%mm0, %%mm1 \n\t"
4401 "punpckhbw %%mm0, %%mm2 \n\t"
4402 "pmullw %%mm3, %%mm1 \n\t"
4403 "pmullw %%mm4, %%mm2 \n\t"
4404 "paddsw %%mm2, %%mm1 \n\t"
4405 "paddsw %%mm1, %%mm7 \n\t"
4407 "movq %%mm7, %%mm3 \n\t"
4408 "psrlq $32, %%mm7 \n\t"
4409 "paddsw %%mm3, %%mm7 \n\t"
4410 "movq %%mm7, %%mm2 \n\t"
4411 "psrlq $16, %%mm7 \n\t"
4412 "paddsw %%mm2, %%mm7 \n\t"
4414 "movd %%eax, %%mm1 \n\t"
4415 "movd %%ebx, %%mm2 \n\t"
4416 "movd %%edx, %%mm3 \n\t"
4417 "movd %%mm7, %%eax \n\t"
4418 "psraw $15, %%mm7 \n\t"
4419 "movd %%mm5, %%ebx \n\t"
4420 "movd %%mm7, %%edx \n\t"
4422 "movd %%eax, %%mm7 \n\t"
4423 "packuswb %%mm0, %%mm7 \n\t"
4424 "movd %%mm7, %%eax \n\t"
4425 "mov %%al, (%%edi) \n\t"
4426 "movd %%mm3, %%edx \n\t"
4427 "movd %%mm2, %%ebx \n\t"
4428 "movd %%mm1, %%eax \n\t"
4430 "movd %%mm6, %%esi \n\t"
4431 "sub $72, %%edx \n\t"
4437 "add $4, %%esi \n\t"
4438 "add $4, %%edi \n\t"
4443 "popa \n\t":
"=m" (Dest)
4474 signed short *Kernel,
unsigned char Divisor)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 7, and Divisor must be non-zero.
 */
4477 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
4480 if ((columns < 7) || (rows < 7) || (Divisor == 0))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
4485 #if defined(USE_MMX) && defined(i386)
4649 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
4650 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 4651, so what ebx holds when it is copied
   into mm5 cannot be confirmed from this listing. mm5 is read back into ebx just before
   the result is stored. */
4652 "movd %%ebx, %%mm5 \n\t"
/* Pointer setup from the asm operands (only the output operand, Dest, is visible here):
     edx = %4, used below as the base address of the 16-bit multiplier words;
     esi = %1, the address the pixel bytes are read from;
     edi = %0, the address the result byte is written to. */
4653 "mov %4, %%edx \n\t"
4654 "mov %1, %%esi \n\t"
4655 "mov %0, %%edi \n\t"
/* Offset edi by 3 bytes plus 3 * eax, with eax = %3 (presumably the line width in bytes --
   TODO confirm), then set ebx = %2 - 6. */
4656 "add $3, %%edi \n\t"
4657 "mov %3, %%eax \n\t"
4658 "add %%eax, %%edi \n\t"
4659 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
4660 "sub $6, %%ebx \n\t"
/* .L10340: ecx = eax - 6 (presumably the per-line counter; its decrement is not visible). */
4662 ".L10340: \n\t" "mov %%eax, %%ecx \n\t"
4663 "sub $6, %%ecx \n\t"
/* .L10342: per-pixel section. mm7 is cleared as the accumulator and the current esi is
   parked in mm6 so it can be restored after stepping down through the window. */
4665 ".L10342: \n\t" "pxor %%mm7, %%mm7 \n\t"
4666 "movd %%esi, %%mm6 \n\t"
/* Line 1 of 7: load 8 bytes from (esi) and step esi down by eax; load two groups of
   4 words from (edx) (edx += 16 in total); zero-extend the low and high 4 bytes to words;
   multiply word-wise (low 16 bits kept) and add into mm7 with signed saturation. */
4668 "movq (%%esi), %%mm1 \n\t"
4669 "movq %%mm1, %%mm2 \n\t"
4670 "add %%eax, %%esi \n\t"
4671 "movq (%%edx), %%mm3 \n\t"
4672 "add $8, %%edx \n\t"
4673 "movq (%%edx), %%mm4 \n\t"
4674 "add $8, %%edx \n\t"
4675 "punpcklbw %%mm0, %%mm1 \n\t"
4676 "punpckhbw %%mm0, %%mm2 \n\t"
4677 "pmullw %%mm3, %%mm1 \n\t"
4678 "pmullw %%mm4, %%mm2 \n\t"
4679 "paddsw %%mm2, %%mm1 \n\t"
4680 "paddsw %%mm1, %%mm7 \n\t"
/* Line 2 of 7: same sequence. */
4682 "movq (%%esi), %%mm1 \n\t"
4683 "movq %%mm1, %%mm2 \n\t"
4684 "add %%eax, %%esi \n\t"
4685 "movq (%%edx), %%mm3 \n\t"
4686 "add $8, %%edx \n\t"
4687 "movq (%%edx), %%mm4 \n\t"
4688 "add $8, %%edx \n\t"
4689 "punpcklbw %%mm0, %%mm1 \n\t"
4690 "punpckhbw %%mm0, %%mm2 \n\t"
4691 "pmullw %%mm3, %%mm1 \n\t"
4692 "pmullw %%mm4, %%mm2 \n\t"
4693 "paddsw %%mm2, %%mm1 \n\t"
4694 "paddsw %%mm1, %%mm7 \n\t"
/* Line 3 of 7: same sequence. */
4696 "movq (%%esi), %%mm1 \n\t"
4697 "movq %%mm1, %%mm2 \n\t"
4698 "add %%eax, %%esi \n\t"
4699 "movq (%%edx), %%mm3 \n\t"
4700 "add $8, %%edx \n\t"
4701 "movq (%%edx), %%mm4 \n\t"
4702 "add $8, %%edx \n\t"
4703 "punpcklbw %%mm0, %%mm1 \n\t"
4704 "punpckhbw %%mm0, %%mm2 \n\t"
4705 "pmullw %%mm3, %%mm1 \n\t"
4706 "pmullw %%mm4, %%mm2 \n\t"
4707 "paddsw %%mm2, %%mm1 \n\t"
4708 "paddsw %%mm1, %%mm7 \n\t"
/* Line 4 of 7: same sequence. */
4710 "movq (%%esi), %%mm1 \n\t"
4711 "movq %%mm1, %%mm2 \n\t"
4712 "add %%eax, %%esi \n\t"
4713 "movq (%%edx), %%mm3 \n\t"
4714 "add $8, %%edx \n\t"
4715 "movq (%%edx), %%mm4 \n\t"
4716 "add $8, %%edx \n\t"
4717 "punpcklbw %%mm0, %%mm1 \n\t"
4718 "punpckhbw %%mm0, %%mm2 \n\t"
4719 "pmullw %%mm3, %%mm1 \n\t"
4720 "pmullw %%mm4, %%mm2 \n\t"
4721 "paddsw %%mm2, %%mm1 \n\t"
4722 "paddsw %%mm1, %%mm7 \n\t"
/* Line 5 of 7: same sequence. */
4724 "movq (%%esi), %%mm1 \n\t"
4725 "movq %%mm1, %%mm2 \n\t"
4726 "add %%eax, %%esi \n\t"
4727 "movq (%%edx), %%mm3 \n\t"
4728 "add $8, %%edx \n\t"
4729 "movq (%%edx), %%mm4 \n\t"
4730 "add $8, %%edx \n\t"
4731 "punpcklbw %%mm0, %%mm1 \n\t"
4732 "punpckhbw %%mm0, %%mm2 \n\t"
4733 "pmullw %%mm3, %%mm1 \n\t"
4734 "pmullw %%mm4, %%mm2 \n\t"
4735 "paddsw %%mm2, %%mm1 \n\t"
4736 "paddsw %%mm1, %%mm7 \n\t"
/* Line 6 of 7: same sequence. */
4738 "movq (%%esi), %%mm1 \n\t"
4739 "movq %%mm1, %%mm2 \n\t"
4740 "add %%eax, %%esi \n\t"
4741 "movq (%%edx), %%mm3 \n\t"
4742 "add $8, %%edx \n\t"
4743 "movq (%%edx), %%mm4 \n\t"
4744 "add $8, %%edx \n\t"
4745 "punpcklbw %%mm0, %%mm1 \n\t"
4746 "punpckhbw %%mm0, %%mm2 \n\t"
4747 "pmullw %%mm3, %%mm1 \n\t"
4748 "pmullw %%mm4, %%mm2 \n\t"
4749 "paddsw %%mm2, %%mm1 \n\t"
4750 "paddsw %%mm1, %%mm7 \n\t"
/* Line 7 of 7: same arithmetic, but esi is not advanced and edx moves only 8 bytes,
   leaving edx 104 bytes (6 * 16 + 8) past its starting value. */
4752 "movq (%%esi), %%mm1 \n\t"
4753 "movq %%mm1, %%mm2 \n\t"
4754 "movq (%%edx), %%mm3 \n\t"
4755 "add $8, %%edx \n\t"
4756 "movq (%%edx), %%mm4 \n\t"
4757 "punpcklbw %%mm0, %%mm1 \n\t"
4758 "punpckhbw %%mm0, %%mm2 \n\t"
4759 "pmullw %%mm3, %%mm1 \n\t"
4760 "pmullw %%mm4, %%mm2 \n\t"
4761 "paddsw %%mm2, %%mm1 \n\t"
4762 "paddsw %%mm1, %%mm7 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm7 so that its low word holds
   their signed-saturating sum. */
4764 "movq %%mm7, %%mm3 \n\t"
4765 "psrlq $32, %%mm7 \n\t"
4766 "paddsw %%mm3, %%mm7 \n\t"
4767 "movq %%mm7, %%mm2 \n\t"
4768 "psrlq $16, %%mm7 \n\t"
4769 "paddsw %%mm2, %%mm7 \n\t"
/* Park eax, ebx and edx in mm1..mm3, then stage: eax = low dword of mm7 (the sum),
   ebx = the value kept in mm5, edx = low dword of mm7 after psraw 15 has replaced
   each word by its sign. */
4771 "movd %%eax, %%mm1 \n\t"
4772 "movd %%ebx, %%mm2 \n\t"
4773 "movd %%edx, %%mm3 \n\t"
4774 "movd %%mm7, %%eax \n\t"
4775 "psraw $15, %%mm7 \n\t"
4776 "movd %%mm5, %%ebx \n\t"
4777 "movd %%mm7, %%edx \n\t"
/* NOTE(review): numbering skips 4778. The instruction that consumes eax/ebx/edx
   (presumably the division by Divisor) is not visible in this listing. */
/* Move eax back into mm7, pack words to unsigned bytes with saturation, store the low
   byte at (edi), then restore edx, ebx and eax from mm3..mm1. */
4779 "movd %%eax, %%mm7 \n\t"
4780 "packuswb %%mm0, %%mm7 \n\t"
4781 "movd %%mm7, %%eax \n\t"
4782 "mov %%al, (%%edi) \n\t"
4783 "movd %%mm3, %%edx \n\t"
4784 "movd %%mm2, %%ebx \n\t"
4785 "movd %%mm1, %%eax \n\t"
/* Rewind for the next pixel: esi back to the value saved in mm6, edx back by the
   104 bytes consumed above. */
4787 "movd %%mm6, %%esi \n\t"
4788 "sub $104, %%edx \n\t"
/* NOTE(review): lines 4789-4793 are absent from this listing; the per-pixel pointer
   steps and the branch back to .L10342 are presumably there. */
/* Move esi and edi forward by 6 bytes each. */
4794 "add $6, %%esi \n\t"
4795 "add $6, %%edi \n\t"
/* NOTE(review): lines 4796-4799 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
4800 "popa \n\t":
"=m" (Dest)
4831 signed short *Kernel,
unsigned char Divisor)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 9, and Divisor must be non-zero.
 */
4834 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
4837 if ((columns < 9) || (rows < 9) || (Divisor == 0))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
4842 #if defined(USE_MMX) && defined(i386)
5106 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
5107 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 5108, so what ebx holds when it is copied
   into mm5 cannot be confirmed from this listing. mm5 is read back into ebx just before
   the result is stored. */
5109 "movd %%ebx, %%mm5 \n\t"
/* Pointer setup from the asm operands (only the output operand, Dest, is visible here):
     edx = %4, used below as the base address of the 16-bit multiplier words;
     esi = %1, the address the pixel bytes are read from;
     edi = %0, the address the result byte is written to. */
5110 "mov %4, %%edx \n\t"
5111 "mov %1, %%esi \n\t"
5112 "mov %0, %%edi \n\t"
/* Offset edi by 4 bytes plus 4 * eax, with eax = %3 (presumably the line width in bytes --
   TODO confirm), then set ebx = %2 - 8. */
5113 "add $4, %%edi \n\t"
5114 "mov %3, %%eax \n\t"
5115 "add %%eax, %%edi \n\t"
5116 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
5117 "sub $8, %%ebx \n\t"
/* .L10350: ecx = eax - 8 (presumably the per-line counter; its decrement is not visible). */
5119 ".L10350: \n\t" "mov %%eax, %%ecx \n\t"
5120 "sub $8, %%ecx \n\t"
/* .L10352: per-pixel section. mm7 is cleared as the accumulator and the current esi is
   parked in mm6 so it can be restored after stepping down through the window. */
5122 ".L10352: \n\t" "pxor %%mm7, %%mm7 \n\t"
5123 "movd %%esi, %%mm6 \n\t"
/* Line 1 of 9, first part: 8 bytes from (esi) are zero-extended to two sets of 4 words,
   multiplied word-wise (low 16 bits kept) by the 8 words at (edx) (edx += 16) and added
   into mm7 with signed saturation.
   NOTE(review): numbering skips 5127 between the register copy and the table load. */
5125 "movq (%%esi), %%mm1 \n\t"
5126 "movq %%mm1, %%mm2 \n\t"
5128 "movq (%%edx), %%mm3 \n\t"
5129 "add $8, %%edx \n\t"
5130 "movq (%%edx), %%mm4 \n\t"
5131 "add $8, %%edx \n\t"
5132 "punpcklbw %%mm0, %%mm1 \n\t"
5133 "punpckhbw %%mm0, %%mm2 \n\t"
5134 "pmullw %%mm3, %%mm1 \n\t"
5135 "pmullw %%mm4, %%mm2 \n\t"
5136 "paddsw %%mm2, %%mm1 \n\t"
5137 "paddsw %%mm1, %%mm7 \n\t"
/* Line 1 of 9, second part: another 8-byte load from (esi); only the low 4 bytes are
   widened and multiplied by the next 4 words at (edx) (edx += 8). esi is then decremented
   by one and moved down by eax.
   NOTE(review): no change to esi is visible between the two loads; a matching increment
   presumably sits on the skipped line -- confirm against the full source. */
5138 "movq (%%esi), %%mm1 \n\t"
5139 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5140 "movq (%%edx), %%mm3 \n\t"
5141 "add $8, %%edx \n\t"
5142 "punpcklbw %%mm0, %%mm1 \n\t"
5143 "pmullw %%mm3, %%mm1 \n\t"
5144 "paddsw %%mm1, %%mm7 \n\t"
/* Line 2 of 9: same two-part sequence (numbering skips 5148). */
5146 "movq (%%esi), %%mm1 \n\t"
5147 "movq %%mm1, %%mm2 \n\t"
5149 "movq (%%edx), %%mm3 \n\t"
5150 "add $8, %%edx \n\t"
5151 "movq (%%edx), %%mm4 \n\t"
5152 "add $8, %%edx \n\t"
5153 "punpcklbw %%mm0, %%mm1 \n\t"
5154 "punpckhbw %%mm0, %%mm2 \n\t"
5155 "pmullw %%mm3, %%mm1 \n\t"
5156 "pmullw %%mm4, %%mm2 \n\t"
5157 "paddsw %%mm2, %%mm1 \n\t"
5158 "paddsw %%mm1, %%mm7 \n\t"
5159 "movq (%%esi), %%mm1 \n\t"
5160 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5161 "movq (%%edx), %%mm3 \n\t"
5162 "add $8, %%edx \n\t"
5163 "punpcklbw %%mm0, %%mm1 \n\t"
5164 "pmullw %%mm3, %%mm1 \n\t"
5165 "paddsw %%mm1, %%mm7 \n\t"
/* Line 3 of 9: same two-part sequence (numbering skips 5169). */
5167 "movq (%%esi), %%mm1 \n\t"
5168 "movq %%mm1, %%mm2 \n\t"
5170 "movq (%%edx), %%mm3 \n\t"
5171 "add $8, %%edx \n\t"
5172 "movq (%%edx), %%mm4 \n\t"
5173 "add $8, %%edx \n\t"
5174 "punpcklbw %%mm0, %%mm1 \n\t"
5175 "punpckhbw %%mm0, %%mm2 \n\t"
5176 "pmullw %%mm3, %%mm1 \n\t"
5177 "pmullw %%mm4, %%mm2 \n\t"
5178 "paddsw %%mm2, %%mm1 \n\t"
5179 "paddsw %%mm1, %%mm7 \n\t"
5180 "movq (%%esi), %%mm1 \n\t"
5181 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5182 "movq (%%edx), %%mm3 \n\t"
5183 "add $8, %%edx \n\t"
5184 "punpcklbw %%mm0, %%mm1 \n\t"
5185 "pmullw %%mm3, %%mm1 \n\t"
5186 "paddsw %%mm1, %%mm7 \n\t"
/* Line 4 of 9: same two-part sequence (numbering skips 5190). */
5188 "movq (%%esi), %%mm1 \n\t"
5189 "movq %%mm1, %%mm2 \n\t"
5191 "movq (%%edx), %%mm3 \n\t"
5192 "add $8, %%edx \n\t"
5193 "movq (%%edx), %%mm4 \n\t"
5194 "add $8, %%edx \n\t"
5195 "punpcklbw %%mm0, %%mm1 \n\t"
5196 "punpckhbw %%mm0, %%mm2 \n\t"
5197 "pmullw %%mm3, %%mm1 \n\t"
5198 "pmullw %%mm4, %%mm2 \n\t"
5199 "paddsw %%mm2, %%mm1 \n\t"
5200 "paddsw %%mm1, %%mm7 \n\t"
5201 "movq (%%esi), %%mm1 \n\t"
5202 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5203 "movq (%%edx), %%mm3 \n\t"
5204 "add $8, %%edx \n\t"
5205 "punpcklbw %%mm0, %%mm1 \n\t"
5206 "pmullw %%mm3, %%mm1 \n\t"
5207 "paddsw %%mm1, %%mm7 \n\t"
/* Line 5 of 9: same two-part sequence (numbering skips 5211). */
5209 "movq (%%esi), %%mm1 \n\t"
5210 "movq %%mm1, %%mm2 \n\t"
5212 "movq (%%edx), %%mm3 \n\t"
5213 "add $8, %%edx \n\t"
5214 "movq (%%edx), %%mm4 \n\t"
5215 "add $8, %%edx \n\t"
5216 "punpcklbw %%mm0, %%mm1 \n\t"
5217 "punpckhbw %%mm0, %%mm2 \n\t"
5218 "pmullw %%mm3, %%mm1 \n\t"
5219 "pmullw %%mm4, %%mm2 \n\t"
5220 "paddsw %%mm2, %%mm1 \n\t"
5221 "paddsw %%mm1, %%mm7 \n\t"
5222 "movq (%%esi), %%mm1 \n\t"
5223 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5224 "movq (%%edx), %%mm3 \n\t"
5225 "add $8, %%edx \n\t"
5226 "punpcklbw %%mm0, %%mm1 \n\t"
5227 "pmullw %%mm3, %%mm1 \n\t"
5228 "paddsw %%mm1, %%mm7 \n\t"
/* Line 6 of 9: same two-part sequence (numbering skips 5232). */
5230 "movq (%%esi), %%mm1 \n\t"
5231 "movq %%mm1, %%mm2 \n\t"
5233 "movq (%%edx), %%mm3 \n\t"
5234 "add $8, %%edx \n\t"
5235 "movq (%%edx), %%mm4 \n\t"
5236 "add $8, %%edx \n\t"
5237 "punpcklbw %%mm0, %%mm1 \n\t"
5238 "punpckhbw %%mm0, %%mm2 \n\t"
5239 "pmullw %%mm3, %%mm1 \n\t"
5240 "pmullw %%mm4, %%mm2 \n\t"
5241 "paddsw %%mm2, %%mm1 \n\t"
5242 "paddsw %%mm1, %%mm7 \n\t"
5243 "movq (%%esi), %%mm1 \n\t"
5244 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5245 "movq (%%edx), %%mm3 \n\t"
5246 "add $8, %%edx \n\t"
5247 "punpcklbw %%mm0, %%mm1 \n\t"
5248 "pmullw %%mm3, %%mm1 \n\t"
5249 "paddsw %%mm1, %%mm7 \n\t"
/* Line 7 of 9: same two-part sequence (numbering skips 5253). */
5251 "movq (%%esi), %%mm1 \n\t"
5252 "movq %%mm1, %%mm2 \n\t"
5254 "movq (%%edx), %%mm3 \n\t"
5255 "add $8, %%edx \n\t"
5256 "movq (%%edx), %%mm4 \n\t"
5257 "add $8, %%edx \n\t"
5258 "punpcklbw %%mm0, %%mm1 \n\t"
5259 "punpckhbw %%mm0, %%mm2 \n\t"
5260 "pmullw %%mm3, %%mm1 \n\t"
5261 "pmullw %%mm4, %%mm2 \n\t"
5262 "paddsw %%mm2, %%mm1 \n\t"
5263 "paddsw %%mm1, %%mm7 \n\t"
5264 "movq (%%esi), %%mm1 \n\t"
5265 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5266 "movq (%%edx), %%mm3 \n\t"
5267 "add $8, %%edx \n\t"
5268 "punpcklbw %%mm0, %%mm1 \n\t"
5269 "pmullw %%mm3, %%mm1 \n\t"
5270 "paddsw %%mm1, %%mm7 \n\t"
/* Line 8 of 9: same two-part sequence (numbering skips 5274). */
5272 "movq (%%esi), %%mm1 \n\t"
5273 "movq %%mm1, %%mm2 \n\t"
5275 "movq (%%edx), %%mm3 \n\t"
5276 "add $8, %%edx \n\t"
5277 "movq (%%edx), %%mm4 \n\t"
5278 "add $8, %%edx \n\t"
5279 "punpcklbw %%mm0, %%mm1 \n\t"
5280 "punpckhbw %%mm0, %%mm2 \n\t"
5281 "pmullw %%mm3, %%mm1 \n\t"
5282 "pmullw %%mm4, %%mm2 \n\t"
5283 "paddsw %%mm2, %%mm1 \n\t"
5284 "paddsw %%mm1, %%mm7 \n\t"
5285 "movq (%%esi), %%mm1 \n\t"
5286 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
5287 "movq (%%edx), %%mm3 \n\t"
5288 "add $8, %%edx \n\t"
5289 "punpcklbw %%mm0, %%mm1 \n\t"
5290 "pmullw %%mm3, %%mm1 \n\t"
5291 "paddsw %%mm1, %%mm7 \n\t"
/* Line 9 of 9: same arithmetic (numbering skips 5295); esi is not moved afterwards and
   edx is not advanced after the final table load, leaving edx 208 bytes
   (8 * 24 + 16) past its starting value. */
5293 "movq (%%esi), %%mm1 \n\t"
5294 "movq %%mm1, %%mm2 \n\t"
5296 "movq (%%edx), %%mm3 \n\t"
5297 "add $8, %%edx \n\t"
5298 "movq (%%edx), %%mm4 \n\t"
5299 "add $8, %%edx \n\t"
5300 "punpcklbw %%mm0, %%mm1 \n\t"
5301 "punpckhbw %%mm0, %%mm2 \n\t"
5302 "pmullw %%mm3, %%mm1 \n\t"
5303 "pmullw %%mm4, %%mm2 \n\t"
5304 "paddsw %%mm2, %%mm1 \n\t"
5305 "paddsw %%mm1, %%mm7 \n\t"
5306 "movq (%%esi), %%mm1 \n\t"
5307 "movq (%%edx), %%mm3 \n\t"
5308 "punpcklbw %%mm0, %%mm1 \n\t"
5309 "pmullw %%mm3, %%mm1 \n\t"
5310 "paddsw %%mm1, %%mm7 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm7 so that its low word holds
   their signed-saturating sum. */
5312 "movq %%mm7, %%mm3 \n\t"
5313 "psrlq $32, %%mm7 \n\t"
5314 "paddsw %%mm3, %%mm7 \n\t"
5315 "movq %%mm7, %%mm2 \n\t"
5316 "psrlq $16, %%mm7 \n\t"
5317 "paddsw %%mm2, %%mm7 \n\t"
/* Park eax, ebx and edx in mm1..mm3, then stage: eax = low dword of mm7 (the sum),
   ebx = the value kept in mm5, edx = low dword of mm7 after psraw 15 has replaced
   each word by its sign. */
5319 "movd %%eax, %%mm1 \n\t"
5320 "movd %%ebx, %%mm2 \n\t"
5321 "movd %%edx, %%mm3 \n\t"
5322 "movd %%mm7, %%eax \n\t"
5323 "psraw $15, %%mm7 \n\t"
5324 "movd %%mm5, %%ebx \n\t"
5325 "movd %%mm7, %%edx \n\t"
/* NOTE(review): numbering skips 5326. The instruction that consumes eax/ebx/edx
   (presumably the division by Divisor) is not visible in this listing. */
/* Move eax back into mm7, pack words to unsigned bytes with saturation, store the low
   byte at (edi), then restore edx, ebx and eax from mm3..mm1. */
5327 "movd %%eax, %%mm7 \n\t"
5328 "packuswb %%mm0, %%mm7 \n\t"
5329 "movd %%mm7, %%eax \n\t"
5330 "mov %%al, (%%edi) \n\t"
5331 "movd %%mm3, %%edx \n\t"
5332 "movd %%mm2, %%ebx \n\t"
5333 "movd %%mm1, %%eax \n\t"
/* Rewind for the next pixel: esi back to the value saved in mm6, edx back by the
   208 bytes consumed above. */
5335 "movd %%mm6, %%esi \n\t"
5336 "sub $208, %%edx \n\t"
/* NOTE(review): lines 5337-5341 are absent from this listing; the per-pixel pointer
   steps and the branch back to .L10352 are presumably there. */
/* Move esi and edi forward by 8 bytes each. */
5342 "add $8, %%esi \n\t"
5343 "add $8, %%edi \n\t"
/* NOTE(review): lines 5344-5347 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
5348 "popa \n\t":
"=m" (Dest)
5379 signed short *Kernel,
unsigned char NRightShift)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 3, and NRightShift must not exceed 7.
 */
5382 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
5385 if ((columns < 3) || (rows < 3) || (NRightShift > 7))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
5390 #if defined(USE_MMX) && defined(i386)
5463 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
5464 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 5465, so what ebx holds when it is copied
   into mm4 cannot be confirmed from this listing. mm4 is the shift count applied by
   every psrlw below. */
5466 "movd %%ebx, %%mm4 \n\t"
/* Load three consecutive groups of four 16-bit multipliers from the address in %4
   into mm5, mm6 and mm7 (one group per image line of the window). */
5467 "mov %4, %%edx \n\t"
5468 "movq (%%edx), %%mm5 \n\t"
5469 "add $8, %%edx \n\t"
5470 "movq (%%edx), %%mm6 \n\t"
5471 "add $8, %%edx \n\t"
5472 "movq (%%edx), %%mm7 \n\t"
/* eax = %3 (presumably the line width in bytes -- TODO confirm); esi = %1 is the read
   pointer; edi = %0 is the write pointer, moved down by eax.
   NOTE(review): numbering skips 5478 right after the edi adjustment. */
5474 "mov %3, %%eax \n\t"
5475 "mov %1, %%esi \n\t"
5476 "mov %0, %%edi \n\t"
5477 "add %%eax, %%edi \n\t"
/* edx is reused: edx = %2 - 2 (its decrement is not visible in this listing). */
5479 "mov %2, %%edx \n\t"
5480 "sub $2, %%edx \n\t"
/* .L10360: ecx = eax - 2 (presumably the per-line counter; its decrement is not visible). */
5482 ".L10360: \n\t" "mov %%eax, %%ecx \n\t"
5483 "sub $2, %%ecx \n\t"
/* NOTE(review): lines 5484-5486 are absent (presumably the inner-loop label). */
/* Load 8 bytes from each of three consecutive lines (esi stepped down by eax twice). */
5487 "movq (%%esi), %%mm1 \n\t"
5488 "add %%eax, %%esi \n\t"
5489 "movq (%%esi), %%mm2 \n\t"
5490 "add %%eax, %%esi \n\t"
5491 "movq (%%esi), %%mm3 \n\t"
/* Zero-extend the low 4 bytes of each line to words, shift every word right by the
   count in mm4, multiply word-wise by the matching multiplier group (low 16 bits kept),
   and sum the three lines with signed saturation into mm1. */
5492 "punpcklbw %%mm0, %%mm1 \n\t"
5493 "punpcklbw %%mm0, %%mm2 \n\t"
5494 "punpcklbw %%mm0, %%mm3 \n\t"
5495 "psrlw %%mm4, %%mm1 \n\t"
5496 "psrlw %%mm4, %%mm2 \n\t"
5497 "psrlw %%mm4, %%mm3 \n\t"
5498 "pmullw %%mm5, %%mm1 \n\t"
5499 "pmullw %%mm6, %%mm2 \n\t"
5500 "pmullw %%mm7, %%mm3 \n\t"
5501 "paddsw %%mm2, %%mm1 \n\t"
5502 "paddsw %%mm3, %%mm1 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm1 so that its low word holds
   their signed-saturating sum. */
5503 "movq %%mm1, %%mm2 \n\t"
5504 "psrlq $32, %%mm1 \n\t"
5505 "paddsw %%mm2, %%mm1 \n\t"
5506 "movq %%mm1, %%mm3 \n\t"
5507 "psrlq $16, %%mm1 \n\t"
5508 "paddsw %%mm3, %%mm1 \n\t"
/* Pack words to unsigned bytes with saturation and store the low byte at (edi). */
5509 "packuswb %%mm0, %%mm1 \n\t"
5510 "movd %%mm1, %%ebx \n\t"
5511 "mov %%bl, (%%edi) \n\t"
/* Move esi back up by two lines and one byte to the right for the next pixel. */
5513 "sub %%eax, %%esi \n\t"
5514 "sub %%eax, %%esi \n\t" "inc %%esi \n\t"
/* NOTE(review): lines 5515-5518 are absent from this listing; the edi step and the
   inner-loop branch are presumably there. */
/* Move esi and edi forward by 2 bytes each. */
5519 "add $2, %%esi \n\t"
5520 "add $2, %%edi \n\t"
/* NOTE(review): lines 5521-5524 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
5525 "popa \n\t":
"=m" (Dest)
5556 signed short *Kernel,
unsigned char NRightShift)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 5, and NRightShift must not exceed 7.
 */
5559 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
5562 if ((columns < 5) || (rows < 5) || (NRightShift > 7))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
5567 #if defined(USE_MMX) && defined(i386)
5702 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
5703 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 5704, so what ebx holds when it is copied
   into mm5 cannot be confirmed from this listing. mm5 is the shift count applied by
   every psrlw below. */
5705 "movd %%ebx, %%mm5 \n\t"
/* Pointer setup from the asm operands (only the output operand, Dest, is visible here):
     edx = %4, base address of the 16-bit multiplier words;
     esi = %1, the read pointer; edi = %0, the write pointer. */
5706 "mov %4, %%edx \n\t"
5707 "mov %1, %%esi \n\t"
5708 "mov %0, %%edi \n\t"
/* Offset edi by 2 bytes plus 2 * eax: eax = %3 is doubled for the add and then halved
   back (eax is presumably the line width in bytes -- TODO confirm). */
5709 "add $2, %%edi \n\t"
5710 "mov %3, %%eax \n\t"
5711 "shl $1, %%eax \n\t"
5712 "add %%eax, %%edi \n\t"
5713 "shr $1, %%eax \n\t"
/* ebx = %2 - 4 (its decrement is not visible in this listing). */
5714 "mov %2, %%ebx \n\t"
5715 "sub $4, %%ebx \n\t"
/* .L10370: ecx = eax - 4 (presumably the per-line counter; its decrement is not visible). */
5717 ".L10370: \n\t" "mov %%eax, %%ecx \n\t"
5718 "sub $4, %%ecx \n\t"
/* .L10372: per-pixel section. mm7 is cleared as the accumulator and the current esi is
   parked in mm6 so it can be restored after stepping down through the window. */
5720 ".L10372: \n\t" "pxor %%mm7, %%mm7 \n\t"
5721 "movd %%esi, %%mm6 \n\t"
/* Line 1 of 5: load 8 bytes from (esi) and step esi down by eax; load two groups of
   4 words from (edx) (edx += 16 in total); zero-extend the low and high 4 bytes to words;
   shift each word right by the count in mm5; multiply word-wise (low 16 bits kept) and
   add into mm7 with signed saturation. */
5723 "movq (%%esi), %%mm1 \n\t"
5724 "movq %%mm1, %%mm2 \n\t"
5725 "add %%eax, %%esi \n\t"
5726 "movq (%%edx), %%mm3 \n\t"
5727 "add $8, %%edx \n\t"
5728 "movq (%%edx), %%mm4 \n\t"
5729 "add $8, %%edx \n\t"
5730 "punpcklbw %%mm0, %%mm1 \n\t"
5731 "punpckhbw %%mm0, %%mm2 \n\t"
5732 "psrlw %%mm5, %%mm1 \n\t"
5733 "psrlw %%mm5, %%mm2 \n\t"
5734 "pmullw %%mm3, %%mm1 \n\t"
5735 "pmullw %%mm4, %%mm2 \n\t"
5736 "paddsw %%mm2, %%mm1 \n\t"
5737 "paddsw %%mm1, %%mm7 \n\t"
/* Line 2 of 5: same sequence. */
5739 "movq (%%esi), %%mm1 \n\t"
5740 "movq %%mm1, %%mm2 \n\t"
5741 "add %%eax, %%esi \n\t"
5742 "movq (%%edx), %%mm3 \n\t"
5743 "add $8, %%edx \n\t"
5744 "movq (%%edx), %%mm4 \n\t"
5745 "add $8, %%edx \n\t"
5746 "punpcklbw %%mm0, %%mm1 \n\t"
5747 "punpckhbw %%mm0, %%mm2 \n\t"
5748 "psrlw %%mm5, %%mm1 \n\t"
5749 "psrlw %%mm5, %%mm2 \n\t"
5750 "pmullw %%mm3, %%mm1 \n\t"
5751 "pmullw %%mm4, %%mm2 \n\t"
5752 "paddsw %%mm2, %%mm1 \n\t"
5753 "paddsw %%mm1, %%mm7 \n\t"
/* Line 3 of 5: same sequence. */
5755 "movq (%%esi), %%mm1 \n\t"
5756 "movq %%mm1, %%mm2 \n\t"
5757 "add %%eax, %%esi \n\t"
5758 "movq (%%edx), %%mm3 \n\t"
5759 "add $8, %%edx \n\t"
5760 "movq (%%edx), %%mm4 \n\t"
5761 "add $8, %%edx \n\t"
5762 "punpcklbw %%mm0, %%mm1 \n\t"
5763 "punpckhbw %%mm0, %%mm2 \n\t"
5764 "psrlw %%mm5, %%mm1 \n\t"
5765 "psrlw %%mm5, %%mm2 \n\t"
5766 "pmullw %%mm3, %%mm1 \n\t"
5767 "pmullw %%mm4, %%mm2 \n\t"
5768 "paddsw %%mm2, %%mm1 \n\t"
5769 "paddsw %%mm1, %%mm7 \n\t"
/* Line 4 of 5: same sequence. */
5771 "movq (%%esi), %%mm1 \n\t"
5772 "movq %%mm1, %%mm2 \n\t"
5773 "add %%eax, %%esi \n\t"
5774 "movq (%%edx), %%mm3 \n\t"
5775 "add $8, %%edx \n\t"
5776 "movq (%%edx), %%mm4 \n\t"
5777 "add $8, %%edx \n\t"
5778 "punpcklbw %%mm0, %%mm1 \n\t"
5779 "punpckhbw %%mm0, %%mm2 \n\t"
5780 "psrlw %%mm5, %%mm1 \n\t"
5781 "psrlw %%mm5, %%mm2 \n\t"
5782 "pmullw %%mm3, %%mm1 \n\t"
5783 "pmullw %%mm4, %%mm2 \n\t"
5784 "paddsw %%mm2, %%mm1 \n\t"
5785 "paddsw %%mm1, %%mm7 \n\t"
/* Line 5 of 5: same arithmetic, but esi is not advanced and edx moves only 8 bytes,
   leaving edx 72 bytes (4 * 16 + 8) past its starting value. */
5787 "movq (%%esi), %%mm1 \n\t"
5788 "movq %%mm1, %%mm2 \n\t"
5789 "movq (%%edx), %%mm3 \n\t"
5790 "add $8, %%edx \n\t"
5791 "movq (%%edx), %%mm4 \n\t"
5792 "punpcklbw %%mm0, %%mm1 \n\t"
5793 "punpckhbw %%mm0, %%mm2 \n\t"
5794 "psrlw %%mm5, %%mm1 \n\t"
5795 "psrlw %%mm5, %%mm2 \n\t"
5796 "pmullw %%mm3, %%mm1 \n\t"
5797 "pmullw %%mm4, %%mm2 \n\t"
5798 "paddsw %%mm2, %%mm1 \n\t"
5799 "paddsw %%mm1, %%mm7 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm7 so that its low word holds
   their signed-saturating sum. */
5801 "movq %%mm7, %%mm3 \n\t"
5802 "psrlq $32, %%mm7 \n\t"
5803 "paddsw %%mm3, %%mm7 \n\t"
5804 "movq %%mm7, %%mm2 \n\t"
5805 "psrlq $16, %%mm7 \n\t"
5806 "paddsw %%mm2, %%mm7 \n\t"
/* Park eax in mm1, pack the sum to an unsigned byte with saturation, store the low byte
   at (edi) via al, then restore eax. */
5807 "movd %%eax, %%mm1 \n\t"
5808 "packuswb %%mm0, %%mm7 \n\t"
5809 "movd %%mm7, %%eax \n\t"
5810 "mov %%al, (%%edi) \n\t"
5811 "movd %%mm1, %%eax \n\t"
/* Rewind for the next pixel: esi back to the value saved in mm6, edx back by the
   72 bytes consumed above. */
5813 "movd %%mm6, %%esi \n\t"
5814 "sub $72, %%edx \n\t"
/* NOTE(review): lines 5815-5819 are absent from this listing; the per-pixel pointer
   steps and the branch back to .L10372 are presumably there. */
/* Move esi and edi forward by 4 bytes each. */
5820 "add $4, %%esi \n\t"
5821 "add $4, %%edi \n\t"
/* NOTE(review): lines 5822-5825 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
5826 "popa \n\t":
"=m" (Dest)
5857 signed short *Kernel,
unsigned char NRightShift)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 7, and NRightShift must not exceed 7.
 */
5860 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
5863 if ((columns < 7) || (rows < 7) || (NRightShift > 7))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
5868 #if defined(USE_MMX) && defined(i386)
6035 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
6036 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 6037, so what ebx holds when it is copied
   into mm5 cannot be confirmed from this listing. mm5 is the shift count applied by
   every psrlw below. */
6038 "movd %%ebx, %%mm5 \n\t"
/* Pointer setup from the asm operands (only the output operand, Dest, is visible here):
     edx = %4, base address of the 16-bit multiplier words;
     esi = %1, the read pointer; edi = %0, the write pointer. */
6039 "mov %4, %%edx \n\t"
6040 "mov %1, %%esi \n\t"
6041 "mov %0, %%edi \n\t"
/* Offset edi by 3 bytes plus 3 * eax, with eax = %3 (presumably the line width in bytes --
   TODO confirm), then set ebx = %2 - 6. */
6042 "add $3, %%edi \n\t"
6043 "mov %3, %%eax \n\t"
6044 "add %%eax, %%edi \n\t"
6045 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
6046 "sub $6, %%ebx \n\t"
/* .L10380: ecx = eax - 6 (presumably the per-line counter; its decrement is not visible). */
6048 ".L10380: \n\t" "mov %%eax, %%ecx \n\t"
6049 "sub $6, %%ecx \n\t"
/* .L10382: per-pixel section. mm7 is cleared as the accumulator and the current esi is
   parked in mm6 so it can be restored after stepping down through the window. */
6051 ".L10382: \n\t" "pxor %%mm7, %%mm7 \n\t"
6052 "movd %%esi, %%mm6 \n\t"
/* Line 1 of 7: load 8 bytes from (esi) and step esi down by eax; load two groups of
   4 words from (edx) (edx += 16 in total); zero-extend the low and high 4 bytes to words;
   shift each word right by the count in mm5; multiply word-wise (low 16 bits kept) and
   add into mm7 with signed saturation. */
6054 "movq (%%esi), %%mm1 \n\t"
6055 "movq %%mm1, %%mm2 \n\t"
6056 "add %%eax, %%esi \n\t"
6057 "movq (%%edx), %%mm3 \n\t"
6058 "add $8, %%edx \n\t"
6059 "movq (%%edx), %%mm4 \n\t"
6060 "add $8, %%edx \n\t"
6061 "punpcklbw %%mm0, %%mm1 \n\t"
6062 "punpckhbw %%mm0, %%mm2 \n\t"
6063 "psrlw %%mm5, %%mm1 \n\t"
6064 "psrlw %%mm5, %%mm2 \n\t"
6065 "pmullw %%mm3, %%mm1 \n\t"
6066 "pmullw %%mm4, %%mm2 \n\t"
6067 "paddsw %%mm2, %%mm1 \n\t"
6068 "paddsw %%mm1, %%mm7 \n\t"
/* Line 2 of 7: same sequence. */
6070 "movq (%%esi), %%mm1 \n\t"
6071 "movq %%mm1, %%mm2 \n\t"
6072 "add %%eax, %%esi \n\t"
6073 "movq (%%edx), %%mm3 \n\t"
6074 "add $8, %%edx \n\t"
6075 "movq (%%edx), %%mm4 \n\t"
6076 "add $8, %%edx \n\t"
6077 "punpcklbw %%mm0, %%mm1 \n\t"
6078 "punpckhbw %%mm0, %%mm2 \n\t"
6079 "psrlw %%mm5, %%mm1 \n\t"
6080 "psrlw %%mm5, %%mm2 \n\t"
6081 "pmullw %%mm3, %%mm1 \n\t"
6082 "pmullw %%mm4, %%mm2 \n\t"
6083 "paddsw %%mm2, %%mm1 \n\t"
6084 "paddsw %%mm1, %%mm7 \n\t"
/* Line 3 of 7: same sequence. */
6086 "movq (%%esi), %%mm1 \n\t"
6087 "movq %%mm1, %%mm2 \n\t"
6088 "add %%eax, %%esi \n\t"
6089 "movq (%%edx), %%mm3 \n\t"
6090 "add $8, %%edx \n\t"
6091 "movq (%%edx), %%mm4 \n\t"
6092 "add $8, %%edx \n\t"
6093 "punpcklbw %%mm0, %%mm1 \n\t"
6094 "punpckhbw %%mm0, %%mm2 \n\t"
6095 "psrlw %%mm5, %%mm1 \n\t"
6096 "psrlw %%mm5, %%mm2 \n\t"
6097 "pmullw %%mm3, %%mm1 \n\t"
6098 "pmullw %%mm4, %%mm2 \n\t"
6099 "paddsw %%mm2, %%mm1 \n\t"
6100 "paddsw %%mm1, %%mm7 \n\t"
/* Line 4 of 7: same sequence. */
6102 "movq (%%esi), %%mm1 \n\t"
6103 "movq %%mm1, %%mm2 \n\t"
6104 "add %%eax, %%esi \n\t"
6105 "movq (%%edx), %%mm3 \n\t"
6106 "add $8, %%edx \n\t"
6107 "movq (%%edx), %%mm4 \n\t"
6108 "add $8, %%edx \n\t"
6109 "punpcklbw %%mm0, %%mm1 \n\t"
6110 "punpckhbw %%mm0, %%mm2 \n\t"
6111 "psrlw %%mm5, %%mm1 \n\t"
6112 "psrlw %%mm5, %%mm2 \n\t"
6113 "pmullw %%mm3, %%mm1 \n\t"
6114 "pmullw %%mm4, %%mm2 \n\t"
6115 "paddsw %%mm2, %%mm1 \n\t"
6116 "paddsw %%mm1, %%mm7 \n\t"
/* Line 5 of 7: same sequence. */
6118 "movq (%%esi), %%mm1 \n\t"
6119 "movq %%mm1, %%mm2 \n\t"
6120 "add %%eax, %%esi \n\t"
6121 "movq (%%edx), %%mm3 \n\t"
6122 "add $8, %%edx \n\t"
6123 "movq (%%edx), %%mm4 \n\t"
6124 "add $8, %%edx \n\t"
6125 "punpcklbw %%mm0, %%mm1 \n\t"
6126 "punpckhbw %%mm0, %%mm2 \n\t"
6127 "psrlw %%mm5, %%mm1 \n\t"
6128 "psrlw %%mm5, %%mm2 \n\t"
6129 "pmullw %%mm3, %%mm1 \n\t"
6130 "pmullw %%mm4, %%mm2 \n\t"
6131 "paddsw %%mm2, %%mm1 \n\t"
6132 "paddsw %%mm1, %%mm7 \n\t"
/* Line 6 of 7: same sequence. */
6134 "movq (%%esi), %%mm1 \n\t"
6135 "movq %%mm1, %%mm2 \n\t"
6136 "add %%eax, %%esi \n\t"
6137 "movq (%%edx), %%mm3 \n\t"
6138 "add $8, %%edx \n\t"
6139 "movq (%%edx), %%mm4 \n\t"
6140 "add $8, %%edx \n\t"
6141 "punpcklbw %%mm0, %%mm1 \n\t"
6142 "punpckhbw %%mm0, %%mm2 \n\t"
6143 "psrlw %%mm5, %%mm1 \n\t"
6144 "psrlw %%mm5, %%mm2 \n\t"
6145 "pmullw %%mm3, %%mm1 \n\t"
6146 "pmullw %%mm4, %%mm2 \n\t"
6147 "paddsw %%mm2, %%mm1 \n\t"
6148 "paddsw %%mm1, %%mm7 \n\t"
/* Line 7 of 7: same arithmetic, but esi is not advanced and edx moves only 8 bytes,
   leaving edx 104 bytes (6 * 16 + 8) past its starting value. */
6150 "movq (%%esi), %%mm1 \n\t"
6151 "movq %%mm1, %%mm2 \n\t"
6152 "movq (%%edx), %%mm3 \n\t"
6153 "add $8, %%edx \n\t"
6154 "movq (%%edx), %%mm4 \n\t"
6155 "punpcklbw %%mm0, %%mm1 \n\t"
6156 "punpckhbw %%mm0, %%mm2 \n\t"
6157 "psrlw %%mm5, %%mm1 \n\t"
6158 "psrlw %%mm5, %%mm2 \n\t"
6159 "pmullw %%mm3, %%mm1 \n\t"
6160 "pmullw %%mm4, %%mm2 \n\t"
6161 "paddsw %%mm2, %%mm1 \n\t"
6162 "paddsw %%mm1, %%mm7 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm7 so that its low word holds
   their signed-saturating sum. */
6164 "movq %%mm7, %%mm3 \n\t"
6165 "psrlq $32, %%mm7 \n\t"
6166 "paddsw %%mm3, %%mm7 \n\t"
6167 "movq %%mm7, %%mm2 \n\t"
6168 "psrlq $16, %%mm7 \n\t"
6169 "paddsw %%mm2, %%mm7 \n\t"
/* Park eax in mm1, pack the sum to an unsigned byte with saturation, store the low byte
   at (edi) via al, then restore eax. */
6170 "movd %%eax, %%mm1 \n\t"
6171 "packuswb %%mm0, %%mm7 \n\t"
6172 "movd %%mm7, %%eax \n\t"
6173 "mov %%al, (%%edi) \n\t"
6174 "movd %%mm1, %%eax \n\t"
/* Rewind for the next pixel: esi back to the value saved in mm6, edx back by the
   104 bytes consumed above. */
6176 "movd %%mm6, %%esi \n\t"
6177 "sub $104, %%edx \n\t"
/* NOTE(review): lines 6178-6182 are absent from this listing; the per-pixel pointer
   steps and the branch back to .L10382 are presumably there. */
/* Move esi and edi forward by 6 bytes each. */
6183 "add $6, %%esi \n\t"
6184 "add $6, %%edi \n\t"
/* NOTE(review): lines 6185-6188 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
6189 "popa \n\t":
"=m" (Dest)
6220 signed short *Kernel,
unsigned char NRightShift)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src, Dest and Kernel must all be non-NULL.
 *  - columns and rows must each be at least 9, and NRightShift must not exceed 7.
 */
6223 if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
6226 if ((columns < 9) || (rows < 9) || (NRightShift > 7))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
6231 #if defined(USE_MMX) && defined(i386)
6511 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand used by the unpack/pack steps below. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
6512 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 6513, so what ebx holds when it is copied
   into mm5 cannot be confirmed from this listing. mm5 is the shift count applied by
   every psrlw below. */
6514 "movd %%ebx, %%mm5 \n\t"
/* Pointer setup from the asm operands (only the output operand, Dest, is visible here):
     edx = %4, base address of the 16-bit multiplier words;
     esi = %1, the read pointer; edi = %0, the write pointer. */
6515 "mov %4, %%edx \n\t"
6516 "mov %1, %%esi \n\t"
6517 "mov %0, %%edi \n\t"
/* Offset edi by 4 bytes plus 4 * eax, with eax = %3 (presumably the line width in bytes --
   TODO confirm), then set ebx = %2 - 8. */
6518 "add $4, %%edi \n\t"
6519 "mov %3, %%eax \n\t"
6520 "add %%eax, %%edi \n\t"
6521 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t"
6522 "sub $8, %%ebx \n\t"
/* .L10390: ecx = eax - 8 (presumably the per-line counter; its decrement is not visible). */
6524 ".L10390: \n\t" "mov %%eax, %%ecx \n\t"
6525 "sub $8, %%ecx \n\t"
/* .L10392: per-pixel section. mm7 is cleared as the accumulator and the current esi is
   parked in mm6 so it can be restored after stepping down through the window. */
6527 ".L10392: \n\t" "pxor %%mm7, %%mm7 \n\t"
6528 "movd %%esi, %%mm6 \n\t"
/* Line 1 of 9, first part: 8 bytes from (esi) are zero-extended to two sets of 4 words,
   shifted right by the count in mm5, multiplied word-wise (low 16 bits kept) by the
   8 words at (edx) (edx += 16) and added into mm7 with signed saturation.
   NOTE(review): numbering skips 6532 between the register copy and the table load. */
6530 "movq (%%esi), %%mm1 \n\t"
6531 "movq %%mm1, %%mm2 \n\t"
6533 "movq (%%edx), %%mm3 \n\t"
6534 "add $8, %%edx \n\t"
6535 "movq (%%edx), %%mm4 \n\t"
6536 "add $8, %%edx \n\t"
6537 "punpcklbw %%mm0, %%mm1 \n\t"
6538 "punpckhbw %%mm0, %%mm2 \n\t"
6539 "psrlw %%mm5, %%mm1 \n\t"
6540 "psrlw %%mm5, %%mm2 \n\t"
6541 "pmullw %%mm3, %%mm1 \n\t"
6542 "pmullw %%mm4, %%mm2 \n\t"
6543 "paddsw %%mm2, %%mm1 \n\t"
6544 "paddsw %%mm1, %%mm7 \n\t"
/* Line 1 of 9, second part: another 8-byte load from (esi); only the low 4 bytes are
   widened, shifted and multiplied by the next 4 words at (edx) (edx += 8). esi is then
   decremented by one and moved down by eax.
   NOTE(review): no change to esi is visible between the two loads; a matching increment
   presumably sits on the skipped line -- confirm against the full source. */
6545 "movq (%%esi), %%mm1 \n\t"
6546 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6547 "movq (%%edx), %%mm3 \n\t"
6548 "add $8, %%edx \n\t"
6549 "punpcklbw %%mm0, %%mm1 \n\t"
6550 "psrlw %%mm5, %%mm1 \n\t"
6551 "pmullw %%mm3, %%mm1 \n\t"
6552 "paddsw %%mm1, %%mm7 \n\t"
/* Line 2 of 9: same two-part sequence (numbering skips 6556). */
6554 "movq (%%esi), %%mm1 \n\t"
6555 "movq %%mm1, %%mm2 \n\t"
6557 "movq (%%edx), %%mm3 \n\t"
6558 "add $8, %%edx \n\t"
6559 "movq (%%edx), %%mm4 \n\t"
6560 "add $8, %%edx \n\t"
6561 "punpcklbw %%mm0, %%mm1 \n\t"
6562 "punpckhbw %%mm0, %%mm2 \n\t"
6563 "psrlw %%mm5, %%mm1 \n\t"
6564 "psrlw %%mm5, %%mm2 \n\t"
6565 "pmullw %%mm3, %%mm1 \n\t"
6566 "pmullw %%mm4, %%mm2 \n\t"
6567 "paddsw %%mm2, %%mm1 \n\t"
6568 "paddsw %%mm1, %%mm7 \n\t"
6569 "movq (%%esi), %%mm1 \n\t"
6570 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6571 "movq (%%edx), %%mm3 \n\t"
6572 "add $8, %%edx \n\t"
6573 "punpcklbw %%mm0, %%mm1 \n\t"
6574 "psrlw %%mm5, %%mm1 \n\t"
6575 "pmullw %%mm3, %%mm1 \n\t"
6576 "paddsw %%mm1, %%mm7 \n\t"
/* Line 3 of 9: same two-part sequence (numbering skips 6580). */
6578 "movq (%%esi), %%mm1 \n\t"
6579 "movq %%mm1, %%mm2 \n\t"
6581 "movq (%%edx), %%mm3 \n\t"
6582 "add $8, %%edx \n\t"
6583 "movq (%%edx), %%mm4 \n\t"
6584 "add $8, %%edx \n\t"
6585 "punpcklbw %%mm0, %%mm1 \n\t"
6586 "punpckhbw %%mm0, %%mm2 \n\t"
6587 "psrlw %%mm5, %%mm1 \n\t"
6588 "psrlw %%mm5, %%mm2 \n\t"
6589 "pmullw %%mm3, %%mm1 \n\t"
6590 "pmullw %%mm4, %%mm2 \n\t"
6591 "paddsw %%mm2, %%mm1 \n\t"
6592 "paddsw %%mm1, %%mm7 \n\t"
6593 "movq (%%esi), %%mm1 \n\t"
6594 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6595 "movq (%%edx), %%mm3 \n\t"
6596 "add $8, %%edx \n\t"
6597 "punpcklbw %%mm0, %%mm1 \n\t"
6598 "psrlw %%mm5, %%mm1 \n\t"
6599 "pmullw %%mm3, %%mm1 \n\t"
6600 "paddsw %%mm1, %%mm7 \n\t"
/* Line 4 of 9: same two-part sequence (numbering skips 6604). */
6602 "movq (%%esi), %%mm1 \n\t"
6603 "movq %%mm1, %%mm2 \n\t"
6605 "movq (%%edx), %%mm3 \n\t"
6606 "add $8, %%edx \n\t"
6607 "movq (%%edx), %%mm4 \n\t"
6608 "add $8, %%edx \n\t"
6609 "punpcklbw %%mm0, %%mm1 \n\t"
6610 "punpckhbw %%mm0, %%mm2 \n\t"
6611 "psrlw %%mm5, %%mm1 \n\t"
6612 "psrlw %%mm5, %%mm2 \n\t"
6613 "pmullw %%mm3, %%mm1 \n\t"
6614 "pmullw %%mm4, %%mm2 \n\t"
6615 "paddsw %%mm2, %%mm1 \n\t"
6616 "paddsw %%mm1, %%mm7 \n\t"
6617 "movq (%%esi), %%mm1 \n\t"
6618 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6619 "movq (%%edx), %%mm3 \n\t"
6620 "add $8, %%edx \n\t"
6621 "punpcklbw %%mm0, %%mm1 \n\t"
6622 "psrlw %%mm5, %%mm1 \n\t"
6623 "pmullw %%mm3, %%mm1 \n\t"
6624 "paddsw %%mm1, %%mm7 \n\t"
/* Line 5 of 9: same two-part sequence (numbering skips 6628). */
6626 "movq (%%esi), %%mm1 \n\t"
6627 "movq %%mm1, %%mm2 \n\t"
6629 "movq (%%edx), %%mm3 \n\t"
6630 "add $8, %%edx \n\t"
6631 "movq (%%edx), %%mm4 \n\t"
6632 "add $8, %%edx \n\t"
6633 "punpcklbw %%mm0, %%mm1 \n\t"
6634 "punpckhbw %%mm0, %%mm2 \n\t"
6635 "psrlw %%mm5, %%mm1 \n\t"
6636 "psrlw %%mm5, %%mm2 \n\t"
6637 "pmullw %%mm3, %%mm1 \n\t"
6638 "pmullw %%mm4, %%mm2 \n\t"
6639 "paddsw %%mm2, %%mm1 \n\t"
6640 "paddsw %%mm1, %%mm7 \n\t"
6641 "movq (%%esi), %%mm1 \n\t"
6642 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6643 "movq (%%edx), %%mm3 \n\t"
6644 "add $8, %%edx \n\t"
6645 "punpcklbw %%mm0, %%mm1 \n\t"
6646 "psrlw %%mm5, %%mm1 \n\t"
6647 "pmullw %%mm3, %%mm1 \n\t"
6648 "paddsw %%mm1, %%mm7 \n\t"
/* Line 6 of 9: same two-part sequence (numbering skips 6652). */
6650 "movq (%%esi), %%mm1 \n\t"
6651 "movq %%mm1, %%mm2 \n\t"
6653 "movq (%%edx), %%mm3 \n\t"
6654 "add $8, %%edx \n\t"
6655 "movq (%%edx), %%mm4 \n\t"
6656 "add $8, %%edx \n\t"
6657 "punpcklbw %%mm0, %%mm1 \n\t"
6658 "punpckhbw %%mm0, %%mm2 \n\t"
6659 "psrlw %%mm5, %%mm1 \n\t"
6660 "psrlw %%mm5, %%mm2 \n\t"
6661 "pmullw %%mm3, %%mm1 \n\t"
6662 "pmullw %%mm4, %%mm2 \n\t"
6663 "paddsw %%mm2, %%mm1 \n\t"
6664 "paddsw %%mm1, %%mm7 \n\t"
6665 "movq (%%esi), %%mm1 \n\t"
6666 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6667 "movq (%%edx), %%mm3 \n\t"
6668 "add $8, %%edx \n\t"
6669 "punpcklbw %%mm0, %%mm1 \n\t"
6670 "psrlw %%mm5, %%mm1 \n\t"
6671 "pmullw %%mm3, %%mm1 \n\t"
6672 "paddsw %%mm1, %%mm7 \n\t"
/* Line 7 of 9: same two-part sequence (numbering skips 6676). */
6674 "movq (%%esi), %%mm1 \n\t"
6675 "movq %%mm1, %%mm2 \n\t"
6677 "movq (%%edx), %%mm3 \n\t"
6678 "add $8, %%edx \n\t"
6679 "movq (%%edx), %%mm4 \n\t"
6680 "add $8, %%edx \n\t"
6681 "punpcklbw %%mm0, %%mm1 \n\t"
6682 "punpckhbw %%mm0, %%mm2 \n\t"
6683 "psrlw %%mm5, %%mm1 \n\t"
6684 "psrlw %%mm5, %%mm2 \n\t"
6685 "pmullw %%mm3, %%mm1 \n\t"
6686 "pmullw %%mm4, %%mm2 \n\t"
6687 "paddsw %%mm2, %%mm1 \n\t"
6688 "paddsw %%mm1, %%mm7 \n\t"
6689 "movq (%%esi), %%mm1 \n\t"
6690 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6691 "movq (%%edx), %%mm3 \n\t"
6692 "add $8, %%edx \n\t"
6693 "punpcklbw %%mm0, %%mm1 \n\t"
6694 "psrlw %%mm5, %%mm1 \n\t"
6695 "pmullw %%mm3, %%mm1 \n\t"
6696 "paddsw %%mm1, %%mm7 \n\t"
/* Line 8 of 9: same two-part sequence (numbering skips 6700). */
6698 "movq (%%esi), %%mm1 \n\t"
6699 "movq %%mm1, %%mm2 \n\t"
6701 "movq (%%edx), %%mm3 \n\t"
6702 "add $8, %%edx \n\t"
6703 "movq (%%edx), %%mm4 \n\t"
6704 "add $8, %%edx \n\t"
6705 "punpcklbw %%mm0, %%mm1 \n\t"
6706 "punpckhbw %%mm0, %%mm2 \n\t"
6707 "psrlw %%mm5, %%mm1 \n\t"
6708 "psrlw %%mm5, %%mm2 \n\t"
6709 "pmullw %%mm3, %%mm1 \n\t"
6710 "pmullw %%mm4, %%mm2 \n\t"
6711 "paddsw %%mm2, %%mm1 \n\t"
6712 "paddsw %%mm1, %%mm7 \n\t"
6713 "movq (%%esi), %%mm1 \n\t"
6714 "dec %%esi \n\t" "add %%eax, %%esi \n\t"
6715 "movq (%%edx), %%mm3 \n\t"
6716 "add $8, %%edx \n\t"
6717 "punpcklbw %%mm0, %%mm1 \n\t"
6718 "psrlw %%mm5, %%mm1 \n\t"
6719 "pmullw %%mm3, %%mm1 \n\t"
6720 "paddsw %%mm1, %%mm7 \n\t"
/* Line 9 of 9: same arithmetic (numbering skips 6724); esi is not moved afterwards and
   edx is not advanced after the final table load, leaving edx 208 bytes
   (8 * 24 + 16) past its starting value. */
6722 "movq (%%esi), %%mm1 \n\t"
6723 "movq %%mm1, %%mm2 \n\t"
6725 "movq (%%edx), %%mm3 \n\t"
6726 "add $8, %%edx \n\t"
6727 "movq (%%edx), %%mm4 \n\t"
6728 "add $8, %%edx \n\t"
6729 "punpcklbw %%mm0, %%mm1 \n\t"
6730 "punpckhbw %%mm0, %%mm2 \n\t"
6731 "psrlw %%mm5, %%mm1 \n\t"
6732 "psrlw %%mm5, %%mm2 \n\t"
6733 "pmullw %%mm3, %%mm1 \n\t"
6734 "pmullw %%mm4, %%mm2 \n\t"
6735 "paddsw %%mm2, %%mm1 \n\t"
6736 "paddsw %%mm1, %%mm7 \n\t"
6737 "movq (%%esi), %%mm1 \n\t"
6738 "movq (%%edx), %%mm3 \n\t"
6739 "punpcklbw %%mm0, %%mm1 \n\t"
6740 "psrlw %%mm5, %%mm1 \n\t"
6741 "pmullw %%mm3, %%mm1 \n\t"
6742 "paddsw %%mm1, %%mm7 \n\t"
/* Horizontal reduction: fold the four 16-bit lanes of mm7 so that its low word holds
   their signed-saturating sum. */
6744 "movq %%mm7, %%mm3 \n\t"
6745 "psrlq $32, %%mm7 \n\t"
6746 "paddsw %%mm3, %%mm7 \n\t"
6747 "movq %%mm7, %%mm2 \n\t"
6748 "psrlq $16, %%mm7 \n\t"
6749 "paddsw %%mm2, %%mm7 \n\t"
/* Park eax in mm1, pack the sum to an unsigned byte with saturation, store the low byte
   at (edi) via al, then restore eax. */
6750 "movd %%eax, %%mm1 \n\t"
6751 "packuswb %%mm0, %%mm7 \n\t"
6752 "movd %%mm7, %%eax \n\t"
6753 "mov %%al, (%%edi) \n\t"
6754 "movd %%mm1, %%eax \n\t"
/* Rewind for the next pixel: esi back to the value saved in mm6, edx back by the
   208 bytes consumed above. */
6756 "movd %%mm6, %%esi \n\t"
6757 "sub $208, %%edx \n\t"
/* NOTE(review): lines 6758-6762 are absent from this listing; the per-pixel pointer
   steps and the branch back to .L10392 are presumably there. */
/* Move esi and edi forward by 8 bytes each. */
6763 "add $8, %%esi \n\t"
6764 "add $8, %%edi \n\t"
/* NOTE(review): lines 6765-6768 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
6769 "popa \n\t":
"=m" (Dest)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src and Dest must be non-NULL.
 *  - columns must be at least 8 (the asm below stores 8 bytes at a time) and
 *    rows at least 3.
 */
6802 if ((Src == NULL) || (Dest == NULL))
6805 if ((columns < 8) || (rows < 3))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
6810 #if defined(USE_MMX) && defined(i386)
6923 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand for byte-to-word unpacking. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
/* eax = %3 (presumably the line width in bytes -- TODO confirm); esi = %1 is the read
   pointer; edi = %0 is the write pointer, moved down by eax.
   NOTE(review): numbering skips 6929 right after the edi adjustment. */
6924 "mov %3, %%eax \n\t"
6926 "mov %1, %%esi \n\t"
6927 "mov %0, %%edi \n\t"
6928 "add %%eax, %%edi \n\t"
/* edx = %2 - 2 (its decrement is not visible in this listing). */
6930 "mov %2, %%edx \n\t"
6931 "sub $2, %%edx \n\t"
/* .L10400: ecx = eax / 8, one count per 8-byte group; the current esi and edi are saved
   in ebx and mm1 so they can be restored once the line is done. */
6933 ".L10400: \n\t" "mov %%eax, %%ecx \n\t"
6934 "shr $3, %%ecx \n\t"
6935 "mov %%esi, %%ebx \n\t"
6936 "movd %%edi, %%mm1 \n\t"
/* NOTE(review): lines 6937-6939 are absent (presumably the inner-loop label). */
/* Top line: 8 bytes at offset 0 widened to words in mm4 (low 4) / mm5 (high 4), and
   8 bytes at offset +2 widened to words in mm6 (low 4) / mm7 (high 4). */
6940 "movq (%%esi), %%mm4 \n\t"
6941 "movq %%mm4, %%mm5 \n\t"
6942 "add $2, %%esi \n\t"
6943 "punpcklbw %%mm0, %%mm4 \n\t"
6944 "punpckhbw %%mm0, %%mm5 \n\t"
6945 "movq (%%esi), %%mm6 \n\t"
6946 "movq %%mm6, %%mm7 \n\t"
6947 "sub $2, %%esi \n\t"
6948 "punpcklbw %%mm0, %%mm6 \n\t"
6949 "punpckhbw %%mm0, %%mm7 \n\t"
/* Middle line (esi += eax): the offset-0 words are added twice into mm4/mm5 and the
   offset-+2 words twice into mm6/mm7, i.e. the middle line has weight 2. */
6950 "add %%eax, %%esi \n\t"
6951 "movq (%%esi), %%mm2 \n\t"
6952 "movq %%mm2, %%mm3 \n\t"
6953 "add $2, %%esi \n\t"
6954 "punpcklbw %%mm0, %%mm2 \n\t"
6955 "punpckhbw %%mm0, %%mm3 \n\t"
6956 "paddw %%mm2, %%mm4 \n\t"
6957 "paddw %%mm3, %%mm5 \n\t"
6958 "paddw %%mm2, %%mm4 \n\t"
6959 "paddw %%mm3, %%mm5 \n\t"
6960 "movq (%%esi), %%mm2 \n\t"
6961 "movq %%mm2, %%mm3 \n\t"
6962 "sub $2, %%esi \n\t"
6963 "punpcklbw %%mm0, %%mm2 \n\t"
6964 "punpckhbw %%mm0, %%mm3 \n\t"
6965 "paddw %%mm2, %%mm6 \n\t"
6966 "paddw %%mm3, %%mm7 \n\t"
6967 "paddw %%mm2, %%mm6 \n\t"
6968 "paddw %%mm3, %%mm7 \n\t"
/* Bottom line (esi += eax): added once into each accumulator. */
6969 "add %%eax, %%esi \n\t"
6970 "movq (%%esi), %%mm2 \n\t"
6971 "movq %%mm2, %%mm3 \n\t"
6972 "add $2, %%esi \n\t"
6973 "punpcklbw %%mm0, %%mm2 \n\t"
6974 "punpckhbw %%mm0, %%mm3 \n\t"
6975 "paddw %%mm2, %%mm4 \n\t"
6976 "paddw %%mm3, %%mm5 \n\t"
6977 "movq (%%esi), %%mm2 \n\t"
6978 "movq %%mm2, %%mm3 \n\t"
6979 "sub $2, %%esi \n\t"
6980 "punpcklbw %%mm0, %%mm2 \n\t"
6981 "punpckhbw %%mm0, %%mm3 \n\t"
6982 "paddw %%mm2, %%mm6 \n\t"
6983 "paddw %%mm3, %%mm7 \n\t"
/* Horizontal difference: in each accumulator, subtract the original value from its
   copy shifted right by 32 bits, so the low two words hold (column sum two to the right)
   minus (column sum). punpckldq then joins the low halves so mm4 and mm5 each carry
   four consecutive differences. */
6985 "movq %%mm4, %%mm2 \n\t"
6986 "psrlq $32, %%mm4 \n\t"
6987 "psubw %%mm2, %%mm4 \n\t"
6988 "movq %%mm6, %%mm3 \n\t"
6989 "psrlq $32, %%mm6 \n\t"
6990 "psubw %%mm3, %%mm6 \n\t"
6991 "punpckldq %%mm6, %%mm4 \n\t"
6992 "movq %%mm5, %%mm2 \n\t"
6993 "psrlq $32, %%mm5 \n\t"
6994 "psubw %%mm2, %%mm5 \n\t"
6995 "movq %%mm7, %%mm3 \n\t"
6996 "psrlq $32, %%mm7 \n\t"
6997 "psubw %%mm3, %%mm7 \n\t"
6998 "punpckldq %%mm7, %%mm5 \n\t"
/* Absolute value per word: build a sign mask (psraw 15), XOR with it, then subtract it. */
7000 "movq %%mm4, %%mm6 \n\t"
7001 "movq %%mm5, %%mm7 \n\t"
7002 "psraw $15, %%mm6 \n\t"
7003 "psraw $15, %%mm7 \n\t"
7004 "pxor %%mm6, %%mm4 \n\t"
7005 "pxor %%mm7, %%mm5 \n\t"
7006 "psubsw %%mm6, %%mm4 \n\t"
7007 "psubsw %%mm7, %%mm5 \n\t"
/* Pack the 8 words to unsigned bytes with saturation and store all 8 at (edi). */
7008 "packuswb %%mm5, %%mm4 \n\t"
7009 "movq %%mm4, (%%edi) \n\t"
/* Move esi back up two lines, then advance esi and edi by 8 bytes for the next group. */
7011 "sub %%eax, %%esi \n\t"
7012 "sub %%eax, %%esi \n\t" "add $8, %%esi \n\t"
7013 "add $8, %%edi \n\t"
/* NOTE(review): lines 7014-7016 are absent from this listing; the inner-loop counter
   update and branch are presumably there. */
/* Restore the line-start pointers saved in ebx / mm1 and move both down by eax. */
7017 "mov %%ebx, %%esi \n\t"
7018 "movd %%mm1, %%edi \n\t"
7019 "add %%eax, %%esi \n\t"
7020 "add %%eax, %%edi \n\t"
/* NOTE(review): lines 7021-7024 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
7025 "popa \n\t":
"=m" (Dest)
7053 unsigned char NRightShift)
/*
 * Parameter validation. Only the conditions are visible in this listing; the
 * statements they guard are not shown.
 *  - Src and Dest must be non-NULL.
 *  - columns must be at least 8 (the asm below stores 8 bytes at a time),
 *    rows at least 3, and NRightShift must not exceed 7.
 */
7056 if ((Src == NULL) || (Dest == NULL))
7058 if ((columns < 8) || (rows < 3) || (NRightShift > 7))
/* MMX inline-assembly path: compiled only when both USE_MMX and i386 are defined. */
7063 #if defined(USE_MMX) && defined(i386)
7191 (
/* Save all general-purpose registers; mm0 = 0 is the zero operand for byte-to-word unpacking. */
"pusha \n\t" "pxor %%mm0, %%mm0 \n\t"
/* eax = %3 (presumably the line width in bytes -- TODO confirm). */
7192 "mov %3, %%eax \n\t"
7193 "xor %%ebx, %%ebx \n\t"
/* NOTE(review): the embedded numbering skips 7194, so what ebx holds when it is copied
   into mm1 cannot be confirmed from this listing. mm1 is the shift count applied by
   every psrlw below. */
7195 "movd %%ebx, %%mm1 \n\t"
/* esi = %1 is the read pointer; edi = %0 is the write pointer, moved down by eax.
   NOTE(review): lines 7200-7203 are absent from this listing. */
7197 "mov %1, %%esi \n\t"
7198 "mov %0, %%edi \n\t"
7199 "add %%eax, %%edi \n\t"
/* .L10410: ecx = eax / 8, one count per 8-byte group; the current esi and edi are saved
   in ebx and edx so they can be restored once the line is done. */
7204 ".L10410: \n\t" "mov %%eax, %%ecx \n\t"
7205 "shr $3, %%ecx \n\t"
7206 "mov %%esi, %%ebx \n\t"
7207 "mov %%edi, %%edx \n\t"
/* NOTE(review): lines 7208-7210 are absent (presumably the inner-loop label). */
/* Top line: 8 bytes at offset 0 widened to words in mm4 (low 4) / mm5 (high 4), and
   8 bytes at offset +2 widened to words in mm6 (low 4) / mm7 (high 4). Every word is
   shifted right by the count in mm1 after widening. */
7211 "movq (%%esi), %%mm4 \n\t"
7212 "movq %%mm4, %%mm5 \n\t"
7213 "add $2, %%esi \n\t"
7214 "punpcklbw %%mm0, %%mm4 \n\t"
7215 "punpckhbw %%mm0, %%mm5 \n\t"
7216 "psrlw %%mm1, %%mm4 \n\t"
7217 "psrlw %%mm1, %%mm5 \n\t"
7218 "movq (%%esi), %%mm6 \n\t"
7219 "movq %%mm6, %%mm7 \n\t"
7220 "sub $2, %%esi \n\t"
7221 "punpcklbw %%mm0, %%mm6 \n\t"
7222 "punpckhbw %%mm0, %%mm7 \n\t"
7223 "psrlw %%mm1, %%mm6 \n\t"
7224 "psrlw %%mm1, %%mm7 \n\t"
/* Middle line (esi += eax): the shifted offset-0 words are added twice into mm4/mm5 and
   the shifted offset-+2 words twice into mm6/mm7, i.e. the middle line has weight 2. */
7225 "add %%eax, %%esi \n\t"
7226 "movq (%%esi), %%mm2 \n\t"
7227 "movq %%mm2, %%mm3 \n\t"
7228 "add $2, %%esi \n\t"
7229 "punpcklbw %%mm0, %%mm2 \n\t"
7230 "punpckhbw %%mm0, %%mm3 \n\t"
7231 "psrlw %%mm1, %%mm2 \n\t"
7232 "psrlw %%mm1, %%mm3 \n\t"
7233 "paddw %%mm2, %%mm4 \n\t"
7234 "paddw %%mm3, %%mm5 \n\t"
7235 "paddw %%mm2, %%mm4 \n\t"
7236 "paddw %%mm3, %%mm5 \n\t"
7237 "movq (%%esi), %%mm2 \n\t"
7238 "movq %%mm2, %%mm3 \n\t"
7239 "sub $2, %%esi \n\t"
7240 "punpcklbw %%mm0, %%mm2 \n\t"
7241 "punpckhbw %%mm0, %%mm3 \n\t"
7242 "psrlw %%mm1, %%mm2 \n\t"
7243 "psrlw %%mm1, %%mm3 \n\t"
7244 "paddw %%mm2, %%mm6 \n\t"
7245 "paddw %%mm3, %%mm7 \n\t"
7246 "paddw %%mm2, %%mm6 \n\t"
7247 "paddw %%mm3, %%mm7 \n\t"
/* Bottom line (esi += eax): shifted and added once into each accumulator. */
7248 "add %%eax, %%esi \n\t"
7249 "movq (%%esi), %%mm2 \n\t"
7250 "movq %%mm2, %%mm3 \n\t"
7251 "add $2, %%esi \n\t"
7252 "punpcklbw %%mm0, %%mm2 \n\t"
7253 "punpckhbw %%mm0, %%mm3 \n\t"
7254 "psrlw %%mm1, %%mm2 \n\t"
7255 "psrlw %%mm1, %%mm3 \n\t"
7256 "paddw %%mm2, %%mm4 \n\t"
7257 "paddw %%mm3, %%mm5 \n\t"
7258 "movq (%%esi), %%mm2 \n\t"
7259 "movq %%mm2, %%mm3 \n\t"
7260 "sub $2, %%esi \n\t"
7261 "punpcklbw %%mm0, %%mm2 \n\t"
7262 "punpckhbw %%mm0, %%mm3 \n\t"
7263 "psrlw %%mm1, %%mm2 \n\t"
7264 "psrlw %%mm1, %%mm3 \n\t"
7265 "paddw %%mm2, %%mm6 \n\t"
7266 "paddw %%mm3, %%mm7 \n\t"
/* Horizontal difference: in each accumulator, subtract the original value from its
   copy shifted right by 32 bits, so the low two words hold (column sum two to the right)
   minus (column sum). punpckldq then joins the low halves so mm4 and mm5 each carry
   four consecutive differences. */
7268 "movq %%mm4, %%mm2 \n\t"
7269 "psrlq $32, %%mm4 \n\t"
7270 "psubw %%mm2, %%mm4 \n\t"
7271 "movq %%mm6, %%mm3 \n\t"
7272 "psrlq $32, %%mm6 \n\t"
7273 "psubw %%mm3, %%mm6 \n\t"
7274 "punpckldq %%mm6, %%mm4 \n\t"
7275 "movq %%mm5, %%mm2 \n\t"
7276 "psrlq $32, %%mm5 \n\t"
7277 "psubw %%mm2, %%mm5 \n\t"
7278 "movq %%mm7, %%mm3 \n\t"
7279 "psrlq $32, %%mm7 \n\t"
7280 "psubw %%mm3, %%mm7 \n\t"
7281 "punpckldq %%mm7, %%mm5 \n\t"
/* Absolute value per word: build a sign mask (psraw 15), XOR with it, then subtract it. */
7283 "movq %%mm4, %%mm6 \n\t"
7284 "movq %%mm5, %%mm7 \n\t"
7285 "psraw $15, %%mm6 \n\t"
7286 "psraw $15, %%mm7 \n\t"
7287 "pxor %%mm6, %%mm4 \n\t"
7288 "pxor %%mm7, %%mm5 \n\t"
7289 "psubsw %%mm6, %%mm4 \n\t"
7290 "psubsw %%mm7, %%mm5 \n\t"
/* Pack the 8 words to unsigned bytes with saturation and store all 8 at (edi). */
7291 "packuswb %%mm5, %%mm4 \n\t"
7292 "movq %%mm4, (%%edi) \n\t"
/* Move esi back up two lines, then advance esi and edi by 8 bytes for the next group. */
7294 "sub %%eax, %%esi \n\t"
7295 "sub %%eax, %%esi \n\t" "add $8, %%esi \n\t"
7296 "add $8, %%edi \n\t"
/* NOTE(review): lines 7297-7299 are absent from this listing; the inner-loop counter
   update and branch are presumably there. */
/* Restore the line-start pointers saved in ebx / edx and move both down by eax. */
7300 "mov %%ebx, %%esi \n\t"
7301 "mov %%edx, %%edi \n\t"
7302 "add %%eax, %%esi \n\t"
7303 "add %%eax, %%edi \n\t"
/* NOTE(review): lines 7304-7307 are not shown (presumably the outer-loop control). */
/* Restore the registers saved by pusha. Dest is the memory output operand (%0). */
7308 "popa \n\t":
"=m" (Dest)
/* Stack alignment sequence (the enclosing function header is not visible in this listing):
   ebx = esp - 4, rounded down to a multiple of 32; the old esp is stored at that address
   and esp is then switched to it, so the saved value sits at the new top of stack. */
7341 "mov %%esp, %%ebx \n\t"
7342 "sub $4, %%ebx \n\t"
7343 "and $-32, %%ebx \n\t"
7344 "mov %%esp, (%%ebx) \n\t"
7345 "mov %%ebx, %%esp \n\t"
/* Stack restore sequence (the enclosing function header is not visible in this listing):
   load the dword stored at the current top of stack into ebx and make it the new esp. */
7366 "mov (%%esp), %%ebx \n\t"
7367 "mov %%ebx, %%esp \n\t"
int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int columns)
Filter using SobelX: Dij = saturation255( ... )
int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using MultByByte: D = saturation255(S * C)
int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N, unsigned char C)
Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C)
int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, unsigned char NRightShift)
Filter using SobelXShiftRight: Dij = saturation255( ... )
int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Div: D = S1 / S2.
int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeftUint: D = ((uint)S << N)
int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultDivby4: D = saturation255(S1/2 * S2/2)
void SDL_imageFilterRestoreStack(void)
Restore previously aligned stack.
int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
Internal ASM Filter using MultNor: D = S1 * S2.
void SDL_imageFilterMMXon()
Enable MMX check for filter functions and use MMX code if available.
int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... )
int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using SubByte: D = saturation0(S - C)
int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Sub: D = saturation0(S1 - S2)
int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeftByte: D = (S << N)
int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using AbsDiff: D = | S1 - S2 |.
int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... )
int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... )
int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)
int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using BitOr: D = S1 | S2.
void SDL_imageFilterMMXoff()
Disable MMX check for filter functions and force the use of non-MMX, C-based code.
int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultNor: D = S1 * S2.
int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
Internal MMX Filter using SubByte: D = saturation0(S - C)
int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
Filter using AddUint: D = saturation255(S[i] + Cs[i % 4]), Cs=Swap32((uint)C)
int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftRight: D = saturation0(S >> N)
int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel9x9ShiftRight: Dij = saturation255( ... )
#define SWAP_32(x)
Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.).
int SDL_imageFilterMMXdetect(void)
MMX detection routine (with override flag).
int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using MultDivby2: D = saturation255(S1/2 * S2)
int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using AddByte: D = saturation255(S + C)
int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using BitAnd: D = S1 & S2.
int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... )
int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)
Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0.
int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
Filter using ShiftLeft: D = saturation255(S << N)
int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Add: D = saturation255(S1 + S2)
int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin, int Nmax)
Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)
int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin, unsigned char Tmax)
Filter using ClipToRange: D = (S < Tmin) ? Tmin : ((S > Tmax) ? Tmax : S).
int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
Filter using AddByteToHalf: D = saturation255(S/2 + C)
int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)
int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char NRightShift)
Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... )
int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length)
Filter using BitNegation: D = ~S (bitwise complement).
int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... )
int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Mean: D = S1/2 + S2/2.
int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, signed short *Kernel, unsigned char Divisor)
Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... )
int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
Filter using Mult: D = saturation255(S1 * S2)
void SDL_imageFilterAlignStack(void)
Align stack to 32 byte boundary.