[x265] [PATCH] fix bug in sa8d_8x8 for psyCost_ss

Divya Manivannan divya at multicorewareinc.com
Fri Jan 9 14:21:07 CET 2015


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1420809613 -19800
#      Fri Jan 09 18:50:13 2015 +0530
# Node ID ec408c0f6df3eb1041507df183135f27b467edea
# Parent  0f4b677cea64254d0b8f77ccc84c785bf832698d
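fix bug in sa8d_8x8 for psyCost_ss

Rewrite the int16_t overload of _sa8d_8x8 to use plain int32_t arithmetic
instead of the packed ssum2_t/BITS_PER_SUM form, and drop its pix2/i_pix2
parameters: the call sites updated in this patch passed zeroBuf with a
stride of 0.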

diff -r 0f4b677cea64 -r ec408c0f6df3 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Fri Jan 09 13:26:21 2015 +0530
+++ b/source/common/pixel.cpp	Fri Jan 09 18:50:13 2015 +0530
@@ -367,46 +367,55 @@
     return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
 }
 
-inline int _sa8d_8x8(const int16_t* pix1, intptr_t i_pix1, const int16_t* pix2, intptr_t i_pix2)
+inline int _sa8d_8x8(const int16_t* pix1, intptr_t i_pix1)
 {
-    ssum2_t tmp[8][4];
-    ssum2_t a0, a1, a2, a3, a4, a5, a6, a7, b0, b1, b2, b3;
-    ssum2_t sum = 0;
-
-    for (int i = 0; i < 8; i++, pix1 += i_pix1, pix2 += i_pix2)
-    {
-        a0 = pix1[0] - pix2[0];
-        a1 = pix1[1] - pix2[1];
-        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
-        a2 = pix1[2] - pix2[2];
-        a3 = pix1[3] - pix2[3];
-        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
-        a4 = pix1[4] - pix2[4];
-        a5 = pix1[5] - pix2[5];
-        b2 = (a4 + a5) + ((a4 - a5) << BITS_PER_SUM);
-        a6 = pix1[6] - pix2[6];
-        a7 = pix1[7] - pix2[7];
-        b3 = (a6 + a7) + ((a6 - a7) << BITS_PER_SUM);
-        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], b0, b1, b2, b3);
-    }
-
-    for (int i = 0; i < 4; i++)
-    {
-        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]);
-        HADAMARD4(a4, a5, a6, a7, tmp[4][i], tmp[5][i], tmp[6][i], tmp[7][i]);
-        b0  = abs2(a0 + a4) + abs2(a0 - a4);
-        b0 += abs2(a1 + a5) + abs2(a1 - a5);
-        b0 += abs2(a2 + a6) + abs2(a2 - a6);
-        b0 += abs2(a3 + a7) + abs2(a3 - a7);
-        sum += (sum_t)b0 + (b0 >> BITS_PER_SUM);
-    }
+    int32_t tmp[8][8];
+    int32_t a0, a1, a2, a3, a4, a5, a6, a7;
+    int32_t sum = 0;
+
+    for (int i = 0; i < 8; i++, pix1 += i_pix1)
+    {
+        a0 = pix1[0] + pix1[1];
+        a1 = pix1[2] + pix1[3];
+        a2 = pix1[4] + pix1[5];
+        a3 = pix1[6] + pix1[7];
+        a4 = pix1[0] - pix1[1];
+        a5 = pix1[2] - pix1[3];
+        a6 = pix1[4] - pix1[5];
+        a7 = pix1[6] - pix1[7];
+        tmp[i][0] = (a0 + a1) + (a2 + a3);
+        tmp[i][1] = (a0 + a1) - (a2 + a3);
+        tmp[i][2] = (a0 - a1) + (a2 - a3);
+        tmp[i][3] = (a0 - a1) - (a2 - a3);
+        tmp[i][4] = (a4 + a5) + (a6 + a7);
+        tmp[i][5] = (a4 + a5) - (a6 + a7);
+        tmp[i][6] = (a4 - a5) + (a6 - a7);
+        tmp[i][7] = (a4 - a5) - (a6 - a7);
+    }
+
+    for (int i = 0; i < 8; i++)
+    {
+        a0 = (tmp[0][i] + tmp[1][i]) + (tmp[2][i] + tmp[3][i]);
+        a2 = (tmp[0][i] + tmp[1][i]) - (tmp[2][i] + tmp[3][i]);
+        a1 = (tmp[0][i] - tmp[1][i]) + (tmp[2][i] - tmp[3][i]);
+        a3 = (tmp[0][i] - tmp[1][i]) - (tmp[2][i] - tmp[3][i]);
+        a4 = (tmp[4][i] + tmp[5][i]) + (tmp[6][i] + tmp[7][i]);
+        a6 = (tmp[4][i] + tmp[5][i]) - (tmp[6][i] + tmp[7][i]);
+        a5 = (tmp[4][i] - tmp[5][i]) + (tmp[6][i] - tmp[7][i]);
+        a7 = (tmp[4][i] - tmp[5][i]) - (tmp[6][i] - tmp[7][i]);
+        a0 = abs(a0 + a4) + abs(a0 - a4);
+        a0 += abs(a1 + a5) + abs(a1 - a5);
+        a0 += abs(a2 + a6) + abs(a2 - a6);
+        a0 += abs(a3 + a7) + abs(a3 - a7);
+        sum += a0;
+    }
 
     return (int)sum;
 }
 
-int sa8d_8x8(const int16_t* pix1, intptr_t i_pix1, const int16_t* pix2, intptr_t i_pix2)
+int sa8d_8x8(const int16_t* pix1, intptr_t i_pix1)
 {
-    return (int)((_sa8d_8x8(pix1, i_pix1, pix2, i_pix2) + 2) >> 2);
+    return (int)((_sa8d_8x8(pix1, i_pix1) + 2) >> 2);
 }
 
 int sa8d_16x16(const pixel* pix1, intptr_t i_pix1, const pixel* pix2, intptr_t i_pix2)
@@ -822,9 +831,9 @@
             for (int j = 0; j < dim; j+= 8)
             {
                 /* AC energy, measured by sa8d (AC + DC) minus SAD (DC) */
-                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride, zeroBuf, 0) - 
+                int sourceEnergy = sa8d_8x8(source + i * sstride + j, sstride) - 
                                    (sad<8, 8>(source + i * sstride + j, sstride, zeroBuf, 0) >> 2);
-                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride, zeroBuf, 0) - 
+                int reconEnergy =  sa8d_8x8(recon + i * rstride + j, rstride) - 
                                    (sad<8, 8>(recon + i * rstride + j, rstride, zeroBuf, 0) >> 2);
 
                 totEnergy += abs(sourceEnergy - reconEnergy);


More information about the x265-devel mailing list