[x265] [PATCH 3 of 4] asm: fix bug in luma_p2s and activate it in the encoder

Min Chen chenm003 at 163.com
Thu Oct 31 14:03:01 CET 2013


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1383224489 -28800
# Node ID 21dbf988079b0e33265ae48578c26347cc779fbe
# Parent  a64e813de628856f6916bdafcc069f19d9d81082
asm: fix bug in luma_p2s and activate it in the encoder

diff -r a64e813de628 -r 21dbf988079b source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp	Thu Oct 31 21:00:29 2013 +0800
+++ b/source/Lib/TLibCommon/TComPrediction.cpp	Thu Oct 31 21:01:29 2013 +0800
@@ -508,7 +508,7 @@
 {
     int refStride = refPic->getStride();
     int refOffset = (mv->x >> 2) + (mv->y >> 2) * refStride;
-    Pel *ref      =  refPic->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + partAddr) + refOffset;
+    pixel *ref    =  refPic->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + partAddr) + refOffset;
 
     int dstStride = dstPic->m_width;
     int16_t *dst    = dstPic->getLumaAddr(partAddr);
@@ -521,7 +521,7 @@
 
     if ((yFrac | xFrac) == 0)
     {
-        primitives.ipfilter_p2s(ref, refStride, dst, dstStride, width, height);
+        primitives.luma_p2s(ref, refStride, dst, width, height);
     }
     else if (yFrac == 0)
     {
diff -r a64e813de628 -r 21dbf988079b source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Thu Oct 31 21:00:29 2013 +0800
+++ b/source/common/x86/ipfilter8.asm	Thu Oct 31 21:01:29 2013 +0800
@@ -2107,7 +2107,6 @@
     movu        [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
     movu        [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
     movu        [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
-    lea         r5, [r5 + 8]
     je          .nextH
     jmp         .loopW
 
@@ -2119,9 +2118,9 @@
 
 .nextH:
     lea         r0, [r0 + r1 * 4]
-    add         r2, FENC_STRIDE * 2 * 4
-
-    sub         r4, 4
+    add         r2, FENC_STRIDE * 8
+
+    sub         r4d, 4
     jnz         .loopH
 
     RET
diff -r a64e813de628 -r 21dbf988079b source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp	Thu Oct 31 21:00:29 2013 +0800
+++ b/source/test/ipfilterharness.cpp	Thu Oct 31 21:01:29 2013 +0800
@@ -579,6 +579,13 @@
                        pixel_buff, srcStride, IPF_vec_output_s, dstStride, width, height);
     }
 
+    if (opt.luma_p2s)
+    {
+        printf("luma_p2s\t");
+        REPORT_SPEEDUP(opt.luma_p2s, ref.luma_p2s,
+                       pixel_buff, srcStride, IPF_vec_output_s, width, height);
+    }
+
     if (opt.ipfilter_s2p)
     {
         printf("ipfilter_s2p\t");



More information about the x265-devel mailing list