[x265] [PATCH 3 of 4] asm: fix bug in luma_p2s and activate it in encoder
Min Chen
chenm003 at 163.com
Thu Oct 31 14:03:01 CET 2013
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1383224489 -28800
# Node ID 21dbf988079b0e33265ae48578c26347cc779fbe
# Parent a64e813de628856f6916bdafcc069f19d9d81082
asm: fix bug in luma_p2s and activate it in encoder
diff -r a64e813de628 -r 21dbf988079b source/Lib/TLibCommon/TComPrediction.cpp
--- a/source/Lib/TLibCommon/TComPrediction.cpp Thu Oct 31 21:00:29 2013 +0800
+++ b/source/Lib/TLibCommon/TComPrediction.cpp Thu Oct 31 21:01:29 2013 +0800
@@ -508,7 +508,7 @@
{
int refStride = refPic->getStride();
int refOffset = (mv->x >> 2) + (mv->y >> 2) * refStride;
- Pel *ref = refPic->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + partAddr) + refOffset;
+ pixel *ref = refPic->getLumaAddr(cu->getAddr(), cu->getZorderIdxInCU() + partAddr) + refOffset;
int dstStride = dstPic->m_width;
int16_t *dst = dstPic->getLumaAddr(partAddr);
@@ -521,7 +521,7 @@
if ((yFrac | xFrac) == 0)
{
- primitives.ipfilter_p2s(ref, refStride, dst, dstStride, width, height);
+ primitives.luma_p2s(ref, refStride, dst, width, height);
}
else if (yFrac == 0)
{
diff -r a64e813de628 -r 21dbf988079b source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Thu Oct 31 21:00:29 2013 +0800
+++ b/source/common/x86/ipfilter8.asm Thu Oct 31 21:01:29 2013 +0800
@@ -2107,7 +2107,6 @@
movu [r2 + r5 * 2 + FENC_STRIDE * 2 - 16], m1
movu [r2 + r5 * 2 + FENC_STRIDE * 4 - 16], m2
movu [r2 + r5 * 2 + FENC_STRIDE * 6 - 16], m3
- lea r5, [r5 + 8]
je .nextH
jmp .loopW
@@ -2119,9 +2118,9 @@
.nextH:
lea r0, [r0 + r1 * 4]
- add r2, FENC_STRIDE * 2 * 4
-
- sub r4, 4
+ add r2, FENC_STRIDE * 8
+
+ sub r4d, 4
jnz .loopH
RET
diff -r a64e813de628 -r 21dbf988079b source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp Thu Oct 31 21:00:29 2013 +0800
+++ b/source/test/ipfilterharness.cpp Thu Oct 31 21:01:29 2013 +0800
@@ -579,6 +579,13 @@
pixel_buff, srcStride, IPF_vec_output_s, dstStride, width, height);
}
+ if (opt.luma_p2s)
+ {
+ printf("luma_p2s\t");
+ REPORT_SPEEDUP(opt.luma_p2s, ref.luma_p2s,
+ pixel_buff, srcStride, IPF_vec_output_s, width, height);
+ }
+
if (opt.ipfilter_s2p)
{
printf("ipfilter_s2p\t");
More information about the x265-devel
mailing list