[x265-commits] [x265] asm: fix invalid read in upShift routine
Murugan Vairavel
murugan at multicorewareinc.com
Thu Apr 3 22:03:59 CEST 2014
details: http://hg.videolan.org/x265/rev/82bbd2bf3b49
branches: stable
changeset: 6657:82bbd2bf3b49
user: Murugan Vairavel <murugan at multicorewareinc.com>
date: Thu Apr 03 11:30:44 2014 +0530
description:
asm: fix invalid read in upShift routine
Subject: [x265] Added tag 0.9 for changeset 82bbd2bf3b49
details: http://hg.videolan.org/x265/rev/640f9177eeb0
branches: stable
changeset: 6658:640f9177eeb0
user: Steve Borho <steve at borho.org>
date: Thu Apr 03 11:54:16 2014 -0500
description:
Added tag 0.9 for changeset 82bbd2bf3b49
Subject: [x265] frameencoder: store the reference state of the picture in FrameEncoder
details: http://hg.videolan.org/x265/rev/36a66ea7a27e
branches:
changeset: 6659:36a66ea7a27e
user: Gopu Govindaswamy
date: Thu Apr 03 17:24:57 2014 +0530
description:
frameencoder: store the reference state of the picture in FrameEncoder
We found that the reference state of a reference frame can change during the
encode when we use frame-thread > 1. This causes the CU-level QP for the frame
to be non-deterministic, which leads to non-deterministic encoded output for
the frame. To avoid this, store the reference state of the frame in
FrameEncoder->m_isReferenced and, when the QP is calculated for a CU, read the
reference state of the frame from FrameEncoder->m_isReferenced; this state will
never change during the encode.
Moved slice reference state initialization from dpb to FrameEncoder initSlice()
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/eef9a0050728
branches:
changeset: 6660:eef9a0050728
user: Steve Borho <steve at borho.org>
date: Thu Apr 03 11:56:58 2014 -0500
description:
Merge with stable
Subject: [x265] frameencoder: comment nit
details: http://hg.videolan.org/x265/rev/9c1cc2aa053a
branches:
changeset: 6661:9c1cc2aa053a
user: Steve Borho <steve at borho.org>
date: Thu Apr 03 14:47:56 2014 -0500
description:
frameencoder: comment nit
Subject: [x265] frameencoder: use m_isReferenced when configuring SAO in compressFrame()
details: http://hg.videolan.org/x265/rev/8c946aca5824
branches:
changeset: 6662:8c946aca5824
user: Steve Borho <steve at borho.org>
date: Thu Apr 03 14:49:57 2014 -0500
description:
frameencoder: use m_isReferenced when configuring SAO in compressFrame()
In some pessimal situations, the slice's reference state could even be changed
by the time compressFrame() starts. This prevents any race hazard.
diffstat:
.hgtags | 1 +
source/Lib/TLibCommon/TComDataCU.cpp | 24 -
source/Lib/TLibCommon/TComDataCU.h | 3 -
source/Lib/TLibEncoder/TEncSearch.cpp | 4 +-
source/common/pixel.cpp | 4 +-
source/common/primitives.h | 2 +-
source/common/x86/pixel-a.asm | 18 +-
source/common/x86/pixel-util.h | 12 +-
source/common/x86/pixel-util8.asm | 652 +++++++++++++--------------------
source/encoder/compress.cpp | 10 +-
source/encoder/dpb.cpp | 11 -
source/encoder/dpb.h | 2 +-
source/encoder/encoder.cpp | 6 +
source/encoder/frameencoder.cpp | 32 +-
source/encoder/frameencoder.h | 3 +-
source/test/pixelharness.cpp | 41 +-
16 files changed, 335 insertions(+), 490 deletions(-)
diffs (truncated from 1291 to 300 lines):
diff -r e03388e98ecc -r 8c946aca5824 .hgtags
--- a/.hgtags Wed Apr 02 22:51:49 2014 -0500
+++ b/.hgtags Thu Apr 03 14:49:57 2014 -0500
@@ -10,3 +10,4 @@ 69acb3cb777f977f5edde908069ac565915dd366
b970ffbdd696e3ce45c93b315902eb6366ff085e 0.6
d24e2a8c4326b0cd01bfa6c414c5378481af9018 0.7
527d03c56d6860dc979ddea1196f7e94d13d3e82 0.8
+82bbd2bf3b49ba086be0f0922f91fe0084896351 0.9
diff -r e03388e98ecc -r 8c946aca5824 source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Wed Apr 02 22:51:49 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Thu Apr 03 14:49:57 2014 -0500
@@ -91,8 +91,6 @@ TComDataCU::TComDataCU()
m_cuAboveRight = NULL;
m_cuAbove = NULL;
m_cuLeft = NULL;
- m_cuColocated[0] = NULL;
- m_cuColocated[1] = NULL;
m_mvpIdx[0] = NULL;
m_mvpIdx[1] = NULL;
m_chromaFormat = 0;
@@ -280,9 +278,6 @@ void TComDataCU::initCU(TComPic* pic, ui
m_cuAboveLeft = NULL;
m_cuAboveRight = NULL;
- m_cuColocated[0] = NULL;
- m_cuColocated[1] = NULL;
-
uint32_t uiWidthInCU = pic->getFrameWidthInCU();
if (m_cuAddr % uiWidthInCU)
{
@@ -303,16 +298,6 @@ void TComDataCU::initCU(TComPic* pic, ui
{
m_cuAboveRight = pic->getCU(m_cuAddr - uiWidthInCU + 1);
}
-
- if (getSlice()->getNumRefIdx(REF_PIC_LIST_0) > 0)
- {
- m_cuColocated[0] = getSlice()->getRefPic(REF_PIC_LIST_0, 0)->getCU(m_cuAddr);
- }
-
- if (getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 0)
- {
- m_cuColocated[1] = getSlice()->getRefPic(REF_PIC_LIST_1, 0)->getCU(m_cuAddr);
- }
}
/** initialize prediction data with enabling sub-LCU-level delta QP
@@ -457,9 +442,6 @@ void TComDataCU::initSubCU(TComDataCU* c
m_cuAbove = cu->getCUAbove();
m_cuAboveLeft = cu->getCUAboveLeft();
m_cuAboveRight = cu->getCUAboveRight();
-
- m_cuColocated[0] = cu->getCUColocated(REF_PIC_LIST_0);
- m_cuColocated[1] = cu->getCUColocated(REF_PIC_LIST_1);
}
// initialize Sub partition
@@ -526,9 +508,6 @@ void TComDataCU::initSubCU(TComDataCU* c
m_cuAbove = cu->getCUAbove();
m_cuAboveLeft = cu->getCUAboveLeft();
m_cuAboveRight = cu->getCUAboveRight();
-
- m_cuColocated[0] = cu->getCUColocated(REF_PIC_LIST_0);
- m_cuColocated[1] = cu->getCUColocated(REF_PIC_LIST_1);
}
@@ -620,9 +599,6 @@ void TComDataCU::copyPartFrom(TComDataCU
m_cuAbove = cu->getCUAbove();
m_cuLeft = cu->getCULeft();
- m_cuColocated[0] = cu->getCUColocated(REF_PIC_LIST_0);
- m_cuColocated[1] = cu->getCUColocated(REF_PIC_LIST_1);
-
m_cuMvField[0].copyFrom(cu->getCUMvField(REF_PIC_LIST_0), cu->getTotalNumPart(), offset);
m_cuMvField[1].copyFrom(cu->getCUMvField(REF_PIC_LIST_1), cu->getTotalNumPart(), offset);
diff -r e03388e98ecc -r 8c946aca5824 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Wed Apr 02 22:51:49 2014 -0500
+++ b/source/Lib/TLibCommon/TComDataCU.h Thu Apr 03 14:49:57 2014 -0500
@@ -129,7 +129,6 @@ private:
TComDataCU* m_cuAboveRight; ///< pointer of above-right CU
TComDataCU* m_cuAbove; ///< pointer of above CU
TComDataCU* m_cuLeft; ///< pointer of left CU
- TComDataCU* m_cuColocated[2]; ///< pointer of temporally colocated CU's for both directions
// -------------------------------------------------------------------------------------------------------------------
// coding tool information
@@ -387,8 +386,6 @@ public:
TComDataCU* getCUAboveRight() { return m_cuAboveRight; }
- TComDataCU* getCUColocated(int picList) { return m_cuColocated[picList]; }
-
TComDataCU* getPULeft(uint32_t& lPartUnitIdx,
uint32_t curPartUnitIdx,
bool bEnforceSliceRestriction = true,
diff -r e03388e98ecc -r 8c946aca5824 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Apr 02 22:51:49 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Apr 03 14:49:57 2014 -0500
@@ -465,7 +465,7 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
assert(width <= 32);
//===== reconstruction =====
- primitives.calcrecon[size](pred, residual, 0, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
+ primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
//===== update distortion =====
outDist += primitives.sse_sp[part](reconQt, MAX_CU_SIZE, fenc, stride);
}
@@ -587,7 +587,7 @@ void TEncSearch::xIntraCodingChromaBlk(T
assert(((intptr_t)residual & (width - 1)) == 0);
assert(width <= 32);
//===== reconstruction =====
- primitives.calcrecon[size](pred, residual, 0, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
+ primitives.calcrecon[size](pred, residual, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);
//===== update distortion =====
uint32_t dist = primitives.sse_sp[part](reconQt, reconQtStride, fenc, stride);
if (ttype == TEXT_CHROMA_U)
diff -r e03388e98ecc -r 8c946aca5824 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Apr 02 22:51:49 2014 -0500
+++ b/source/common/pixel.cpp Thu Apr 03 14:49:57 2014 -0500
@@ -460,9 +460,7 @@ void getResidual(pixel *fenc, pixel *pre
}
template<int blockSize>
-void calcRecons(pixel* pred, int16_t* residual,
- pixel*,
- int16_t* recqt, pixel* recipred, int stride, int qtstride, int ipredstride)
+void calcRecons(pixel* pred, int16_t* residual, int16_t* recqt, pixel* recipred, int stride, int qtstride, int ipredstride)
{
for (int y = 0; y < blockSize; y++)
{
diff -r e03388e98ecc -r 8c946aca5824 source/common/primitives.h
--- a/source/common/primitives.h Wed Apr 02 22:51:49 2014 -0500
+++ b/source/common/primitives.h Thu Apr 03 14:49:57 2014 -0500
@@ -125,7 +125,7 @@ typedef void (*cvt32to16_shr_t)(int16_t
typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
-typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff, int32_t* lastPos);
typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
diff -r e03388e98ecc -r 8c946aca5824 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Apr 02 22:51:49 2014 -0500
+++ b/source/common/x86/pixel-a.asm Thu Apr 03 14:49:57 2014 -0500
@@ -6525,10 +6525,12 @@ cglobal upShift_8, 7,7,3
.process2:
cmp r4d, 2
jl .process1
- movd m0,[r0]
- pmovzxbw m0,m0
- psllw m0, m2
- movd [r2], m0
+ movzx r3d, byte [r0]
+ shl r3d, 2
+ mov [r2], r3w
+ movzx r3d, byte [r0 + 1]
+ shl r3d, 2
+ mov [r2 + 2], r3w
add r0, 2
add r2, 4
@@ -6536,10 +6538,8 @@ cglobal upShift_8, 7,7,3
jz .end
.process1:
- movd m0,[r0]
- pmovzxbw m0,m0
- psllw m0, m2
- movd r6, m0
- mov [r2], r6w
+ movzx r3d, byte [r0]
+ shl r3d, 2
+ mov [r2], r3w
.end:
RET
diff -r e03388e98ecc -r 8c946aca5824 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Wed Apr 02 22:51:49 2014 -0500
+++ b/source/common/x86/pixel-util.h Thu Apr 03 14:49:57 2014 -0500
@@ -24,12 +24,12 @@
#ifndef X265_PIXEL_UTIL_H
#define X265_PIXEL_UTIL_H
-void x265_calcRecons4_sse2(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons8_sse2(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons16_sse2(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons32_sse2(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons16_sse4(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons32_sse4(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons4_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons8_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons16_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons32_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons16_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
+void x265_calcRecons32_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
void x265_getResidual4_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
diff -r e03388e98ecc -r 8c946aca5824 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Wed Apr 02 22:51:49 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Thu Apr 03 14:49:57 2014 -0500
@@ -58,590 +58,452 @@ cextern pw_2000
cextern pw_pixel_max
;-----------------------------------------------------------------------------
-; void calcrecon(pixel* pred, int16_t* residual, pixel* recon, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred)
+; void calcrecon(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred)
;-----------------------------------------------------------------------------
INIT_XMM sse2
-cglobal calcRecons4
%if HIGH_BIT_DEPTH
%if ARCH_X86_64 == 1
- DECLARE_REG_TMP 0,1,2,3,4,5,6,7,8
- PROLOGUE 6,9,6
+cglobal calcRecons4, 5,8,4
+ %define t7b r7b
%else
- DECLARE_REG_TMP 0,1,2,3,4,5
- PROLOGUE 6,7,6
- %define t6 r6m
- %define t6d r6d
- %define t7 r7m
- %define t8d r6d
+cglobal calcRecons4, 5,7,4,0-1
+ %define t7b byte [rsp]
%endif
-
- mov t6d, r6m
-%if ARCH_X86_64 == 0
- add t6d, t6d
- mov r6m, t6d
-%else
+ mov r4d, r4m
mov r5d, r5m
- mov r7d, r7m
- add t6d, t6d
- add t7, t7
-%endif
+ mov r6d, r6m
+ add r4d, r4d
+ add r5d, r5d
+ add r6d, r6d
pxor m4, m4
mova m5, [pw_pixel_max]
- add t5, t5
- mov t8d, 4/2
+ mov t7b, 4/2
.loop:
- movh m0, [t0]
- movh m1, [t0 + t5]
+ movh m0, [r0]
+ movh m1, [r0 + r4]
punpcklqdq m0, m1
- movh m2, [t1]
- movh m3, [t1 + t5]
+ movh m2, [r1]
+ movh m3, [r1 + r4]
punpcklqdq m2, m3
paddw m0, m2
CLIPW m0, m4, m5
- ; store recon[] and recipred[]
- movh [t4], m0
-%if ARCH_X86_64 == 0
- add t4, t7
- add t4, t7
- movhps [t4], m0
- add t4, t7
- add t4, t7
+ ; store recipred[]
+ movh [r3], m0
+ movhps [r3 + r6], m0
+
+ ; store recqt[]
+ movh [r2], m0
+ movhps [r2 + r5], m0
+
+ lea r0, [r0 + r4 * 2]
+ lea r1, [r1 + r4 * 2]
+ lea r2, [r2 + r5 * 2]
+ lea r3, [r3 + r6 * 2]
+
+ dec t7b
+ jnz .loop
+ RET
+%else ;HIGH_BIT_DEPTH
+
+%if ARCH_X86_64 == 1
+cglobal calcRecons4, 5,8,4
+ %define t7b r7b
%else
- movhps [t4 + t7], m0
- lea t4, [t4 + t7 * 2]
+cglobal calcRecons4, 5,7,4,0-1
+ %define t7b byte [rsp]
%endif
-
More information about the x265-commits
mailing list