[x265] [PATCH] cleanup: remove unused asm calcrecon
Min Chen
chenm003 at 163.com
Mon Nov 3 21:43:35 CET 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1415047406 28800
# Node ID a7c7c1e849986dc7fa46cd213643b12c5f2866eb
# Parent 244d06de1b999315b5f006be5fc59b10481a2e33
cleanup: remove unused asm calcrecon
diff -r 244d06de1b99 -r a7c7c1e84998 source/common/primitives.h
--- a/source/common/primitives.h Fri Oct 31 17:25:02 2014 -0500
+++ b/source/common/primitives.h Mon Nov 03 12:43:26 2014 -0800
@@ -160,7 +160,6 @@
typedef void (*denoiseDct_t)(int32_t* dctCoef, uint32_t* resSum, uint16_t* offset, int numCoeff);
typedef void (*calcresidual_t)(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
-typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int16_t *qCoef, int qBits, int add, int numCoeff);
typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int16_t *qCoef, int qBits, int add, int numCoeff);
diff -r 244d06de1b99 -r a7c7c1e84998 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Fri Oct 31 17:25:02 2014 -0500
+++ b/source/common/x86/pixel-util.h Mon Nov 03 12:43:26 2014 -0800
@@ -24,13 +24,6 @@
#ifndef X265_PIXEL_UTIL_H
#define X265_PIXEL_UTIL_H
-void x265_calcRecons4_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons8_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons16_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons32_sse2(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons16_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-void x265_calcRecons32_sse4(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
-
void x265_getResidual4_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual16_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
diff -r 244d06de1b99 -r a7c7c1e84998 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Fri Oct 31 17:25:02 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Mon Nov 03 12:43:26 2014 -0800
@@ -61,447 +61,6 @@
cextern pd_32767
cextern pd_n32768
-;-----------------------------------------------------------------------------
-; void calcrecon(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred)
-;-----------------------------------------------------------------------------
-INIT_XMM sse2
-%if HIGH_BIT_DEPTH
-%if ARCH_X86_64 == 1
-cglobal calcRecons4, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons4, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r4d, r4d
- add r5d, r5d
- add r6d, r6d
-
- pxor m4, m4
- mova m5, [pw_pixel_max]
- mov t7b, 4/2
-.loop:
- movh m0, [r0]
- movh m1, [r0 + r4]
- punpcklqdq m0, m1
- movh m2, [r1]
- movh m3, [r1 + r4]
- punpcklqdq m2, m3
- paddw m0, m2
- CLIPW m0, m4, m5
-
- ; store recipred[]
- movh [r3], m0
- movhps [r3 + r6], m0
-
- ; store recqt[]
- movh [r2], m0
- movhps [r2 + r5], m0
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 2]
- lea r2, [r2 + r5 * 2]
- lea r3, [r3 + r6 * 2]
-
- dec t7b
- jnz .loop
- RET
-%else ;HIGH_BIT_DEPTH
-
-%if ARCH_X86_64 == 1
-cglobal calcRecons4, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons4, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r5d, r5d
-
- pxor m0, m0
- mov t7b, 4/2
-.loop:
- movd m1, [r0]
- movd m2, [r0 + r4]
- punpckldq m1, m2
- punpcklbw m1, m0
- movh m2, [r1]
- movh m3, [r1 + r4 * 2]
- punpcklqdq m2, m3
- paddw m1, m2
- packuswb m1, m1
-
- ; store recon[] and recipred[]
- movd [r3], m1
- pshufd m2, m1, 1
- movd [r3 + r6], m2
-
- ; store recqt[]
- punpcklbw m1, m0
- movh [r2], m1
- movhps [r2 + r5], m1
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 4]
- lea r2, [r2 + r5 * 2]
- lea r3, [r3 + r6 * 2]
-
- dec t7b
- jnz .loop
- RET
-%endif ;HIGH_BIT_DEPTH
-
-
-INIT_XMM sse2
-%if ARCH_X86_64 == 1
-cglobal calcRecons8, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons8, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
-
-%if HIGH_BIT_DEPTH
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r4d, r4d
- add r5d, r5d
- add r6d, r6d
-
- pxor m4, m4
- mova m5, [pw_pixel_max]
- mov t7b, 8/2
-.loop:
- movu m0, [r0]
- movu m1, [r0 + r4]
- movu m2, [r1]
- movu m3, [r1 + r4]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recipred[]
- movu [r3], m0
- movu [r3 + r6], m1
-
- ; store recqt[]
- movu [r2], m0
- movu [r2 + r5], m1
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 2]
- lea r2, [r2 + r5 * 2]
- lea r3, [r3 + r6 * 2]
-
- dec t7b
- jnz .loop
- RET
-%else ;HIGH_BIT_DEPTH
-
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r5d, r5d
-
- pxor m0, m0
- mov t7b, 8/2
-.loop:
- movh m1, [r0]
- movh m2, [r0 + r4]
- punpcklbw m1, m0
- punpcklbw m2, m0
- movu m3, [r1]
- movu m4, [r1 + r4 * 2]
- paddw m1, m3
- paddw m2, m4
- packuswb m1, m2
-
- ; store recon[] and recipred[]
- movh [r3], m1
- movhps [r3 + r6], m1
-
- ; store recqt[]
- punpcklbw m2, m1, m0
- punpckhbw m1, m0
- movu [r2], m2
- movu [r2 + r5], m1
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 4]
- lea r2, [r2 + r5 * 2]
- lea r3, [r3 + r6 * 2]
-
- dec t7b
- jnz .loop
- RET
-%endif ;HIGH_BIT_DEPTH
-
-
-
-%if HIGH_BIT_DEPTH
-INIT_XMM sse2
-%if ARCH_X86_64 == 1
-cglobal calcRecons16, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons16, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
-
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r4d, r4d
- add r5d, r5d
- add r6d, r6d
-
- pxor m4, m4
- mova m5, [pw_pixel_max]
- mov t7b, 16/2
-.loop:
- movu m0, [r0]
- movu m1, [r0 + 16]
- movu m2, [r1]
- movu m3, [r1 + 16]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recipred[]
- movu [r3], m0
- movu [r3 + 16], m1
-
- ; store recqt[]
- movu [r2], m0
- movu [r2 + 16], m1
-
- movu m0, [r0 + r4]
- movu m1, [r0 + r4 + 16]
- movu m2, [r1 + r4]
- movu m3, [r1 + r4 + 16]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recon[] and recipred[]
- movu [r3 + r6], m0
- movu [r3 + r6 + 16], m1
-
- ; store recqt[]
- movu [r2 + r5], m0
- movu [r2 + r5 + 16], m1
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 2]
- lea r2, [r2 + r5 * 2]
- lea r3, [r3 + r6 * 2]
-
- dec t7b
- jnz .loop
- RET
-%else ;HIGH_BIT_DEPTH
-
-INIT_XMM sse4
-%if ARCH_X86_64 == 1
-cglobal calcRecons16, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons16, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
-
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r5d, r5d
-
- pxor m0, m0
- mov t7b, 16
-.loop:
- movu m2, [r0]
- pmovzxbw m1, m2
- punpckhbw m2, m0
- paddw m1, [r1]
- paddw m2, [r1 + 16]
- packuswb m1, m2
-
- ; store recon[] and recipred[]
- movu [r3], m1
-
- ; store recqt[]
- pmovzxbw m2, m1
- punpckhbw m1, m0
- movu [r2], m2
- movu [r2 + 16], m1
-
- add r2, r5
- add r3, r6
- add r0, r4
- lea r1, [r1 + r4 * 2]
-
- dec t7b
- jnz .loop
- RET
-%endif ;HIGH_BIT_DEPTH
-
-%if HIGH_BIT_DEPTH
-INIT_XMM sse2
-%if ARCH_X86_64 == 1
-cglobal calcRecons32, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons32, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
-
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r4d, r4d
- add r5d, r5d
- add r6d, r6d
-
- pxor m4, m4
- mova m5, [pw_pixel_max]
- mov t7b, 32/2
-.loop:
-
- movu m0, [r0]
- movu m1, [r0 + 16]
- movu m2, [r1]
- movu m3, [r1 + 16]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recipred[]
- movu [r3], m0
- movu [r3 + 16], m1
-
- ; store recqt[]
- movu [r2], m0
- movu [r2 + 16], m1
-
- movu m0, [r0 + 32]
- movu m1, [r0 + 48]
- movu m2, [r1 + 32]
- movu m3, [r1 + 48]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recon[] and recipred[]
- movu [r3 + 32], m0
- movu [r3 + 48], m1
-
- ; store recqt[]
- movu [r2 + 32], m0
- movu [r2 + 48], m1
- add r2, r5
-
- movu m0, [r0 + r4]
- movu m1, [r0 + r4 + 16]
- movu m2, [r1 + r4]
- movu m3, [r1 + r4 + 16]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recon[] and recipred[]
- movu [r3 + r6], m0
- movu [r3 + r6 + 16], m1
-
- ; store recqt[]
- movu [r2], m0
- movu [r2 + 16], m1
-
- movu m0, [r0 + r4 + 32]
- movu m1, [r0 + r4 + 48]
- movu m2, [r1 + r4 + 32]
- movu m3, [r1 + r4 + 48]
- paddw m0, m2
- paddw m1, m3
- CLIPW2 m0, m1, m4, m5
-
- ; store recon[] and recipred[]
- movu [r3 + r6 + 32], m0
- movu [r3 + r6 + 48], m1
- lea r3, [r3 + r6 * 2]
-
- ; store recqt[]
- movu [r2 + 32], m0
- movu [r2 + 48], m1
- add r2, r5
-
- lea r0, [r0 + r4 * 2]
- lea r1, [r1 + r4 * 2]
-
- dec t7b
- jnz .loop
- RET
-%else ;HIGH_BIT_DEPTH
-INIT_XMM sse4
-%if ARCH_X86_64 == 1
-cglobal calcRecons32, 5,8,4
- %define t7b r7b
-%else
-cglobal calcRecons32, 5,7,4,0-1
- %define t7b byte [rsp]
-%endif
-
- mov r4d, r4m
- mov r5d, r5m
- mov r6d, r6m
- add r5d, r5d
-
- pxor m0, m0
- mov t7b, 32
-.loop:
- movu m2, [r0]
- movu m4, [r0 + 16]
- pmovzxbw m1, m2
- punpckhbw m2, m0
- pmovzxbw m3, m4
- punpckhbw m4, m0
-
- paddw m1, [r1 + 0 * 16]
- paddw m2, [r1 + 1 * 16]
- packuswb m1, m2
-
- paddw m3, [r1 + 2 * 16]
- paddw m4, [r1 + 3 * 16]
- packuswb m3, m4
-
- ; store recon[] and recipred[]
- movu [r3], m1
- movu [r3 + 16], m3
-
- ; store recqt[]
- pmovzxbw m2, m1
- punpckhbw m1, m0
- movu [r2 + 0 * 16], m2
- movu [r2 + 1 * 16], m1
- pmovzxbw m4, m3
- punpckhbw m3, m0
- movu [r2 + 2 * 16], m4
- movu [r2 + 3 * 16], m3
-
- add r2, r5
- add r3, r6
- add r0, r4
- lea r1, [r1 + r4 * 2]
-
- dec t7b
- jnz .loop
- RET
-%endif ;HIGH_BIT_DEPTH
-
;-----------------------------------------------------------------------------
; void getResidual(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride)
More information about the x265-devel mailing list