[x265] [PATCH 09 of 29] intra_pred_dc: cleanup, remove unused asm code
Dnyaneshwar G
dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:17 CET 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1418728444 -19800
# Tue Dec 16 16:44:04 2014 +0530
# Node ID 723d7d1622b8de39d5221f0b8ff82a2a922e7d8b
# Parent c238ea33fa51521639404aeb65864a7e4492dcef
intra_pred_dc: cleanup, remove unused asm code
diff -r c238ea33fa51 -r 723d7d1622b8 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Jan 12 12:48:45 2015 +0530
+++ b/source/common/x86/intrapred16.asm Tue Dec 16 16:44:04 2014 +0530
@@ -71,73 +71,6 @@
cextern pb_unpackwq1
cextern pb_unpackwq2
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc4, 4,6,2
- mov r4d, r5m
- add r2, 2
- add r3, 2
-
- movh m0, [r3] ; sumAbove
- movh m1, [r2] ; sumLeft
-
- paddw m0, m1
- pshufd m1, m0, 1
- paddw m0, m1
- phaddw m0, m0 ; m0 = sum
-
- test r4d, r4d
-
- pmulhrsw m0, [pw_4096] ; m0 = (sum + 4) / 8
- movd r4d, m0 ; r4d = dc_val
- movzx r4d, r4w
- pshuflw m0, m0, 0 ; m0 = word [dc_val ...]
-
- ; store DC 4x4
- movh [r0], m0
- movh [r0 + r1 * 2], m0
- movh [r0 + r1 * 4], m0
- lea r5, [r0 + r1 * 4]
- movh [r5 + r1 * 2], m0
-
- ; do DC filter
- jz .end
- lea r5d, [r4d * 2 + 2] ; r5d = DC * 2 + 2
- add r4d, r5d ; r4d = DC * 3 + 2
- movd m0, r4d
- pshuflw m0, m0, 0 ; m0 = pixDCx3
-
- ; filter top
- movu m1, [r3]
- paddw m1, m0
- psraw m1, 2
- movh [r0], m1 ; overwrite top-left pixel, we will update it later
-
- ; filter top-left
- movzx r3d, word [r3]
- add r5d, r3d
- movzx r3d, word [r2]
- add r3d, r5d
- shr r3d, 2
- mov [r0], r3w
-
- ; filter left
- lea r0, [r0 + r1 * 2]
- movu m1, [r2 + 2]
- paddw m1, m0
- psraw m1, 2
- movd r3d, m1
- mov [r0], r3w
- shr r3d, 16
- mov [r0 + r1 * 2], r3w
- pextrw [r0 + r1 * 4], m1, 2
-
-.end:
-
- RET
-
;-----------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
;-----------------------------------------------------------------------------------
@@ -202,87 +135,6 @@
.end:
RET
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc8, 4, 7, 2
- mov r4d, r5m
- add r2, 2
- add r3, 2
- add r1, r1
- movu m0, [r3]
- movu m1, [r2]
-
- paddw m0, m1
- movhlps m1, m0
- paddw m0, m1
- phaddw m0, m0
- pmaddwd m0, [pw_1]
-
- movd r5d, m0
- add r5d, 8
- shr r5d, 4 ; sum = sum / 16
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = word [dc_val ...]
- pshufd m1, m1, 0
-
- test r4d, r4d
-
- ; store DC 8x8
- mov r6, r0
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + r1 * 2], m1
- lea r0, [r0 + r1 * 2]
- movu [r0 + r1], m1
- movu [r0 + r1 * 2], m1
- lea r0, [r0 + r1 * 2]
- movu [r0 + r1], m1
- movu [r0 + r1 * 2], m1
- lea r0, [r0 + r1 * 2]
- movu [r0 + r1], m1
-
- ; Do DC Filter
- jz .end
- lea r4d, [r5d * 2 + 2] ; r4d = DC * 2 + 2
- add r5d, r4d ; r5d = DC * 3 + 2
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = pixDCx3
- pshufd m1, m1, 0
-
- ; filter top
- movu m0, [r3]
- paddw m0, m1
- psraw m0, 2
- movu [r6], m0
-
- ; filter top-left
- movzx r3d, word [r3]
- add r4d, r3d
- movzx r3d, word [r2]
- add r3d, r4d
- shr r3d, 2
- mov [r6], r3w
-
- ; filter left
- add r6, r1
- movu m0, [r2 + 2]
- paddw m0, m1
- psraw m0, 2
- pextrw [r6], m0, 0
- pextrw [r6 + r1], m0, 1
- pextrw [r6 + r1 * 2], m0, 2
- lea r6, [r6 + r1 * 2]
- pextrw [r6 + r1], m0, 3
- pextrw [r6 + r1 * 2], m0, 4
- lea r6, [r6 + r1 * 2]
- pextrw [r6 + r1], m0, 5
- pextrw [r6 + r1 * 2], m0, 6
-
-.end:
- RET
-
;-----------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
;-----------------------------------------------------------------------------------
@@ -366,10 +218,9 @@
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
;-------------------------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal intra_pred_dc16, 4, 7, 4
- mov r4d, r5m
+cglobal intra_pred_dc16_new, 5, 7, 4
+ lea r3, [r2 + 66]
add r2, 2
- add r3, 2
add r1, r1
movu m0, [r3]
movu m1, [r3 + 16]
@@ -444,26 +295,26 @@
pshufd m1, m1, 0
; filter top
- movu m2, [r3]
+ movu m2, [r2]
paddw m2, m1
psraw m2, 2
movu [r6], m2
- movu m3, [r3 + 16]
+ movu m3, [r2 + 16]
paddw m3, m1
psraw m3, 2
movu [r6 + 16], m3
; filter top-left
- movzx r3d, word [r3]
- add r4d, r3d
- movzx r3d, word [r2]
- add r3d, r4d
- shr r3d, 2
- mov [r6], r3w
+ movzx r5d, word [r3]
+ add r4d, r5d
+ movzx r5d, word [r2]
+ add r5d, r4d
+ shr r5d, 2
+ mov [r6], r5w
; filter left
add r6, r1
- movu m2, [r2 + 2]
+ movu m2, [r3 + 2]
paddw m2, m1
psraw m2, 2
@@ -480,141 +331,6 @@
pextrw [r6 + r1], m2, 7
lea r6, [r6 + r1 * 2]
- movu m3, [r2 + 18]
- paddw m3, m1
- psraw m3, 2
-
- pextrw [r6], m3, 0
- pextrw [r6 + r1], m3, 1
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m3, 2
- pextrw [r6 + r1], m3, 3
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m3, 4
- pextrw [r6 + r1], m3, 5
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m3, 6
-
-.end:
- RET
-
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16_new, 5, 7, 4
- lea r3, [r2 + 66]
- add r2, 2
- add r1, r1
- movu m0, [r3]
- movu m1, [r3 + 16]
- movu m2, [r2]
- movu m3, [r2 + 16]
-
- paddw m0, m1
- paddw m2, m3
- paddw m0, m2
- movhlps m1, m0
- paddw m0, m1
- phaddw m0, m0
- pmaddwd m0, [pw_1]
-
- movd r5d, m0
- add r5d, 16
- shr r5d, 5 ; sum = sum / 16
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = word [dc_val ...]
- pshufd m1, m1, 0
-
- test r4d, r4d
-
- ; store DC 16x16
- mov r6, r0
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + 16], m1
- movu [r0 + r1], m1
- movu [r0 + 16 + r1], m1
-
- ; Do DC Filter
- jz .end
- lea r4d, [r5d * 2 + 2] ; r4d = DC * 2 + 2
- add r5d, r4d ; r5d = DC * 3 + 2
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = pixDCx3
- pshufd m1, m1, 0
-
- ; filter top
- movu m2, [r2]
- paddw m2, m1
- psraw m2, 2
- movu [r6], m2
- movu m3, [r2 + 16]
- paddw m3, m1
- psraw m3, 2
- movu [r6 + 16], m3
-
- ; filter top-left
- movzx r5d, word [r3]
- add r4d, r5d
- movzx r5d, word [r2]
- add r5d, r4d
- shr r5d, 2
- mov [r6], r5w
-
- ; filter left
- add r6, r1
- movu m2, [r3 + 2]
- paddw m2, m1
- psraw m2, 2
-
- pextrw [r6], m2, 0
- pextrw [r6 + r1], m2, 1
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m2, 2
- pextrw [r6 + r1], m2, 3
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m2, 4
- pextrw [r6 + r1], m2, 5
- lea r6, [r6 + r1 * 2]
- pextrw [r6], m2, 6
- pextrw [r6 + r1], m2, 7
-
- lea r6, [r6 + r1 * 2]
movu m3, [r3 + 18]
paddw m3, m1
psraw m3, 2
@@ -707,82 +423,6 @@
jnz .loop
RET
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc32, 4, 5, 6
- mov r4d, r5m
- add r2, 2
- add r3, 2
- add r1, r1
- movu m0, [r3]
- movu m1, [r3 + 16]
- movu m2, [r3 + 32]
- movu m3, [r3 + 48]
- paddw m0, m1
- paddw m2, m3
- paddw m0, m2
- movu m1, [r2]
- movu m3, [r2 + 16]
- movu m4, [r2 + 32]
- movu m5, [r2 + 48]
- paddw m1, m3
- paddw m4, m5
- paddw m1, m4
- paddw m0, m1
- movhlps m1, m0
- paddw m0, m1
- phaddw m0, m0
- pmaddwd m0, [pw_1]
-
- paddd m0, [pd_32] ; sum = sum + 32
- psrld m0, 6 ; sum = sum / 64
- pshuflw m0, m0, 0
- pshufd m0, m0, 0
-
- lea r2, [r1 * 3]
- mov r3d, 4
-.loop:
- ; store DC 32x32
- movu [r0 + 0], m0
- movu [r0 + 16], m0
- movu [r0 + 32], m0
- movu [r0 + 48], m0
- movu [r0 + r1 + 0], m0
- movu [r0 + r1 + 16], m0
- movu [r0 + r1 + 32], m0
- movu [r0 + r1 + 48], m0
- movu [r0 + r1 * 2 + 0], m0
- movu [r0 + r1 * 2 + 16], m0
- movu [r0 + r1 * 2 + 32], m0
- movu [r0 + r1 * 2 + 48], m0
- movu [r0 + r2 + 0], m0
- movu [r0 + r2 + 16], m0
- movu [r0 + r2 + 32], m0
- movu [r0 + r2 + 48], m0
- lea r0, [r0 + r1 * 4]
- movu [r0 + 0], m0
- movu [r0 + 16], m0
- movu [r0 + 32], m0
- movu [r0 + 48], m0
- movu [r0 + r1 + 0], m0
- movu [r0 + r1 + 16], m0
- movu [r0 + r1 + 32], m0
- movu [r0 + r1 + 48], m0
- movu [r0 + r1 * 2 + 0], m0
- movu [r0 + r1 * 2 + 16], m0
- movu [r0 + r1 * 2 + 32], m0
- movu [r0 + r1 * 2 + 48], m0
- movu [r0 + r2 + 0], m0
- movu [r0 + r2 + 16], m0
- movu [r0 + r2 + 32], m0
- movu [r0 + r2 + 48], m0
- lea r0, [r0 + r1 * 4]
- dec r3d
- jnz .loop
- RET
-
;-----------------------------------------------------------------------------------------------------------
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
;-----------------------------------------------------------------------------------------------------------
diff -r c238ea33fa51 -r 723d7d1622b8 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Mon Jan 12 12:48:45 2015 +0530
+++ b/source/common/x86/intrapred8.asm Tue Dec 16 16:44:04 2014 +0530
@@ -72,70 +72,6 @@
cextern multiH3
cextern multi_2Row
-;-----------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc4, 4,6,3
- mov r4d, r5m
- inc r2
- inc r3
- pxor m0, m0
- movd m1, [r2]
- movd m2, [r3]
- punpckldq m1, m2
- psadbw m1, m0 ; m1 = sum
-
- test r4d, r4d
-
- mov r4d, 4096
- movd m2, r4d
- pmulhrsw m1, m2 ; m1 = (sum + 4) / 8
- movd r4d, m1 ; r4d = dc_val
- pshufb m1, m0 ; m1 = byte [dc_val ...]
-
- ; store DC 4x4
- lea r5, [r1 * 3]
- movd [r0], m1
- movd [r0 + r1], m1
- movd [r0 + r1 * 2], m1
- movd [r0 + r5], m1
-
- ; do DC filter
- jz .end
- lea r5d, [r4d * 2 + 2] ; r5d = DC * 2 + 2
- add r4d, r5d ; r4d = DC * 3 + 2
- movd m1, r4d
- pshuflw m1, m1, 0 ; m1 = pixDCx3
-
- ; filter top
- pmovzxbw m2, [r3]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- movd [r0], m2 ; overwrite top-left pixel, we will update it later
-
- ; filter top-left
- movzx r3d, byte [r3]
- add r5d, r3d
- movzx r3d, byte [r2]
- add r3d, r5d
- shr r3d, 2
- mov [r0], r3b
-
- ; filter left
- add r0, r1
- pmovzxbw m2, [r2 + 1]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- pextrb [r0], m2, 0
- pextrb [r0 + r1], m2, 1
- pextrb [r0 + r1 * 2], m2, 2
-
-.end:
- RET
-
;---------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
;---------------------------------------------------------------------------------------------
@@ -198,86 +134,6 @@
.end:
RET
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc8, 4, 7, 3
- mov r4d, r5m
- inc r2
- inc r3
- pxor m0, m0
- movh m1, [r2]
- movh m2, [r3]
- punpcklqdq m1, m2
- psadbw m1, m0
- pshufd m2, m1, 2
- paddw m1, m2
-
- movd r5d, m1
- add r5d, 8
- shr r5d, 4 ; sum = sum / 16
- movd m1, r5d
- pshufb m1, m0 ; m1 = byte [dc_val ...]
-
- test r4d, r4d
-
- ; store DC 8x8
- mov r6, r0
- movh [r0], m1
- movh [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movh [r0], m1
- movh [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movh [r0], m1
- movh [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movh [r0], m1
- movh [r0 + r1], m1
-
- ; Do DC Filter
- jz .end
- lea r4d, [r5d * 2 + 2] ; r4d = DC * 2 + 2
- add r5d, r4d ; r5d = DC * 3 + 2
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = pixDCx3
- pshufd m1, m1, 0
-
- ; filter top
- pmovzxbw m2, [r3]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- movh [r6], m2
-
- ; filter top-left
- movzx r3d, byte [r3]
- add r4d, r3d
- movzx r3d, byte [r2]
- add r3d, r4d
- shr r3d, 2
- mov [r6], r3b
-
- ; filter left
- add r6, r1
- pmovzxbw m2, [r2 + 1]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- pextrb [r6], m2, 0
- pextrb [r6 + r1], m2, 1
- pextrb [r6 + 2 * r1], m2, 2
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m2, 3
- pextrb [r6 + r1 * 2], m2, 4
- pextrb [r6 + r1 * 4], m2, 6
- lea r1, [r1 * 3]
- pextrb [r6 + r1], m2, 5
-
-.end:
- RET
-
;---------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
;---------------------------------------------------------------------------------------------
@@ -357,14 +213,13 @@
.end:
RET
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16, 5, 7, 4
- mov r4d, r5m
+;--------------------------------------------------------------------------------------------
+; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
+;--------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc16_new, 5, 7, 4
+ lea r3, [r2 + 33]
inc r2
- inc r3
pxor m0, m0
movu m1, [r2]
movu m2, [r3]
@@ -417,120 +272,6 @@
pshufd m1, m1, 0
; filter top
- pmovzxbw m2, [r3]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- movh [r6], m2
- pmovzxbw m3, [r3 + 8]
- paddw m3, m1
- psraw m3, 2
- packuswb m3, m3
- movh [r6 + 8], m3
-
- ; filter top-left
- movzx r3d, byte [r3]
- add r4d, r3d
- movzx r3d, byte [r2]
- add r3d, r4d
- shr r3d, 2
- mov [r6], r3b
-
- ; filter left
- add r6, r1
- pmovzxbw m2, [r2 + 1]
- paddw m2, m1
- psraw m2, 2
- packuswb m2, m2
- pextrb [r6], m2, 0
- pextrb [r6 + r1], m2, 1
- pextrb [r6 + r1 * 2], m2, 2
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m2, 3
- pextrb [r6 + r1 * 2], m2, 4
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m2, 5
- pextrb [r6 + r1 * 2], m2, 6
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m2, 7
-
- pmovzxbw m3, [r2 + 9]
- paddw m3, m1
- psraw m3, 2
- packuswb m3, m3
- pextrb [r6 + r1 * 2], m3, 0
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m3, 1
- pextrb [r6 + r1 * 2], m3, 2
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m3, 3
- pextrb [r6 + r1 * 2], m3, 4
- lea r6, [r6 + r1 * 2]
- pextrb [r6 + r1], m3, 5
- pextrb [r6 + r1 * 2], m3, 6
-
-.end:
- RET
-
-;--------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
-;--------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16_new, 5, 7, 4
- lea r3, [r2 + 33]
- inc r2
- pxor m0, m0
- movu m1, [r2]
- movu m2, [r3]
- psadbw m1, m0
- psadbw m2, m0
- paddw m1, m2
- pshufd m2, m1, 2
- paddw m1, m2
-
- movd r5d, m1
- add r5d, 16
- shr r5d, 5 ; sum = sum / 32
- movd m1, r5d
- pshufb m1, m0 ; m1 = byte [dc_val ...]
-
- test r4d, r4d
-
- ; store DC 16x16
- mov r6, r0
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
- lea r0, [r0 + r1 * 2]
- movu [r0], m1
- movu [r0 + r1], m1
-
- ; Do DC Filter
- jz .end
- lea r4d, [r5d * 2 + 2] ; r4d = DC * 2 + 2
- add r5d, r4d ; r5d = DC * 3 + 2
- movd m1, r5d
- pshuflw m1, m1, 0 ; m1 = pixDCx3
- pshufd m1, m1, 0
-
- ; filter top
pmovzxbw m2, [r2]
paddw m2, m1
psraw m2, 2
@@ -586,80 +327,6 @@
.end:
RET
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc32, 4, 5, 5
- inc r2
- inc r3
- pxor m0, m0
- movu m1, [r2]
- movu m2, [r2 + 16]
- movu m3, [r3]
- movu m4, [r3 + 16]
- psadbw m1, m0
- psadbw m2, m0
- psadbw m3, m0
- psadbw m4, m0
- paddw m1, m2
- paddw m3, m4
- paddw m1, m3
- pshufd m2, m1, 2
- paddw m1, m2
-
- movd r4d, m1
- add r4d, 32
- shr r4d, 6 ; sum = sum / 64
- movd m1, r4d
- pshufb m1, m0 ; m1 = byte [dc_val ...]
-
-%rep 2
- ; store DC 16x16
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
- movu [r0], m1
- movu [r0 + r1], m1
- movu [r0 + 16], m1
- movu [r0 + r1 + 16],m1
- lea r0, [r0 + 2 * r1]
-%endrep
-
- RET
-
;---------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
;---------------------------------------------------------------------------------------------
More information about the x265-devel mailing list