[x265] [PATCH] few simple alignments in filter asm code
Praveen Tiwari
praveen at multicorewareinc.com
Fri Oct 18 14:33:45 CEST 2013
# HG changeset patch
# User Praveen Tiwari
# Date 1382099614 -19800
# Node ID 4f2f4c06be4ab96ef1b8c47b838bb340f28843b8
# Parent 5a6e4b47758b6cacc742e469f3e690190d618304
few simple alignments in filter asm code
diff -r 5a6e4b47758b -r 4f2f4c06be4a source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Fri Oct 18 17:35:01 2013 +0530
+++ b/source/common/x86/ipfilter8.asm Fri Oct 18 18:03:34 2013 +0530
@@ -27,8 +27,8 @@
%include "x86util.asm"
SECTION_RODATA 32
-tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
- db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
+ db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
tab_Lm: db 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8
db 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 10
@@ -282,7 +282,6 @@
RET
-
%macro FILTER_H4_w6 3
movu %1, [srcq - 1]
pshufb %2, %1, Tm0
@@ -442,20 +441,20 @@
add srcq, srcstrideq
add dstq, dststrideq
-dec r5d
-jnz .loop
+dec r5d
+jnz .loop
RET
%endmacro
-IPFILTER_CHROMA 6, 8
-IPFILTER_CHROMA 8, 2
-IPFILTER_CHROMA 8, 4
-IPFILTER_CHROMA 8, 6
-IPFILTER_CHROMA 8, 8
-IPFILTER_CHROMA 8, 16
-IPFILTER_CHROMA 8, 32
+IPFILTER_CHROMA 6, 8
+IPFILTER_CHROMA 8, 2
+IPFILTER_CHROMA 8, 4
+IPFILTER_CHROMA 8, 6
+IPFILTER_CHROMA 8, 8
+IPFILTER_CHROMA 8, 16
+IPFILTER_CHROMA 8, 32
IPFILTER_CHROMA 12, 16
;-----------------------------------------------------------------------------
@@ -472,7 +471,7 @@
%define t1 m1
%define t0 m0
-mov r4d, r4m
+mov r4d, r4m
%ifdef PIC
lea r5, [tab_ChromaCoeff]
@@ -481,7 +480,7 @@
movd coef2, [tab_ChromaCoeff + r4 * 4]
%endif
-mov r5d, %2
+mov r5d, %2
pshufd coef2, coef2, 0
mova t2, [tab_c_512]
@@ -493,18 +492,18 @@
add srcq, srcstrideq
add dstq, dststrideq
-dec r5d
-jnz .loop
+dec r5d
+jnz .loop
RET
%endmacro
-IPFILTER_CHROMA_W 16, 4
-IPFILTER_CHROMA_W 16, 8
+IPFILTER_CHROMA_W 16, 4
+IPFILTER_CHROMA_W 16, 8
IPFILTER_CHROMA_W 16, 12
IPFILTER_CHROMA_W 16, 16
IPFILTER_CHROMA_W 16, 32
-IPFILTER_CHROMA_W 32, 8
+IPFILTER_CHROMA_W 32, 8
IPFILTER_CHROMA_W 32, 16
IPFILTER_CHROMA_W 32, 24
IPFILTER_CHROMA_W 24, 32
@@ -573,27 +572,27 @@
mov r4, %2
.loop
- xor r5, r5
+ xor r5, r5
%rep %1 / 8
FILTER_H8_W8 m0, m1, m2
- add r5, 8
+ add r5, 8
%endrep
%rep (%1 % 8) / 4
FILTER_H8_W4 m0, m1, m2
- add r5, 4
+ add r5, 4
%endrep
%rep(%1 % 4)
FILTER_H8_W1 m0, m1, m2
- add r5, 1
+ add r5, 1
%endrep
- add r0, r1
- add r2, r3
+ add r0, r1
+ add r2, r3
- dec r4d
- jnz .loop
+ dec r4d
+ jnz .loop
RET
%endmacro
More information about the x265-devel mailing list