[x264-devel] commit: Fix some misattributions in profiling (Holger Lubitz)
git at videolan.org
git at videolan.org
Sun Mar 28 04:44:33 CEST 2010
x264 | branch: master | Holger Lubitz <holger at lubitz.org> | Sat Mar 20 20:41:21 2010 +0100| [ce6bd494529a393acfc9ab26e798f94040cce1e0] | committer: Jason Garrett-Glaser
Fix some misattributions in profiling
Cycles spent in load_hadamard and in the avg2 w16 ssse3 cacheline-split code were attributed to the wrong functions in profiling.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=ce6bd494529a393acfc9ab26e798f94040cce1e0
---
common/x86/mc-a.asm | 17 ++++++++---------
common/x86/pixel-a.asm | 12 ++++++------
2 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/common/x86/mc-a.asm b/common/x86/mc-a.asm
index 9783066..6dbb5fc 100644
--- a/common/x86/mc-a.asm
+++ b/common/x86/mc-a.asm
@@ -754,15 +754,6 @@ avg_w16_align%1_%2_ssse3:
rep ret
%endmacro
-%assign j 1
-%assign k 2
-%rep 15
-AVG16_CACHELINE_LOOP_SSSE3 j, j
-AVG16_CACHELINE_LOOP_SSSE3 j, k
-%assign j j+1
-%assign k k+1
-%endrep
-
cglobal x264_pixel_avg2_w16_cache64_ssse3
mov eax, r2m
and eax, 0x3f
@@ -790,6 +781,14 @@ cglobal x264_pixel_avg2_w16_cache64_ssse3
RET
%endif
+%assign j 1
+%assign k 2
+%rep 15
+AVG16_CACHELINE_LOOP_SSSE3 j, j
+AVG16_CACHELINE_LOOP_SSSE3 j, k
+%assign j j+1
+%assign k k+1
+%endrep
;=============================================================================
; pixel copy
diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
index 46b4557..fa9d37a 100644
--- a/common/x86/pixel-a.asm
+++ b/common/x86/pixel-a.asm
@@ -1247,8 +1247,8 @@ cglobal x264_intra_sa8d_x3_8x8_core_%1, 3,3,16
; in: r0 = fenc
; out: m0..m3 = hadamard coefs
INIT_MMX
-ALIGN 16
-load_hadamard:
+cglobal x264_hadamard_load
+; not really a global, but otherwise cycles get attributed to the wrong function in profiling
pxor m7, m7
movd m0, [r0+0*FENC_STRIDE]
movd m1, [r0+1*FENC_STRIDE]
@@ -1259,7 +1259,7 @@ load_hadamard:
punpcklbw m2, m7
punpcklbw m3, m7
HADAMARD4_2D 0, 1, 2, 3, 4
- SAVE_MM_PERMUTATION load_hadamard
+ SAVE_MM_PERMUTATION x264_hadamard_load
ret
%macro SCALAR_SUMSUB 4
@@ -1393,7 +1393,7 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
%define t0 r2
%endif
- call load_hadamard
+ call x264_hadamard_load
SCALAR_HADAMARD_LEFT 0, r0, r3, r4, r5
mov t0d, r0d
SCALAR_HADAMARD_TOP 0, r0, r3, r4, r5
@@ -1466,7 +1466,7 @@ cglobal x264_intra_satd_x3_16x16_%1, 0,7
.loop_y:
xor r4d, r4d
.loop_x:
- call load_hadamard
+ call x264_hadamard_load
SUM3x4 %1
SUM4x3 t2d, [left_1d+8*r3], [top_1d+8*r4]
@@ -1555,7 +1555,7 @@ cglobal x264_intra_satd_x3_8x8c_%1, 0,6
.loop_y:
xor r4d, r4d
.loop_x:
- call load_hadamard
+ call x264_hadamard_load
SUM3x4 %1
SUM4x3 [r5+4*r4], [left_1d+8*r3], [top_1d+8*r4]
More information about the x264-devel mailing list