[vlc-commits] i420_rgb: remove redundant loads in RV15/16 MMX unpacking
Lyndon Brown
git at videolan.org
Wed Mar 6 17:13:33 CET 2019
vlc | branch: master | Lyndon Brown <jnqnfe at gmail.com> | Sun Jan 20 10:01:57 2019 +0000| [3251ab0755bcb0d3079589b153121c0007cc1486] | committer: Jean-Baptiste Kempf
i420_rgb: remove redundant loads in RV15/16 MMX unpacking
Signed-off-by: Jean-Baptiste Kempf <jb at videolan.org>
> http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=3251ab0755bcb0d3079589b153121c0007cc1486
---
modules/video_chroma/i420_rgb_mmx.h | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/modules/video_chroma/i420_rgb_mmx.h b/modules/video_chroma/i420_rgb_mmx.h
index 521a830ce9..4c2bb48ed5 100644
--- a/modules/video_chroma/i420_rgb_mmx.h
+++ b/modules/video_chroma/i420_rgb_mmx.h
@@ -215,16 +215,13 @@ punpcklbw %%mm4, %%mm2 # ________ ________ g7g6g5g4 g3______ \n\
punpcklbw %%mm1, %%mm0 # __r7r6r5 r4r3____ ______b7 b6b5b4b3 \n\
psllw $2,%%mm2 # ________ ____g7g6 g5g4g3__ ________ \n\
por %%mm2, %%mm0 # __r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 \n\
-movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movq %%mm0, (%3) # store pixel 0-3 \n\
\n\
# pack the 3 separate RGB bytes into 2 for pixels 4-7 \n\
punpckhbw %%mm4, %%mm7 # ________ ________ g7g6g5g4 g3______ \n\
punpckhbw %%mm1, %%mm5 # __r7r6r5 r4r3____ ______b7 b6b5b4b3 \n\
psllw $2,%%mm7 # ________ ____g7g6 g5g4g3__ ________ \n\
-movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
por %%mm7, %%mm5 # __r7r6r5 r4r3g7g6 g5g4g3b7 b6b5b4b3 \n\
-movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
movq %%mm5, 8(%3) # store pixel 4-7 \n\
"
@@ -250,16 +247,13 @@ punpcklbw %%mm4, %%mm2 # ________ ________ g7g6g5g4 g3g2____ \n\
punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
psllw $3,%%mm2 # ________ __g7g6g5 g4g3g2__ ________ \n\
por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
-movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movq %%mm0, (%3) # store pixel 0-3 \n\
\n\
# pack the 3 separate RGB bytes into 2 for pixels 4-7 \n\
punpckhbw %%mm4, %%mm7 # ________ ________ g7g6g5g4 g3g2____ \n\
punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3______ ______b7 b6b5b4b3 \n\
psllw $3,%%mm7 # ________ __g7g6g5 g4g3g2__ ________ \n\
-movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3 \n\
-movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
movq %%mm5, 8(%3) # store pixel 4-7 \n\
"
@@ -437,15 +431,12 @@ movq %%mm2, 24(%3) # Store ABGR7 ABGR6 \n\
mm0 = _mm_unpacklo_pi8(mm0, mm1); \
mm2 = _mm_slli_pi16(mm2, 2); \
mm0 = _mm_or_si64(mm0, mm2); \
- mm6 = (__m64)*(uint64_t *)(p_y + 8); \
*(uint64_t *)p_buffer = (uint64_t)mm0; \
\
mm7 = _mm_unpackhi_pi8(mm7, mm4); \
mm5 = _mm_unpackhi_pi8(mm5, mm1); \
mm7 = _mm_slli_pi16(mm7, 2); \
- mm0 = _mm_cvtsi32_si64((int)*(uint32_t *)(p_u + 4)); \
mm5 = _mm_or_si64(mm5, mm7); \
- mm1 = _mm_cvtsi32_si64((int)*(uint32_t *)(p_v + 4)); \
*(uint64_t *)(p_buffer + 4) = (uint64_t)mm5;
#define MMX_UNPACK_16 \
@@ -461,15 +452,12 @@ movq %%mm2, 24(%3) # Store ABGR7 ABGR6 \n\
mm0 = _mm_unpacklo_pi8(mm0, mm1); \
mm2 = _mm_slli_pi16(mm2, 3); \
mm0 = _mm_or_si64(mm0, mm2); \
- mm6 = (__m64)*(uint64_t *)(p_y + 8); \
*(uint64_t *)p_buffer = (uint64_t)mm0; \
\
mm7 = _mm_unpackhi_pi8(mm7, mm4); \
mm5 = _mm_unpackhi_pi8(mm5, mm1); \
mm7 = _mm_slli_pi16(mm7, 3); \
- mm0 = _mm_cvtsi32_si64((int)*(uint32_t *)(p_u + 4)); \
mm5 = _mm_or_si64(mm5, mm7); \
- mm1 = _mm_cvtsi32_si64((int)*(uint32_t *)(p_v + 4)); \
*(uint64_t *)(p_buffer + 4) = (uint64_t)mm5;
#define MMX_UNPACK_32_ARGB \
More information about the vlc-commits mailing list