[x264-devel] commit: Fix weightp on ARM + PPC (David Conrad)

git version control git at videolan.org
Mon Nov 9 05:22:04 CET 2009


x264 | branch: master | David Conrad <lessen42 at gmail.com> | Sun Nov  8 20:12:54 2009 -0800| [2bedf8945f921774714dacf5f0668e01c1810b46] | committer: Jason Garrett-Glaser 

Fix weightp on ARM + PPC
No ARM or PPC assembly yet though.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2bedf8945f921774714dacf5f0668e01c1810b46
---

 Makefile          |    8 +++-----
 common/arm/mc-c.c |   20 ++++++++++++++------
 common/ppc/mc.c   |   16 +++++++++++++---
 configure         |    5 ++---
 4 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index 8ba8d81..2fe3361 100644
--- a/Makefile
+++ b/Makefile
@@ -64,11 +64,9 @@ endif
 
 # AltiVec optims
 ifeq ($(ARCH),PPC)
-ALTIVECSRC += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \
-              common/ppc/quant.c common/ppc/deblock.c \
-              common/ppc/predict.c
-SRCS += $(ALTIVECSRC)
-$(ALTIVECSRC:%.c=%.o): CFLAGS += $(ALTIVECFLAGS)
+SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \
+        common/ppc/quant.c common/ppc/deblock.c \
+        common/ppc/predict.c
 endif
 
 # NEON optims
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index c6aaeb0..167b11b 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -76,7 +76,7 @@ static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
 static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
                           uint8_t *src[4], int i_src_stride,
                           int mvx, int mvy,
-                          int i_width, int i_height )
+                          int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -90,18 +90,19 @@ static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
         x264_pixel_avg_wtab_neon[i_width>>2](
                 dst, i_dst_stride, src1, i_src_stride,
                 src2, i_height );
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
     }
+    else if( weight->weightfn )
+        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
-    {
-        x264_mc_copy_wtab_neon[i_width>>2](
-                dst, i_dst_stride, src1, i_src_stride, i_height );
-    }
+        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
 }
 
 static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
                               uint8_t *src[4], int i_src_stride,
                               int mvx, int mvy,
-                              int i_width, int i_height )
+                              int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -115,6 +116,13 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
         x264_pixel_avg_wtab_neon[i_width>>2](
                 dst, *i_dst_stride, src1, i_src_stride,
                 src2, i_height );
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
+        return dst;
+    }
+    else if( weight->weightfn )
+    {
+        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
         return dst;
     }
     else
diff --git a/common/ppc/mc.c b/common/ppc/mc.c
index c703d08..a588d8f 100644
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -181,7 +181,7 @@ static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, int i_dst,
 static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
                              uint8_t *src[4], int i_src_stride,
                              int mvx, int mvy,
-                             int i_width, int i_height )
+                             int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -201,8 +201,11 @@ static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
         default:
             x264_pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
         }
-
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
     }
+    else if( weight->weightfn )
+        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
     {
         switch(i_width) {
@@ -224,7 +227,7 @@ static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
 static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
                                  uint8_t *src[4], int i_src_stride,
                                  int mvx, int mvy,
-                                 int i_width, int i_height )
+                                 int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -248,6 +251,13 @@ static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
             x264_pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
             break;
         }
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
+        return dst;
+    }
+    else if( weight->weightfn )
+    {
+        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
         return dst;
     }
     else
diff --git a/configure b/configure
index d26ace1..7cb32c0 100755
--- a/configure
+++ b/configure
@@ -267,9 +267,9 @@ case $host_cpu in
     ARCH="PPC"
     if [ $SYS = MACOSX ]
     then
-      ALTIVECFLAGS="$ALTIVECFLAGS -faltivec -fastf -mcpu=G4"
+      CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4"
     else
-      ALTIVECFLAGS="$ALTIVECFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H"
+      CFLAGS="$CFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H"
     fi
     ;;
   sparc)
@@ -467,7 +467,6 @@ ARCH=$ARCH
 SYS=$SYS
 CC=$CC
 CFLAGS=$CFLAGS
-ALTIVECFLAGS=$ALTIVECFLAGS
 LDFLAGS=$LDFLAGS
 AR=$AR
 RANLIB=$RANLIB



More information about the x264-devel mailing list