VLC and Altivec

Michel Lanners mlan at cpu.lu
Tue Sep 11 08:05:53 CEST 2001


[CC: to vlc-devel in case anybody wants to help test...]

Hi all,

On   9 Sep, this message from Michel Lanners echoed through cyberspace:
> Salut Christophe,
> 
> On   5 Sep, this message from Christophe Massiot echoed through cyberspace:
>> I need a happy volunteer to test (and probably debug :-) the Altivec code
>> I have just committed into the CVS. Could you try and tell me how it goes ?
> 
> Well, it goes as expected, to some extent ;-) :
> 
> - I need to change some things in the Makefiles and in configure, to
>   make the Altivec code compile on Linux. Also, Altivec in C is
>   unusable (no serious Altivec-enabled gcc available), so the
>   MotionCompensation is not an option. Only ASM can be used.
> 
> - Alignment issues. The idct_inner_s needs alignment for the pi_block,
>   and the macroblock struct also for the idct[]'s. In addition, the
>   malloc() blocks used for these structs need to be 16-byte aligned. I
>   reused my #define malloc trick to replace all malloc calls by aligned
>   malloc's.
> 
> - There is a slight bug in idctaltivec.c that makes the compiler choke.
>   No biggy.
> 
> - The detection of Altivec availability at runtime doesn't work.
>   Overridden for now; needs to be solved later.
> 
> - At this point, vlc runs. However the image is distorted; edges have
>   colour borders, and the image is not clear. No idea what causes this;
>   maybe it's a problem caused by the usage we make of Paulus' code?

Attached is a patch that brings current CVS (well, 20010909 anyway) to
the state described above; i.e. compilable and runnable on Linux without
Altivec-enabled gcc. No MotionCompensation yet, though...

There are one or two things that should go into CVS right away; the rest
is WIP:

diff -uNr src/vlc-cvs/vlc/plugins/idct/idctaltivec.c vlc-cvs-20010909/plugins/idct/idctaltivec.c
--- src/vlc-cvs/vlc/plugins/idct/idctaltivec.c  Thu Sep  6 12:19:18 2001
+++ vlc-cvs-20010909/plugins/idct/idctaltivec.c Tue Sep 11 00:33:34 2001
@@ -99,7 +99,7 @@
 {
 }

-static void InitIDCT( void * p_idct_data )
+static void InitIDCT( void ** pp_idct_data )
 {
 }

diff -uNr src/vlc-cvs/vlc/plugins/motion/motionaltivec.c vlc-cvs-20010909/plugins/motion/motionaltivec.c
--- src/vlc-cvs/vlc/plugins/motion/motionaltivec.c      Thu Sep  6 16:02:56 2001
+++ vlc-cvs-20010909/plugins/motion/motionaltivec.c     Sun Sep  9 16:44:25 2001
@@ -58,7 +58,7 @@
 MODULE_INIT_START
     p_module->i_capabilities = MODULE_CAPABILITY_NULL
                                 | MODULE_CAPABILITY_MOTION;
-    p_module->psz_longname = "MMX motion compensation module";
+    p_module->psz_longname = "Altivec motion compensation module";
 MODULE_INIT_STOP

 MODULE_ACTIVATE_START


> I have to investigate what causes the errors; maybe some variable not
> getting initialized correctly? I suppose Paulus' code has no bugs as
> obvious as visual artefacts....

I haven't found out what is wrong here. It must be in the IDCT proper,
since replacing the block copy by the standard C functions doesn't
improve the situation...

Cheers

Michel

-------------------------------------------------------------------------
Michel Lanners                 |  " Read Philosophy.  Study Art.
23, Rue Paul Henkes            |    Ask Questions.  Make Mistakes.
L-1710 Luxembourg              |
email   mlan at cpu.lu            |
http://www.cpu.lu/~mlan        |                     Learn Always. "


-- Attached file included as plaintext by Listar --
-- File: vlc-cvs-altivec.diff

diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/Makefile vlc-cvs-20010909/Makefile
--- src/vlc-cvs/vlc/Makefile	Thu Sep  6 12:39:10 2001
+++ vlc-cvs-20010909/Makefile	Sun Sep  9 17:35:57 2001
@@ -33,7 +33,7 @@
 AUDIO_DECODER := audio_decoder adec_generic adec_layer1 adec_layer2 adec_math
 SPU_DECODER := spu_decoder
 VIDEO_DECODER := video_parser vpar_headers vpar_blocks vpar_synchro vpar_pool video_decoder
-MISC := mtime tests modules netutils
+MISC := mtime tests modules netutils malloc
 
 C_OBJ :=	$(INTERFACE:%=src/interface/%.o) \
 		$(INPUT:%=src/input/%.o) \
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/Makefile.opts.in vlc-cvs-20010909/Makefile.opts.in
--- src/vlc-cvs/vlc/Makefile.opts.in	Wed Sep  5 18:07:49 2001
+++ vlc-cvs-20010909/Makefile.opts.in	Sun Sep  9 11:00:22 2001
@@ -45,7 +45,7 @@
 # Build environment
 # 
 CC = @CC@
-CFLAGS = @CFLAGS@
+CFLAGS = -Wa,-m7400 @CFLAGS@
 SHELL = @SHELL@
 RANLIB = @RANLIB@
 WINDRES = @WINDRES@
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/configure vlc-cvs-20010909/configure
--- src/vlc-cvs/vlc/configure	Thu Sep  6 15:24:21 2001
+++ vlc-cvs-20010909/configure	Sun Sep  9 16:45:48 2001
@@ -3446,7 +3446,8 @@
 MMXEXT_MODULES="idctmmxext motionmmxext"
 THREEDNOW_MODULES="imdct3dn downmix3dn"
 SSE_MODULES="imdctsse downmixsse"
-ALTIVEC_MODULES="idctaltivec motionaltivec"
+ALTIVEC_MODULES_ASM="idctaltivec"
+ALTIVEC_MODULES_C="motionaltivec"
 
 echo $ac_n "checking if \$CC groks MMX inline assembly""... $ac_c" 1>&6
 echo "configure:3453: checking if \$CC groks MMX inline assembly" >&5
@@ -3548,7 +3549,7 @@
 EOF
 if { (eval echo configure:3550: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
-  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES}"
+  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES_C}"
   echo "$ac_t""yes" 1>&6
 else
   echo "configure: failed program was:" >&5
@@ -3557,6 +3558,7 @@
   echo "$ac_t""no" 1>&6
 fi
 rm -f conftest*
+  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES_ASM}"
 
 
 CAN_BUILD_LIBDVDCSS=0
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/configure.in vlc-cvs-20010909/configure.in
--- src/vlc-cvs/vlc/configure.in	Thu Sep  6 15:24:21 2001
+++ vlc-cvs-20010909/configure.in	Sun Sep  9 16:55:02 2001
@@ -218,7 +218,8 @@
 MMXEXT_MODULES="idctmmxext motionmmxext"
 THREEDNOW_MODULES="imdct3dn downmix3dn"
 SSE_MODULES="imdctsse downmixsse"
-ALTIVEC_MODULES="idctaltivec motionaltivec"
+ALTIVEC_MODULES_C="motionaltivec"
+ALTIVEC_MODULES_ASM="idctaltivec"
 
 AC_MSG_CHECKING([if \$CC groks MMX inline assembly])
 AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],,
@@ -240,9 +241,14 @@
   ACCEL_MODULES="${ACCEL_MODULES} ${SSE_MODULES}"
   AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
 
-AC_MSG_CHECKING([if \$CC groks Altivec code])
+AC_MSG_CHECKING([if \$CC groks Altivec C code])
 AC_TRY_COMPILE([void quux(){vec_mtvscr( (vector unsigned int)(0) );}],,
-  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES}"
+  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES_C}"
+  AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
+
+AC_MSG_CHECKING([if \$CC groks Altivec inline ASM])
+AC_TRY_COMPILE([void quux(){asm ("vmrghh 1,2,3");}],,
+  ACCEL_MODULES="${ACCEL_MODULES} ${ALTIVEC_MODULES_ASM}"
   AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
 
 dnl
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/include/malloc-aligned.h vlc-cvs-20010909/include/malloc-aligned.h
--- src/vlc-cvs/vlc/include/malloc-aligned.h	Thu Jan  1 01:00:00 1970
+++ vlc-cvs-20010909/include/malloc-aligned.h	Sun Sep  9 17:37:26 2001
@@ -0,0 +1,22 @@
+#define ALIGN 16
+
+#ifndef __USE_XOPEN2K
+#define __USE_XOPEN2K
+#include <stdlib.h>
+#undef __USE_XOPEN2K
+#else
+#include <stdlib.h>
+#endif
+
+#ifndef __USE_ISOC99
+#define __USE_ISOC99
+#include <math.h>
+#undef __USE_ISOC99
+#else
+#include <math.h>
+#endif
+
+#define malloc malign_16
+
+void * malign_16 ( size_t );
+
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/include/vdec_ext-plugins.h vlc-cvs-20010909/include/vdec_ext-plugins.h
--- src/vlc-cvs/vlc/include/vdec_ext-plugins.h	Wed Sep  5 18:07:49 2001
+++ vlc-cvs-20010909/include/vdec_ext-plugins.h	Sun Sep  9 17:39:20 2001
@@ -27,7 +27,7 @@
  *****************************************************************************/
 typedef struct idct_inner_s
 {
-    dctelem_t               pi_block[64];                           /* block */
+    dctelem_t               pi_block[64] ATTR_ALIGN(16);   /* block */
     void                ( * pf_idct )   ( dctelem_t *, yuv_data_t *, int,
                                           void *, int );
                                         /* sparse IDCT or not, add or copy ? */
@@ -49,14 +49,14 @@
 
 typedef struct macroblock_s
 {
-    int                     i_mb_modes;
-
-    /* IDCT information */
-    idct_inner_t            p_idcts[6];
+    /* IDCT information, first to ease alignment */
+    idct_inner_t            p_idcts[6] ATTR_ALIGN(16);
     int                     i_coded_block_pattern;
                                                  /* which blocks are coded ? */
     int                     i_lum_dct_stride, i_chrom_dct_stride;
                                  /* nb of coeffs to jump when changing lines */
+
+    int                     i_mb_modes;
 
     /* Motion compensation information */
     motion_inner_t          p_motions[8];
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/plugins/idct/Makefile vlc-cvs-20010909/plugins/idct/Makefile
--- src/vlc-cvs/vlc/plugins/idct/Makefile	Thu Sep  6 12:39:10 2001
+++ vlc-cvs-20010909/plugins/idct/Makefile	Sun Sep  9 16:42:29 2001
@@ -32,10 +32,12 @@
 
 $(PLUGIN_IDCTALTIVEC): %.o: .dep/%.d
 $(PLUGIN_IDCTALTIVEC): %.o: %.c
-	$(CC) $(CFLAGS) -DPLUGIN $(PCFLAGS) -faltivec -c -o $@ $<
+	$(CC) $(CFLAGS) -DPLUGIN $(PCFLAGS) -c -o $@ $<
+#	$(CC) $(CFLAGS) -DPLUGIN $(PCFLAGS) -faltivec -c -o $@ $<
 
 $(PLUGIN_IDCTALTIVECASM): %.o: %.S
-	$(CC) $(CFLAGS) -DPLUGIN $(PCFLAGS) -faltivec -c -o $@ $<
+	$(CC) -Wa,-m7400 $(CFLAGS) -DPLUGIN $(PCFLAGS) -c -o $@ $<
+#	$(CC) $(CFLAGS) -DPLUGIN $(PCFLAGS) -faltivec -c -o $@ $<
 
 $(BUILTIN_IDCT): BUILTIN_IDCT_%.o: .dep/%.d
 $(BUILTIN_IDCT): BUILTIN_IDCT_%.o: %.c
@@ -55,10 +57,12 @@
 
 $(BUILTIN_IDCTALTIVEC): BUILTIN_IDCTALTIVEC_%.o: .dep/%.d
 $(BUILTIN_IDCTALTIVEC): BUILTIN_IDCTALTIVEC_%.o: %.c
-	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -faltivec -c -o $@ $<
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -c -o $@ $<
+#	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -faltivec -c -o $@ $<
 
 $(BUILTIN_IDCTALTIVECASM): BUILTIN_IDCTALTIVEC_%.o: %.S
-	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -faltivec -c -o $@ $<
+	$(CC) -Wa,-m7400 $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -c -o $@ $<
+#	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=idctaltivec -faltivec -c -o $@ $<
 
 #
 # Real targets
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/plugins/idct/idctaltivec.c vlc-cvs-20010909/plugins/idct/idctaltivec.c
--- src/vlc-cvs/vlc/plugins/idct/idctaltivec.c	Thu Sep  6 12:19:18 2001
+++ vlc-cvs-20010909/plugins/idct/idctaltivec.c	Tue Sep 11 00:33:34 2001
@@ -80,15 +80,18 @@
 {
     if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) )
     {
-        return( 0 );
+	    fprintf (stderr, "No Altivec support!\n");
+//        return( 0 );
     }
 
     if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" )
          || TestMethod( IDCT_METHOD_VAR, "altivec" ) )
     {
+	    fprintf (stderr, "Forcing Altivec IDCT!\n");
         return( 999 );
     }
 
+	    fprintf (stderr, "Altivec IDCT enabled.\n");
     return( 200 );
 }
 
@@ -99,11 +102,10 @@
 {
 }
 
-static void InitIDCT( void * p_idct_data )
+static void InitIDCT( void ** pp_idct_data )
 {
 }
 
-
 /*****************************************************************************
  * Functions exported as capabilities. They are declared as static so that
  * we don't pollute the namespace too much.
@@ -121,4 +123,3 @@
     F.pf_idct_copy = idct_block_copy_altivec;
 #undef F
 }
-
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/plugins/motion/motionaltivec.c vlc-cvs-20010909/plugins/motion/motionaltivec.c
--- src/vlc-cvs/vlc/plugins/motion/motionaltivec.c	Thu Sep  6 16:02:56 2001
+++ vlc-cvs-20010909/plugins/motion/motionaltivec.c	Sun Sep  9 16:44:25 2001
@@ -58,7 +58,7 @@
 MODULE_INIT_START
     p_module->i_capabilities = MODULE_CAPABILITY_NULL
                                 | MODULE_CAPABILITY_MOTION;
-    p_module->psz_longname = "MMX motion compensation module";
+    p_module->psz_longname = "Altivec motion compensation module";
 MODULE_INIT_STOP
 
 MODULE_ACTIVATE_START
diff -uNr --exclude=\.dep --exclude=config.* --exclude=doc --exclude=*.o src/vlc-cvs/vlc/src/misc/malloc.c vlc-cvs-20010909/src/misc/malloc.c
--- src/vlc-cvs/vlc/src/misc/malloc.c	Thu Jan  1 01:00:00 1970
+++ vlc-cvs-20010909/src/misc/malloc.c	Sun Sep  9 17:40:12 2001
@@ -0,0 +1,17 @@
+#include "malloc-aligned.h"
+
+void * malign_16( size_t count) {
+	void * aligned = NULL;
+	int size;
+
+	size = (count / sizeof(void *) + 1) * sizeof(void *);
+	size = exp2( ((int)log2(size) + 1) );
+	if (posix_memalign(&aligned, 16, size) != 0)
+		return NULL;
+	else {
+		if ((unsigned long)aligned & 0xf)
+			printf ("Alarm: malloc not aligned: %p\n.", aligned);
+		return aligned;
+	}
+}
+





More information about the vlc-devel mailing list