[x264-devel] Re: [Patch] x264 on ppc without altivec
Patrice Bensoussan
patrice.bensoussan at free.fr
Mon Mar 5 21:56:09 CET 2007
On 5 Mar 2007, at 17:29, Alexis Ballier wrote:
>>
>> Weeh! I think I know what the problem is!
>>
>> common/cpu.c
>> 134 elif defined( SYS_LINUX )
>> 135 uint32_t x264_cpu_detect( void )
>> 136 {
>> 137 /* FIXME (Linux PPC) */
>> 138 return X264_CPU_ALTIVEC;
>> 139 }
>> 140 #endif
>
>
> That's what I meant with the "nice fixme" in my first mail :)
>
>> Please try attached patch, and tell me if it fixes the problem.
>> If it doesn't, please send a complete GDB backtrace so that I can
>> identify where exactly does it choke.
>
> That's the libmpeg2 method I was referring to, but let's try it
> again to
> have a (meaningful?) gdb output (but I don't think that'll help much):
>
>
> [Thread debugging using libthread_db enabled]
> [New Thread 805449536 (LWP 3738)]
>
> Program received signal SIGILL, Illegal instruction.
> [Switching to Thread 805449536 (LWP 3738)]
> 0x10022888 in x264_cpu_detect () at common/cpu.c:160
> 160 asm volatile ("mtspr 256, %0\n\t"
> (gdb) bt full
> #0 0x10022888 in x264_cpu_detect () at common/cpu.c:160
> No locals.
> #1 0x100064c4 in x264_param_default (param=0x7fc61ef8) at common/
> common.c:46
> No locals.
> #2 0x10001f2c in main (argc=4, argv=0xfddf520) at x264.c:107
> param = {cpu = 0, i_threads = 0, b_deterministic = 0,
> i_width =
> 0, i_height = 0, i_csp = 0, i_level_idc = 0, i_frame_total = 0, vui =
> {i_sar_height = 0, i_sar_width = 0, i_overscan = 0, i_vidformat = 0,
> b_fullrange = 0, i_colorprim = 0, i_transfer = 0, i_colmatrix = 0,
> i_chroma_loc = 0}, i_fps_num = 0, i_fps_den = 0, i_frame_reference
> = 0,
> i_keyint_max = 0, i_keyint_min = 0, i_scenecut_threshold = 0,
> b_pre_scenecut = 0, i_bframe = 0, b_bframe_adaptive = 0, i_bframe_bias
> = 0, b_bframe_pyramid = 0, b_deblocking_filter = 0,
> i_deblocking_filter_alphac0 = 0, i_deblocking_filter_beta = 0, b_cabac
> = 0, i_cabac_init_idc = 0, b_interlaced = 0, i_cqm_preset = 0,
> psz_cqm_file = 0x0, cqm_4iy = '\0' <repeats 15 times>, cqm_4ic = '\0'
> <repeats 15 times>, cqm_4py = '\0' <repeats 15 times>, cqm_4pc = '\0'
> <repeats 15 times>, cqm_8iy = '\0' <repeats 63 times>, cqm_8py = '\0'
> <repeats 63 times>, pf_log = 0, p_log_private = 0x0, i_log_level = 0,
> b_visualize = 0, analyse = { intra = 0, inter = 0, b_transform_8x8
> = 0,
> b_weighted_bipred = 0, i_direct_mv_pred = 0, i_direct_8x8_inference =
> 0, i_chroma_qp_offset = 0, i_me_method = 0, i_me_range = 0, i_mv_range
> = 0, i_mv_range_thread = 0, i_subpel_refine = 0, b_bidir_me = 0,
> b_chroma_me = 0, b_bframe_rdo = 0, b_mixed_references = 0, i_trellis =
> 0, b_fast_pskip = 0, b_dct_decimate = 0, i_noise_reduction = 0,
> i_luma_deadzone = {0, 0}, b_psnr = 0, b_ssim = 0}, rc = {i_rc_method =
> 0, i_qp_constant = 0, i_qp_min = 0, i_qp_max = 0, i_qp_step = 0,
> i_bitrate = 0, f_rf_constant = 0, f_rate_tolerance = 0,
> i_vbv_max_bitrate = 0, i_vbv_buffer_size = 0, f_vbv_buffer_init = 0,
> f_ip_factor = 0, f_pb_factor = 0, b_stat_write = 0, psz_stat_out =
> 0x0,
> b_stat_read = 0, psz_stat_in = 0x0, psz_rc_eq = 0x0, f_qcompress = 0,
> f_qblur = 0, f_complexity_blur = 0, zones = 0x0, i_zones = 0,
> psz_zones
> = 0x0}, b_aud = 0, b_repeat_headers = 0, i_sps_id = 0} opt =
> {b_progress = 805326432, i_seek = 0, hin = 0x0, hout = 0x0, qpfile =
> 0x0} ret = <value optimized out>
> (gdb)
>
>
> of course common/cpu.c:160 is the asm code.
>
>
> let's try something else :
>
>
> $cat toto.c
> #include <stdio.h>
> #include <signal.h>
> #include <setjmp.h>
>
> static sigjmp_buf jmpbuf;
> static volatile sig_atomic_t canjump = 0;
>
> static void sigill_handler (int sig)
> {
> if (!canjump) {
> signal (sig, SIG_DFL);
> raise (sig);
> }
>
> canjump = 0;
> siglongjmp (jmpbuf, 1);
> }
>
>
> int x264_cpu_detect( void )
> {
> signal (SIGILL, sigill_handler);
> if (sigsetjmp (jmpbuf, 1)) {
> signal (SIGILL, SIG_DFL);
> } else {
> canjump = 1;
>
> asm volatile ("mtspr 256, %0\n\t"
> "vand %%v0, %%v0, %%v0"
> :
> : "r" (-1));
>
> signal (SIGILL, SIG_DFL);
> return 1;
> }
> return 0;
> }
>
>
>
> int main(){
>
> printf("%i\n", x264_cpu_detect());
>
> return 0;
> }
>
>
>
> $ gcc toto.c
> $ ./a.out
> 0
> $
>
>
> so basically the code is not wrong.
>
>
> and now with that patch :
>
> $ svn diff
> Index: common/cpu.c
> ===================================================================
> --- common/cpu.c (revision 628)
> +++ common/cpu.c (working copy)
> @@ -32,6 +32,11 @@
> #include <sys/types.h>
> #include <sys/sysctl.h>
> #endif
> +#ifdef SYS_LINUX
> +#include <signal.h>
> +#include <setjmp.h>
> +#include <stdio.h>
> +#endif
>
> #include <string.h>
>
> @@ -111,7 +116,21 @@
> }
>
> #elif defined( ARCH_PPC )
> +static sigjmp_buf jmpbuf;
> +static volatile sig_atomic_t canjump = 0;
>
> +static void sigill_handler (int sig)
> +{
> + if (!canjump) {
> + printf(" sigill_handler in if(!canjump)\n");
> + signal (sig, SIG_DFL);
> + raise (sig);
> + }
> + printf(" sigill_handler after if(!canjump)\n");
> + canjump = 0;
> + siglongjmp (jmpbuf, 1);
> +}
> +
> #ifdef SYS_MACOSX
> #include <sys/sysctl.h>
> uint32_t x264_cpu_detect( void )
> @@ -134,8 +153,25 @@
> #elif defined( SYS_LINUX )
> uint32_t x264_cpu_detect( void )
> {
> - /* FIXME (Linux PPC) */
> - return X264_CPU_ALTIVEC;
> + signal (SIGILL, sigill_handler);
> + if (sigsetjmp (jmpbuf, 1)) {
> + printf("x264_cpu_detect in if (sigsetjmp (jmpbuf, 1))
> \n");
> + signal (SIGILL, SIG_DFL);
> + } else {
> + printf("x264_cpu_detect in else\n");
> + canjump = 1;
> +
> + asm volatile ("mtspr 256, %0\n\t"
> + "vand %%v0, %%v0, %%v0"
> + :
> + : "r" (-1));
> +
> + signal (SIGILL, SIG_DFL);
> + printf("x264_cpu_detect found altivec\n");
> + return X264_CPU_ALTIVEC;
> + }
> + printf("x264_cpu_detect altivec not found\n");
> + return 0;
> }
> #endif
>
> I get :
>
> $./x264 -o ../toto.264 ../example.y4m
> x264_cpu_detect in else
> sigill_handler after if(!canjump)
> x264_cpu_detect in if (sigsetjmp (jmpbuf, 1))
> x264_cpu_detect altivec not found
> yuv4mpeg: 384x288 at 25/1fps, 0:0
> x264 [info]: using cpu capabilities
> Illegal instruction
>
>
>
> So, to me, that means that the signal is caught by the thread, but
> also sent to the whole process, which dies.
> I don't know if there is a way to play nicely with threads and signals
> and found it simpler to disable altivec at compile time, thus the
> patch
> I proposed.
>
>
> Regards,
>
> Alexis.
For info, I tried to run ffmpeg with x264 enabled (pthread disabled
in ffmpeg) and it works fine on a G3 without any patch...
Patrice
--
This is the x264-devel mailing-list
To unsubscribe, go to: http://developers.videolan.org/lists.html
More information about the x264-devel
mailing list