[x264-devel] [Alexander Izvorski <aizvorski at gmail.com>] [patch] SSE2 pixel routines
System administration
admin at via.ecp.fr
Fri Jul 22 18:42:58 CEST 2005
The deleted attachment is at:
<http://www.videolan.org/~admin/20050722-videolan/sse2-pixel-routines.diff>
----- Forwarded message from Alexander Izvorski <aizvorski at gmail.com> -----
From: Alexander Izvorski <aizvorski at gmail.com>
Date: Fri, 22 Jul 2005 00:18:57 -0700
To: x264-devel at videolan.org
Subject: [patch] SSE2 pixel routines
Reply-To: Alexander Izvorski <aizvorski at gmail.com>
Hello,
Here is an early version of SSE2-optimized routines for sad 16x16 and
16x8, ssd 16x16 and 16x8, and satd from 16x16 to 8x4 (diff against rev
277). None of these have any special alignment requirements. I have
tested that they produce the same results as the mmxext versions, but
I'd appreciate it if someone else tested them as well. They are not
in their final form yet; there are a few places where a few more
instructions can be shaved off.
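For the equivalence testing, something like the sketch below is what I
have in mind; the buffer size, stride, and offsets are arbitrary, and
the prototypes are the ones this patch adds to pixel.h (the same loop
gets repeated for each of the other sizes):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* prototypes as declared in common/i386/pixel.h */
int x264_pixel_satd_16x16_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x16_sse2  ( uint8_t *, int, uint8_t *, int );

int main( void )
{
    static uint8_t buf1[64*64], buf2[64*64];
    int i, t;

    for( i = 0; i < 64*64; i++ )
    {
        buf1[i] = rand() & 0xff;
        buf2[i] = rand() & 0xff;
    }

    /* walk through some deliberately misaligned starting offsets */
    for( t = 0; t < 16; t++ )
    {
        int a = x264_pixel_satd_16x16_mmxext( buf1+t, 64, buf2+t, 64 );
        int b = x264_pixel_satd_16x16_sse2  ( buf1+t, 64, buf2+t, 64 );
        if( a != b )
            printf( "mismatch at offset %d: mmxext=%d sse2=%d\n", t, a, b );
    }
    return 0;
}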
So how fast are they? They are considerably faster, but I don't know
exactly how much in a typical setup, because the only SSE2-capable
machine I have is a 4-way Xeon box, which produces very anomalous
timing results. The instruction count is certainly lower: from 1268
to 852 in the case of satd 16x16. I would really appreciate numbers
for a single-processor Pentium 4 and an Athlon 64/Opteron. I'll post a
simple benchmarking tool that uses realistic memory access patterns
shortly.
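In the meantime, the rough shape of it is just rdtsc around the calls,
with the block offsets precomputed and scattered over a buffer much
larger than cache so the loads behave more like they do in a real
encode; the sizes and iteration count below are placeholders:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int x264_pixel_sad_16x16_sse2( uint8_t *, int, uint8_t *, int );

static inline uint64_t read_tsc( void )
{
    uint32_t lo, hi;
    __asm__ __volatile__( "rdtsc" : "=a"(lo), "=d"(hi) );
    return ((uint64_t)hi << 32) | lo;
}

#define RUNS 100000

int main( void )
{
    /* a buffer well past L2 size, so the loads are not all cache hits */
    int stride = 1024, height = 4096;
    uint8_t *pix = malloc( stride * height );
    int *off1 = malloc( RUNS * sizeof(int) );
    int *off2 = malloc( RUNS * sizeof(int) );
    uint64_t t0, t1;
    int i, sum = 0;

    for( i = 0; i < stride * height; i++ )
        pix[i] = rand() & 0xff;
    /* precompute random (and mostly unaligned) block positions so that
       rand() stays out of the timed loop */
    for( i = 0; i < RUNS; i++ )
    {
        off1[i] = (rand() % (height-16)) * stride + rand() % (stride-16);
        off2[i] = (rand() % (height-16)) * stride + rand() % (stride-16);
    }

    t0 = read_tsc();
    for( i = 0; i < RUNS; i++ )
        sum += x264_pixel_sad_16x16_sse2( pix+off1[i], stride, pix+off2[i], stride );
    t1 = read_tsc();

    printf( "%.1f cycles/call (checksum %d)\n", (double)(t1-t0)/RUNS, sum );
    free( off1 ); free( off2 ); free( pix );
    return 0;
}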
Regards,
-Alex Izvorski
P.S. I have a few questions as well from looking at the original code...
Why is the result of satd divided by two?! That throws away one bit
of precision, which would have a small but noticeable impact on PSNR
(see the "shr eax,1" in MMX_SUM_MM).
Why is MMX_SUM_MM called once for every four 4x4 blocks in the satd
functions? The maximum sum from a 4x4 block, as I understand it, is
2*256*4*4, and that will be split between four unsigned words, with
each one getting no more than 256*4*4. So (even before we divide the
result by two) it is impossible to saturate the accumulator with fewer
than sixteen 4x4 blocks, and in theory a single call to MMX_SUM_MM at
the end should be sufficient.
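For concreteness, here is the plain-C computation I believe one 4x4
block of the satd code performs (my own sketch, not the reference in
common/pixel.c); the final shift is the halving I'm asking about, and
the comment notes the coefficient magnitudes relevant to the saturation
question:

#include <stdint.h>
#include <stdlib.h>

/* 4-point unnormalized Hadamard butterfly; the output order is permuted,
   which doesn't matter since only absolute values are summed afterwards */
static void hadamard4( int d[4] )
{
    int s0 = d[0] + d[1], s1 = d[0] - d[1];
    int s2 = d[2] + d[3], s3 = d[2] - d[3];
    d[0] = s0 + s2; d[1] = s1 + s3;
    d[2] = s0 - s2; d[3] = s1 - s3;
}

int satd_4x4_ref( uint8_t *pix1, int i_stride1, uint8_t *pix2, int i_stride2 )
{
    int d[4][4], col[4];
    int i, j, sum = 0;

    for( i = 0; i < 4; i++ )
    {
        for( j = 0; j < 4; j++ )
            d[i][j] = pix1[i*i_stride1+j] - pix2[i*i_stride2+j];
        hadamard4( d[i] );              /* transform the rows */
    }
    for( j = 0; j < 4; j++ )
    {
        for( i = 0; i < 4; i++ )
            col[i] = d[i][j];
        hadamard4( col );               /* transform the columns */
        for( i = 0; i < 4; i++ )
            sum += abs( col[i] );
    }
    /* each coefficient is a +/- sum of the 16 pixel differences, so its
       magnitude is at most 16*255; the question is how many of these 4x4
       sums can pile up in a 16-bit lane before paddusw could saturate */
    return sum >> 1;                    /* the "shr eax,1" asked about above */
}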
The original version of HADAMARD4_SUB_BADC uses add-add-subtract; is
that faster than the equivalent move-add-subtract? (Not on Athlons,
but maybe on the P4?) The equivalent step in my version uses
move-add-subtract but can be changed very easily.
P.P.S. If anyone is interested in hacking on these or porting them
(although it's a bit early for that, since they will go through at least
one more revision), here is some info that may be helpful. sad and ssd
are straightforward, except that psadbw operates on each quadword
separately, so the two partial results have to be added together at the
end. satd is trickier: it loads and differences two 4x4 regions
simultaneously, then keeps one region's differences in registers while
doing the transform on the other (too bad there aren't enough registers
to do this in mmx; on the other hand, with the 16 128-bit registers that
altivec and amd64 have, we could do a 4x8 load ;). satd data is usually
passed around with one xmm register holding row 0 in the low quadword
and row 2 (!) in the high quadword, and the other register holding rows
1 and 3. hadamard4x4 takes this as input and produces outputs laid out
the same way but with the high quadwords swapped; transpose4x4 expects
that. Oh, and by the way, psrldq takes a shift in bytes, not bits (and
isn't that confusing). Hope this helps.
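To make that layout concrete, here is a small C model of what
LOAD4x8_DIFF_SSE2 leaves behind (my own restatement, with an xmm
register modelled as eight int16_t lanes, lane 0 lowest): xmm0/xmm1
hold the left 4x4 block and xmm4/xmm5 the right one.

#include <stdint.h>
#include <string.h>

typedef struct { int16_t w[8]; } xmm_t;   /* lane 0 = lowest word */

/* model of LOAD4x8_DIFF_SSE2: difference a 4-row by 8-column region,
   split it into a left and a right 4x4 block, rows interleaved as the
   satd code expects */
void load4x8_diff_model( uint8_t *p1, int i1, uint8_t *p2, int i2,
                         xmm_t *l02, xmm_t *l13, xmm_t *r02, xmm_t *r13 )
{
    int16_t d[4][8];
    int y, x;

    for( y = 0; y < 4; y++ )
        for( x = 0; x < 8; x++ )
            d[y][x] = p1[y*i1+x] - p2[y*i2+x];

    /* left 4x4 (columns 0..3): row 0 in the low quadword, row 2 (!) high */
    memcpy( &l02->w[0], &d[0][0], 4*sizeof(int16_t) );
    memcpy( &l02->w[4], &d[2][0], 4*sizeof(int16_t) );
    memcpy( &l13->w[0], &d[1][0], 4*sizeof(int16_t) );
    memcpy( &l13->w[4], &d[3][0], 4*sizeof(int16_t) );
    /* right 4x4 (columns 4..7), same interleaving */
    memcpy( &r02->w[0], &d[0][4], 4*sizeof(int16_t) );
    memcpy( &r02->w[4], &d[2][4], 4*sizeof(int16_t) );
    memcpy( &r13->w[0], &d[1][4], 4*sizeof(int16_t) );
    memcpy( &r13->w[4], &d[3][4], 4*sizeof(int16_t) );
}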
Index: common/i386/pixel.h
===================================================================
--- common/i386/pixel.h (revision 277)
+++ common/i386/pixel.h (working copy)
@@ -48,4 +48,16 @@
int x264_pixel_satd_4x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_4x4_mmxext( uint8_t *, int, uint8_t *, int );
+int x264_pixel_sad_16x16_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_sad_16x8_sse2( uint8_t *, int, uint8_t *, int );
+
+int x264_pixel_ssd_16x16_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_ssd_16x8_sse2( uint8_t *, int, uint8_t *, int );
+
+int x264_pixel_satd_16x16_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_satd_16x8_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_satd_8x16_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_satd_8x8_sse2( uint8_t *, int, uint8_t *, int );
+int x264_pixel_satd_8x4_sse2( uint8_t *, int, uint8_t *, int );
+
#endif
Index: common/i386/pixel-a.asm
===================================================================
--- common/i386/pixel-a.asm (revision 277)
+++ common/i386/pixel-a.asm (working copy)
@@ -53,6 +53,19 @@
paddw mm0, mm3
%endmacro
+%macro SAD_INC_2x16P_SSE2 0
+ movdqu xmm1, [eax]
+ movdqu xmm2, [eax+ebx]
+ movdqu xmm3, [ecx]
+ movdqu xmm4, [ecx+edx]
+ psadbw xmm1, xmm3
+ psadbw xmm2, xmm4
+ lea eax, [eax+2*ebx]
+ paddw xmm1, xmm2
+ lea ecx, [ecx+2*edx]
+ paddw xmm0, xmm1
+%endmacro
+
%macro SAD_INC_2x8P 0
movq mm1, [eax]
movq mm2, [eax+ebx]
@@ -113,6 +126,27 @@
paddd mm0, mm4
%endmacro
+%macro SSD_INC_1x16P_SSE2 0
+ movdqu xmm1, [eax]
+ movdqu xmm2, [ecx]
+
+ movdqa xmm5, xmm1
+ psubusb xmm1, xmm2
+ psubusb xmm2, xmm5
+ por xmm1, xmm2
+
+ movdqa xmm2, xmm1
+ punpcklbw xmm1, xmm7
+ punpckhbw xmm2, xmm7
+ pmaddwd xmm1, xmm1
+ pmaddwd xmm2, xmm2
+
+ add eax, ebx
+ add ecx, edx
+ paddd xmm0, xmm1
+ paddd xmm0, xmm2
+%endmacro
+
%macro SSD_INC_1x8P 0
movq mm1, [eax]
movq mm2, [ecx]
@@ -161,6 +195,17 @@
SSD_INC_1x16P
%endmacro
+%macro SSD_INC_8x16P_SSE2 0
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+ SSD_INC_1x16P_SSE2
+%endmacro
+
%macro SSD_INC_4x8P 0
SSD_INC_1x8P
SSD_INC_1x8P
@@ -303,6 +348,17 @@
cglobal x264_pixel_satd_8x16_mmxext
cglobal x264_pixel_satd_16x16_mmxext
+cglobal x264_pixel_sad_16x16_sse2
+cglobal x264_pixel_sad_16x8_sse2
+cglobal x264_pixel_ssd_16x16_sse2
+cglobal x264_pixel_ssd_16x8_sse2
+cglobal x264_pixel_satd_8x4_sse2
+cglobal x264_pixel_satd_8x8_sse2
+cglobal x264_pixel_satd_16x8_sse2
+cglobal x264_pixel_satd_8x16_sse2
+cglobal x264_pixel_satd_16x16_sse2
+
+
%macro SAD_START 0
push ebx
@@ -320,6 +376,27 @@
ret
%endmacro
+%macro SAD_START_SSE2 0
+ push ebx
+
+ mov eax, [esp+ 8] ; pix1
+ mov ebx, [esp+12] ; stride1
+ mov ecx, [esp+16] ; pix2
+ mov edx, [esp+20] ; stride2
+
+ pxor xmm0, xmm0
+%endmacro
+
+%macro SAD_END_SSE2 0
+ movdqa xmm1, xmm0
+ psrldq xmm1, 8
+ paddw xmm0, xmm1
+ movd eax, xmm0
+
+ pop ebx
+ ret
+%endmacro
+
ALIGN 16
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int )
@@ -338,6 +415,22 @@
ALIGN 16
;-----------------------------------------------------------------------------
+; int __cdecl x264_pixel_sad_16x16_sse2 (uint8_t *, int, uint8_t *, int )
+;-----------------------------------------------------------------------------
+x264_pixel_sad_16x16_sse2:
+ SAD_START_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_END_SSE2
+
+ALIGN 16
+;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_sad_16x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_16x8_mmxext:
@@ -350,6 +443,18 @@
ALIGN 16
;-----------------------------------------------------------------------------
+; int __cdecl x264_pixel_sad_16x8_sse2 (uint8_t *, int, uint8_t *, int )
+;-----------------------------------------------------------------------------
+x264_pixel_sad_16x8_sse2:
+ SAD_START_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_INC_2x16P_SSE2
+ SAD_END_SSE2
+
+ALIGN 16
+;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_sad_8x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_8x16_mmxext:
@@ -432,6 +537,32 @@
ret
%endmacro
+%macro SSD_START_SSE2 0
+ push ebx
+
+ mov eax, [esp+ 8] ; pix1
+ mov ebx, [esp+12] ; stride1
+ mov ecx, [esp+16] ; pix2
+ mov edx, [esp+20] ; stride2
+
+ pxor xmm7, xmm7 ; zero
+ pxor xmm0, xmm0 ; mm0 holds the sum
+%endmacro
+
+%macro SSD_END_SSE2 0
+ movdqa xmm1, xmm0
+ psrldq xmm1, 8
+ paddd xmm0, xmm1
+
+ movdqa xmm1, xmm0
+ psrldq xmm1, 4
+ paddd xmm0, xmm1
+ movd eax, xmm0
+
+ pop ebx
+ ret
+%endmacro
+
ALIGN 16
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
@@ -443,12 +574,28 @@
SSD_END
ALIGN 16
+;-----------------------------------------------------------------------------
+; int __cdecl x264_pixel_ssd_16x16_sse2 (uint8_t *, int, uint8_t *, int )
+;-----------------------------------------------------------------------------
+x264_pixel_ssd_16x16_sse2:
+ SSD_START_SSE2
+ SSD_INC_8x16P_SSE2
+ SSD_INC_8x16P_SSE2
+ SSD_END_SSE2
+
+ALIGN 16
x264_pixel_ssd_16x8_mmxext:
SSD_START
SSD_INC_8x16P
SSD_END
ALIGN 16
+x264_pixel_ssd_16x8_sse2:
+ SSD_START_SSE2
+ SSD_INC_8x16P_SSE2
+ SSD_END_SSE2
+
+ALIGN 16
x264_pixel_ssd_8x16_mmxext:
SSD_START
SSD_INC_4x8P
@@ -797,3 +944,302 @@
pop ebx
ret
+;-----------------------------------------------------------------------------
+
+;;; SSE2 satd stuff
+
+; %1=(row2, row0) %2=(row3, row1) %3=junk
+; output in %1=(row3, row0) and %3=(row2, row1)
+%macro HADAMARD4x4_SSE2 3
+
+ movdqa %3, %1
+ paddw %1, %2
+ psubw %3, %2
+ movdqa %2, %1
+ punpcklqdq %1, %3
+ punpckhqdq %2, %3
+ movdqa %3, %1
+ paddw %1, %2
+ psubw %3, %2
+
+%endmacro
+
+%macro TRANSPOSE4x4_TWIST_SSE2 3 ; %1=(row3, row0) %2=(row2, row1) %3=junk, output in %1 and %2
+
+ movdqa %3, %1
+ punpcklwd %1, %2
+ punpckhwd %2, %3 ; backwards because the high quadwords are already swapped
+
+ movdqa %3, %1
+ punpckldq %1, %2
+ punpckhdq %3, %2
+
+ movdqa %2, %1
+ punpcklqdq %1, %3
+ punpckhqdq %2, %3
+
+%endmacro
+
+;;; loads the difference of two 4x4 blocks into xmm0,xmm1 and xmm4,xmm5 in interleaved-row order
+;;; destroys xmm2, 3 and 7
+%macro LOAD4x8_DIFF_SSE2 0
+
+ pxor xmm7, xmm7
+
+ movq xmm0, [eax]
+ movq xmm1, [eax+ebx]
+ lea eax, [eax+2*ebx]
+
+ movq xmm4, [ecx]
+ movq xmm5, [ecx+edx]
+ lea ecx, [ecx+2*edx]
+
+ movq xmm2, [eax]
+ movq xmm3, [eax+ebx]
+ lea eax, [eax+2*ebx]
+
+ punpcklbw xmm0, xmm7
+ punpcklbw xmm1, xmm7
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ psubw xmm0, xmm4
+ psubw xmm1, xmm5
+
+ movq xmm4, [ecx]
+ movq xmm5, [ecx+edx]
+ lea ecx, [ecx+2*edx]
+
+ punpcklbw xmm2, xmm7
+ punpcklbw xmm3, xmm7
+ punpcklbw xmm4, xmm7
+ punpcklbw xmm5, xmm7
+
+ psubw xmm2, xmm4
+ psubw xmm3, xmm5
+
+ movdqa xmm4, xmm0
+ movdqa xmm5, xmm1
+ punpcklqdq xmm0, xmm2 ; rows 0 and 2
+ punpcklqdq xmm1, xmm3 ; rows 1 and 3
+ punpckhqdq xmm4, xmm2 ; next 4x4 rows 0 and 2
+ punpckhqdq xmm5, xmm3 ; next 4x4 rows 1 and 3
+
+%endmacro
+
+%macro SUM4x4_SSE2 4 ; 02 13 junk sum
+
+ pxor %3, %3
+ psubw %3, %1
+ pmaxsw %1, %3
+
+ pxor %3, %3
+ psubw %3, %2
+ pmaxsw %2, %3
+
+ paddusw %4, %1
+ paddusw %4, %2
+
+%endmacro
+
+%macro SUM_MM_SSE2 2 ; sum junk
+ movdqa %2, %1
+ psrldq %1, 8
+ paddusw %1, %2
+ movdqa %2, %1
+ psrldq %1, 4
+ paddusw %1, %2
+ movdqa %2, %1
+ psrldq %1, 2
+ paddusw %1, %2
+ movd eax,%1
+ and eax,0xffff
+ shr eax,1
+ pxor %1, %1 ; fixme - can save an instruction or two here
+%endmacro
+
+%macro HADAMARD_SSE2 4 ; 02 13 junk sum
+ HADAMARD4x4_SSE2 %1, %2, %3
+ TRANSPOSE4x4_TWIST_SSE2 %1, %3, %2
+ HADAMARD4x4_SSE2 %1, %3, %2
+ SUM4x4_SSE2 %1, %2, %3, %4
+%endmacro
+
+ALIGN 16
+x264_pixel_satd_16x16_sse2:
+ push ebx
+ push ebp
+
+ mov eax, [esp+12] ; pix1
+ mov ebx, [esp+16] ; stride1
+ mov ecx, [esp+20] ; pix2
+ mov edx, [esp+24] ; stride2
+
+ pxor xmm6, xmm6
+ xor ebp, ebp
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+ add ebp, eax
+
+ mov eax, [esp+12]
+ mov ecx, [esp+20]
+ lea eax, [eax+8]
+ lea ecx, [ecx+8]
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+ add ebp, eax
+ mov eax, ebp
+
+ pop ebp
+ pop ebx
+ ret
+
+ALIGN 16
+x264_pixel_satd_8x16_sse2:
+ push ebx
+ push ebp
+
+ mov eax, [esp+12] ; pix1
+ mov ebx, [esp+16] ; stride1
+ mov ecx, [esp+20] ; pix2
+ mov edx, [esp+24] ; stride2
+
+ pxor xmm6, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+
+ pop ebp
+ pop ebx
+ ret
+
+ALIGN 16
+x264_pixel_satd_16x8_sse2:
+ push ebx
+ push ebp
+
+ mov eax, [esp+12] ; pix1
+ mov ebx, [esp+16] ; stride1
+ mov ecx, [esp+20] ; pix2
+ mov edx, [esp+24] ; stride2
+
+ pxor xmm6, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ mov eax, [esp+12]
+ mov ecx, [esp+20]
+ lea eax, [eax+8]
+ lea ecx, [ecx+8]
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+
+ pop ebp
+ pop ebx
+ ret
+
+ALIGN 16
+x264_pixel_satd_8x8_sse2:
+ push ebx
+
+ mov eax, [esp+ 8] ; pix1
+ mov ebx, [esp+12] ; stride1
+ mov ecx, [esp+16] ; pix2
+ mov edx, [esp+20] ; stride2
+
+ pxor xmm6, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+
+ pop ebx
+ ret
+
+ALIGN 16
+x264_pixel_satd_8x4_sse2:
+ push ebx
+
+ mov eax, [esp+ 8] ; pix1
+ mov ebx, [esp+12] ; stride1
+ mov ecx, [esp+16] ; pix2
+ mov edx, [esp+20] ; stride2
+
+ pxor xmm6, xmm6
+
+ LOAD4x8_DIFF_SSE2
+ HADAMARD_SSE2 xmm0, xmm1, xmm7, xmm6
+ HADAMARD_SSE2 xmm4, xmm5, xmm7, xmm6
+
+ SUM_MM_SSE2 xmm6, xmm7
+
+ pop ebx
+ ret
+
Index: common/pixel.c
===================================================================
--- common/pixel.c (revision 277)
+++ common/pixel.c (working copy)
@@ -29,6 +29,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
+#include <stdio.h>
#include "x264.h"
#include "pixel.h"
@@ -431,6 +432,24 @@
pixf->satd[PIXEL_4x4] = x264_pixel_satd_4x4_mmxext;
}
#endif
+
+#ifdef HAVE_SSE2
+ if( cpu&X264_CPU_SSE2 )
+ {
+ pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_sse2;
+ pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_sse2;
+
+ pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_sse2;
+ pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_sse2;
+
+ pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_sse2;
+ pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_sse2;
+ pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_sse2;
+ pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_sse2;
+ pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_sse2;
+ }
+#endif
+
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
{
----- End forwarded message -----
--
System administration <admin at via.ecp.fr>
VIA, Ecole Centrale Paris, France
--
This is the x264-devel mailing-list
To unsubscribe, go to: http://developers.videolan.org/lists.html