[x264-devel] Enable FastShuffle on Penryn and Nehalem CPUs without SSE4

Jason Garrett-Glaser git at videolan.org
Mon Feb 7 06:31:31 CET 2011


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Sun Jan 30 00:00:09 2011 -0500| [0494e8e6cbcd0ed3d5c0886bc3cab28169932563] | committer: Jason Garrett-Glaser

Enable FastShuffle on Penryn and Nehalem CPUs without SSE4

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=0494e8e6cbcd0ed3d5c0886bc3cab28169932563
---

 common/cpu.c |   31 +++++++++++++++++++------------
 1 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/common/cpu.c b/common/cpu.c
index 90793f7..6147f7f 100644
--- a/common/cpu.c
+++ b/common/cpu.c
@@ -179,19 +179,26 @@ uint32_t x264_cpu_detect( void )
         x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
         int family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
         int model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
-        /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
-         * theoretically support sse2, but it's significantly slower than mmx for
-         * almost all of x264's functions, so let's just pretend they don't. */
-        if( family == 6 && (model == 9 || model == 13 || model == 14) )
+        if( family == 6 )
         {
-            cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);
-            assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));
-        }
-        /* Detect Atom CPU */
-        if( family == 6 && model == 28 )
-        {
-            cpu |= X264_CPU_SLOW_ATOM;
-            cpu |= X264_CPU_SLOW_CTZ;
+            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
+             * theoretically support sse2, but it's significantly slower than mmx for
+             * almost all of x264's functions, so let's just pretend they don't. */
+            if( model == 9 || model == 13 || model == 14 )
+            {
+                cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);
+                assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));
+            }
+            /* Detect Atom CPU */
+            else if( model == 28 )
+            {
+                cpu |= X264_CPU_SLOW_ATOM;
+                cpu |= X264_CPU_SLOW_CTZ;
+            }
+            /* Some Penryns and Nehalems are pointlessly crippled (SSE4 disabled), so
+             * detect them here. */
+            else if( model >= 23 )
+                cpu |= X264_CPU_SHUFFLE_IS_FAST;
         }
     }
 



More information about the x264-devel mailing list