[x264-devel] Re: [PATCH] Altivec optimizations for quant4x4, quant4x4dc, quant8x8, sub8x8_dct8, sub16x16_dct8, pixel_sa8d_8x8, pixel_sa8d_16x16
Loren Merritt
lorenm at u.washington.edu
Tue Sep 12 18:23:19 CEST 2006
- Previous message: [x264-devel] Re: [PATCH] Altivec optimizations for quant4x4, quant4x4dc, quant8x8, sub8x8_dct8, sub16x16_dct8, pixel_sa8d_8x8, pixel_sa8d_16x16
- Next message: [x264-devel] Re: [PATCH] Altivec optimizations for quant4x4, quant4x4dc, quant8x8, sub8x8_dct8, sub16x16_dct8, pixel_sa8d_8x8, pixel_sa8d_16x16
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
On Sun, 10 Sep 2006, Guillaume POIRIER wrote:
>
> I attached 2 files today.
> First and foremost, is the 12th revision of my patch. It features PMC support
> and an example of how to use them to benchmark in pixel.c (which should be
> stripped off if committed).
> Nothing new except that.
>
> The 2nd patch is just the header file that adds the START/STOP_TIMER macros
> for PPC (it's also present in the whole patchset).
> It's meant to be put in common/ppc.
> I'm putting it aside to ease reviewing and because it's really a
> self-contained "patch".
>
> The question that probably needs to be asked now is: do you guys care to have
> the START/STOP_TIMER also available on other arches, or just for PPC is fine?
> Also note that I licensed this file under LGPL as it's essentially several
> pieces of code taken from FFmpeg and melted..
I use START/STOP_TIMER frequently, I just #include it in any file I want
to benchmark. But if you want to put it permanently in x264, I'll add the
x86 version too.
--Loren Merritt
-------------- next part --------------
Index: common/ppc/timer.h
===================================================================
--- common/ppc/timer.h (revision 0)
+++ common/ppc/timer.h (revision 0)
@@ -0,0 +1,92 @@
+/*****************************************************************************
+ * ppc/timer.h: h264 encoder
+ *****************************************************************************
+ *
+ * Copyright 2006 Guillaume POIRIER <gpoirier at mplayerhq.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ */
+
+#ifndef _PPC_TIMER_H
+#define _PPC_TIMER_H 1
+
+/***********************************************************************
+ Accessors to read Performance Monitoring Counters (PMC)
+ **********************************************************************/
+
+/*! Number of PMCs the accessor macros should assume. Adapt this value
+ according to the number of PMC counters of the target CPU:
+ 75x (G3) has 4, 74xx (G4) has 6, and 970x (G5) has 8.
+ Accessors for counters 3 and up that exceed this count expand to empty
+ statements; PMC1 and PMC2 are always real reads.
+*/
+#define POWERPC_NUM_PMC_ENABLED 8
+
+//! 970 needs different asm routines (different SPR numbers) to read PMCs; leave this undefined to get the non-970 accessors
+#define POWERPC_970
+/* POWERPC_GET_PMCn(a): read Performance Monitoring Counter n into a.
+ Each real accessor is a single "mfspr" instruction whose result is bound
+ to a through an "=r" output operand. Two sets of SPR numbers are
+ provided: one used when POWERPC_970 is not defined, one used when it is.
+ Accessors are defined in pairs; a pair whose index is not covered by
+ POWERPC_NUM_PMC_ENABLED expands to an empty do {} while (0) statement.
+ PMC7 and PMC8 are only defined in the POWERPC_970 branch.
+ POWERP_PMC_DATATYPE is unsigned long without POWERPC_970 and
+ unsigned long long with it.
+ NOTE(review): POWERP_PMC_DATATYPE looks like a misspelling of
+ POWERPC_PMC_DATATYPE; nothing visible in this header uses it, so confirm
+ against other users before renaming.
+ */
+#ifndef POWERPC_970
+#define POWERP_PMC_DATATYPE unsigned long
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#else /* POWERPC_970 */
+#define POWERP_PMC_DATATYPE unsigned long long
+#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a))
+#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a))
+#if (POWERPC_NUM_PMC_ENABLED > 2)
+#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a))
+#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a))
+#else
+#define POWERPC_GET_PMC3(a) do {} while (0)
+#define POWERPC_GET_PMC4(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 4)
+#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a))
+#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a))
+#else
+#define POWERPC_GET_PMC5(a) do {} while (0)
+#define POWERPC_GET_PMC6(a) do {} while (0)
+#endif
+#if (POWERPC_NUM_PMC_ENABLED > 6)
+#define POWERPC_GET_PMC7(a) asm volatile("mfspr %0, 777" : "=r" (a))
+#define POWERPC_GET_PMC8(a) asm volatile("mfspr %0, 778" : "=r" (a))
+#else
+#define POWERPC_GET_PMC7(a) do {} while (0)
+#define POWERPC_GET_PMC8(a) do {} while (0)
+#endif
+#endif /* POWERPC_970 */
+
+/*! Return the current value of a Performance Monitoring Counter, for use
+ as a timestamp by the START_TIMER/STOP_TIMER macros.
+ PMCs can be set to monitor all kinds of events. Depending on the ones you
+ selected, the PMC that monitors CPU cycles can differ; this function
+ reads PMC2 through POWERPC_GET_PMC2, so change the accessor here if
+ cycles are counted by another PMC in your setup.
+ NOTE(review): the local is uint64_t, while the accessor branch used when
+ POWERPC_970 is not defined declares its datatype as unsigned long --
+ confirm that the "=r" operand width is correct on 32-bit targets.
+ \return the raw counter value read by the accessor
+*/
+static inline uint64_t read_time(void)
+{
+ uint64_t time;
+ POWERPC_GET_PMC2(time);
+ return time;
+}
+
+#endif // _PPC_TIMER_H
Index: common/timer.h
===================================================================
--- common/timer.h (revision 0)
+++ common/timer.h (revision 0)
@@ -0,0 +1,77 @@
+/*****************************************************************************
+ * timer.h: h264 encoder
+ *****************************************************************************
+ *
+ * Copyright 2006 Loren Merritt
+ * mostly based on code from FFmpeg
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include <inttypes.h>
+#include <stdio.h>
+/* START_TIMER: begin a timed region.
+ Expands to two declarations in the enclosing scope: tend (left
+ uninitialized here, assigned by STOP_TIMER) and tstart (initialized with
+ read_time()). Because it declares variables, it can appear only once per
+ scope, and the matching STOP_TIMER must be placed where both names are
+ visible.
+ */
+#define START_TIMER \
+ uint64_t tend;\
+ uint64_t tstart = read_time();
+/* STOP_TIMER(id): end a timed region opened by START_TIMER and report.
+ id is printed with %s, so it must be a C string.
+ Stores read_time() into tend, then updates three static counters that
+ are private to each expansion site of the macro:
+ - tsum: accumulated (tend - tstart) of the accepted samples
+ - tcount: number of accepted samples
+ - tskip_count: number of rejected samples
+ A sample is accepted while fewer than two have been accepted so far, or
+ when it is smaller than 8 times the current average (tsum/tcount);
+ anything else is counted as a skip and left out of the sum, which keeps
+ outliers from dominating the average.
+ A line is printed whenever the total number of samples
+ (tcount + tskip_count) is a power of two, so output becomes rarer as the
+ run goes on. The printed figure is tsum*10/tcount, i.e. ten times the
+ average, labelled "dezicycles".
+ The expansion is a bare brace block, not a do { } while (0) statement.
+ */
+#define STOP_TIMER(id) \
+{\
+ tend = read_time();\
+ {\
+ static uint64_t tsum = 0;\
+ static int tcount = 0;\
+ static int tskip_count = 0;\
+ if( tcount<2 || tend - tstart < 8*tsum/tcount )\
+ {\
+ tsum += tend - tstart;\
+ tcount++;\
+ }\
+ else\
+ tskip_count++;\
+ if( ((tcount+tskip_count) & (tcount+tskip_count-1)) == 0 )\
+ printf( "%"PRIu64" dezicycles in %s, %d runs, %d skips\n",\
+ tsum*10/tcount, id, tcount, tskip_count );\
+ }\
+}
+/* Per-architecture selection of read_time(), the timestamp source used by
+ START_TIMER/STOP_TIMER:
+ - ARCH_X86 with GCC: a single rdtsc whose result is bound to one 64-bit
+ variable through the "=A" constraint
+ - ARCH_X86_64 with GCC: rdtsc with two outputs that are then combined
+ - ARCH_PPC: the implementation comes from ppc/timer.h
+ - anything else: no read_time() is defined, and both timer macros are
+ redefined to expand to nothing so instrumented code still compiles
+ */
+#if defined(ARCH_X86) && defined(__GNUC__)
+static inline uint64_t read_time(void)
+{
+ uint64_t l;
+ asm volatile( "rdtsc" : "=A" (l) ); // whole 64-bit result in one operand
+ return l;
+}
+#elif defined(ARCH_X86_64) && defined(__GNUC__)
+static inline uint64_t read_time(void)
+{
+ uint64_t a, d;
+ asm volatile( "rdtsc" : "=a" (a), "=d" (d) );
+ return (d << 32) | a; // d supplies the upper 32 bits, a the lower 32 bits
+}
+#elif defined(ARCH_PPC)
+#include "ppc/timer.h"
+#else
+
+#undef START_TIMER
+#undef STOP_TIMER
+#define START_TIMER
+#define STOP_TIMER(id)
+
+#endif // ARCH
+#endif // _TIMER_H
Index: common/common.h
===================================================================
--- common/common.h (revision 560)
+++ common/common.h (working copy)
@@ -110,6 +110,7 @@
#include "cabac.h"
#include "csp.h"
#include "quant.h"
+#include "timer.h"
/****************************************************************************
* Generals functions
- Previous message: [x264-devel] Re: [PATCH] Altivec optimizations for quant4x4, quant4x4dc, quant8x8, sub8x8_dct8, sub16x16_dct8, pixel_sa8d_8x8, pixel_sa8d_16x16
- Next message: [x264-devel] Re: [PATCH] Altivec optimizations for quant4x4, quant4x4dc, quant8x8, sub8x8_dct8, sub16x16_dct8, pixel_sa8d_8x8, pixel_sa8d_16x16
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the x264-devel
mailing list