[x265] [PATCH] Vector Primitives : Removed hungarian notation
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Mon Jul 8 09:22:17 CEST 2013
# HG changeset patch
# User ggopu
# Date 1373268038 -19800
# Node ID c8c33fdca89b02baf3eb8edffa9bc313f186c434
# Parent dc13d07919dbaa0a1ba82da1daf76bdf71bd08d1
Vector Primitives : Removed hungarian notation
diff -r dc13d07919db -r c8c33fdca89b source/common/vec/pixel.inc
--- a/source/common/vec/pixel.inc Mon Jul 08 11:28:20 2013 +0530
+++ b/source/common/vec/pixel.inc Mon Jul 08 12:50:38 2013 +0530
@@ -103,7 +103,7 @@
namespace {
/* File for pixels type-neutral code */
-void convert16to32(short *psOrg, int *piDst, int num)
+void convert16to32(short *Org, int *Dst, int num)
{
int i;
@@ -112,18 +112,18 @@
Vec8s im16;
Vec4i im32L, im32H;
- im16.load(psOrg);
+ im16.load(Org);
im32L = extend_low(im16);
im32H = extend_high(im16);
- im32L.store(piDst);
- im32H.store(piDst + 4);
+ im32L.store(Dst);
+ im32H.store(Dst + 4);
- psOrg += 8;
- piDst += 8;
+ Org += 8;
+ Dst += 8;
}
}
-void convert16to32_shl(int *piDst, short *psOrg, intptr_t iStride, int shift, int size)
+void convert16to32_shl(int *Dst, short *Org, intptr_t Stride, int shift, int size)
{
int i, j;
@@ -134,17 +134,17 @@
__m128i im16;
__m128i im32;
- im16 = _mm_loadl_epi64((__m128i*)&psOrg[i*iStride+j]);
+ im16 = _mm_loadl_epi64((__m128i*)&Org[i*Stride+j]);
im32 = _mm_srai_epi32(_mm_unpacklo_epi16(im16, im16), 16);
im32 = _mm_slli_epi32(im32, shift);
- _mm_storeu_si128((__m128i*)piDst, im32);
+ _mm_storeu_si128((__m128i*)Dst, im32);
- piDst += 4;
+ Dst += 4;
}
}
}
-void convert16to16_shl(short *psDst, short *psOrg, int width, int height, intptr_t stride, int shift)
+void convert16to16_shl(short *Dst, short *Org, int width, int height, intptr_t stride, int shift)
{
int i, j;
@@ -154,11 +154,11 @@
{
__m128i T00, T01;
- T00 = _mm_loadl_epi64((__m128i*)&psOrg[(i )*stride]);
- T01 = _mm_loadl_epi64((__m128i*)&psOrg[(i+1)*stride]);
+ T00 = _mm_loadl_epi64((__m128i*)&Org[(i )*stride]);
+ T01 = _mm_loadl_epi64((__m128i*)&Org[(i+1)*stride]);
T00 = _mm_unpacklo_epi64(T00, T01);
T00 = _mm_slli_epi16(T00, shift);
- _mm_storeu_si128((__m128i*)&psDst[i*4], T00);
+ _mm_storeu_si128((__m128i*)&Dst[i*4], T00);
}
}
else
@@ -169,15 +169,15 @@
{
__m128i T00;
- T00 = _mm_loadu_si128((__m128i*)&psOrg[i*stride+j]);
+ T00 = _mm_loadu_si128((__m128i*)&Org[i*stride+j]);
T00 = _mm_slli_epi16(T00, shift);
- _mm_storeu_si128((__m128i*)&psDst[i*width+j], T00);
+ _mm_storeu_si128((__m128i*)&Dst[i*width+j], T00);
}
}
}
}
-void convert32to16(int *psOrg, short *piDst, int num)
+void convert32to16(int *Org, short *Dst, int num)
{
int i;
@@ -186,17 +186,17 @@
Vec4i im32L, im32H;
Vec8s im16;
- im32L.load(psOrg);
- im32H.load(psOrg + 4);
+ im32L.load(Org);
+ im32H.load(Org + 4);
im16 = compress_saturated(im32L, im32H);
- im16.store(piDst);
+ im16.store(Dst);
- psOrg += 8;
- piDst += 8;
+ Org += 8;
+ Dst += 8;
}
}
-void convert32to16_shr(short *piDst, int *psOrg, int shift, int num)
+void convert32to16_shr(short *Dst, int *Org, int shift, int num)
{
int i;
Vec4i round = _mm_set1_epi32(1 << (shift - 1));
@@ -206,24 +206,24 @@
Vec4i im32;
Vec8s im16;
- im32.load(psOrg);
+ im32.load(Org);
im32 = (im32 + round) >> shift;
im16 = compress_saturated(im32, im32);
- store_partial(const_int(8), piDst, im16);
+ store_partial(const_int(8), Dst, im16);
- psOrg += 4;
- piDst += 4;
+ Org += 4;
+ Dst += 4;
}
}
template <int blockSize>
-void transpose(pixel* pDst, pixel* pSrc, intptr_t nStride)
+void transpose(pixel* Dst, pixel* Src, intptr_t Stride)
{
for (int k = 0; k < blockSize; k++)
{
for (int l = 0; l < blockSize; l++)
{
- pDst[k * blockSize + l] = pSrc[l * nStride + k];
+ Dst[k * blockSize + l] = Src[l * Stride + k];
}
}
}
@@ -231,21 +231,21 @@
#include "utils.h"
#if !HIGH_BIT_DEPTH
-void transpose4(pixel* pDst, pixel* pSrc, intptr_t nStride)
+void transpose4(pixel* Dst, pixel* Src, intptr_t Stride)
{
__m128i T00, T01, T02, T03;
- T00 = _mm_cvtsi32_si128(*(int*)&pSrc[0*nStride]); // [03 02 01 00]
- T01 = _mm_cvtsi32_si128(*(int*)&pSrc[1*nStride]); // [13 12 11 10]
- T02 = _mm_cvtsi32_si128(*(int*)&pSrc[2*nStride]); // [23 22 21 20]
- T03 = _mm_cvtsi32_si128(*(int*)&pSrc[3*nStride]); // [33 32 31 30]
+ T00 = _mm_cvtsi32_si128(*(int*)&Src[0*Stride]); // [03 02 01 00]
+ T01 = _mm_cvtsi32_si128(*(int*)&Src[1*Stride]); // [13 12 11 10]
+ T02 = _mm_cvtsi32_si128(*(int*)&Src[2*Stride]); // [23 22 21 20]
+ T03 = _mm_cvtsi32_si128(*(int*)&Src[3*Stride]); // [33 32 31 30]
T00 = _mm_unpacklo_epi8(T00, T01);
T01 = _mm_unpacklo_epi8(T02, T03);
T00 = _mm_unpacklo_epi16(T00, T01);
- _mm_store_si128((__m128i*)pDst, T00);
+ _mm_store_si128((__m128i*)Dst, T00);
}
#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, \
@@ -267,107 +267,107 @@
out3 = _mm_unpackhi_epi32(tr1_1, tr1_3); \
}
-void transpose8(pixel* pDst, pixel* pSrc, intptr_t nStride)
+void transpose8(pixel* Dst, pixel* Src, intptr_t Stride)
{
__m128i T00, T01, T02, T03, T04, T05, T06, T07;
- T00 = _mm_loadl_epi64((__m128i*)&pSrc[0*nStride]); // [07 06 05 04 03 02 01 00]
- T01 = _mm_loadl_epi64((__m128i*)&pSrc[1*nStride]); // [17 16 15 14 13 12 11 10]
- T02 = _mm_loadl_epi64((__m128i*)&pSrc[2*nStride]); // [27 26 25 24 23 22 21 20]
- T03 = _mm_loadl_epi64((__m128i*)&pSrc[3*nStride]); // [37 36 35 34 33 32 31 30]
- T04 = _mm_loadl_epi64((__m128i*)&pSrc[4*nStride]); // [47 46 45 44 43 42 41 40]
- T05 = _mm_loadl_epi64((__m128i*)&pSrc[5*nStride]); // [57 56 55 54 53 52 51 50]
- T06 = _mm_loadl_epi64((__m128i*)&pSrc[6*nStride]); // [67 66 65 64 63 62 61 60]
- T07 = _mm_loadl_epi64((__m128i*)&pSrc[7*nStride]); // [77 76 75 74 73 72 71 70]
+ T00 = _mm_loadl_epi64((__m128i*)&Src[0*Stride]); // [07 06 05 04 03 02 01 00]
+ T01 = _mm_loadl_epi64((__m128i*)&Src[1*Stride]); // [17 16 15 14 13 12 11 10]
+ T02 = _mm_loadl_epi64((__m128i*)&Src[2*Stride]); // [27 26 25 24 23 22 21 20]
+ T03 = _mm_loadl_epi64((__m128i*)&Src[3*Stride]); // [37 36 35 34 33 32 31 30]
+ T04 = _mm_loadl_epi64((__m128i*)&Src[4*Stride]); // [47 46 45 44 43 42 41 40]
+ T05 = _mm_loadl_epi64((__m128i*)&Src[5*Stride]); // [57 56 55 54 53 52 51 50]
+ T06 = _mm_loadl_epi64((__m128i*)&Src[6*Stride]); // [67 66 65 64 63 62 61 60]
+ T07 = _mm_loadl_epi64((__m128i*)&Src[7*Stride]); // [77 76 75 74 73 72 71 70]
TRANSPOSE_8X8(T00, T01, T02, T03, T04, T05, T06, T07, T00, T01, T02, T03);
- _mm_store_si128((__m128i*)&pDst[0*8], T00);
- _mm_store_si128((__m128i*)&pDst[2*8], T01);
- _mm_store_si128((__m128i*)&pDst[4*8], T02);
- _mm_store_si128((__m128i*)&pDst[6*8], T03);
+ _mm_store_si128((__m128i*)&Dst[0*8], T00);
+ _mm_store_si128((__m128i*)&Dst[2*8], T01);
+ _mm_store_si128((__m128i*)&Dst[4*8], T02);
+ _mm_store_si128((__m128i*)&Dst[6*8], T03);
}
-ALWAYSINLINE void transpose16_dummy(pixel* pDst, intptr_t nStrideD, pixel* pSrc, intptr_t nStrideS)
+ALWAYSINLINE void transpose16_dummy(pixel* Dst, intptr_t StrideD, pixel* Src, intptr_t StrideS)
{
__m128i T00, T01, T02, T03, T04, T05, T06, T07;
- T00 = _mm_loadl_epi64((__m128i*)&pSrc[ 0 * nStrideS]);
- T01 = _mm_loadl_epi64((__m128i*)&pSrc[ 1 * nStrideS]);
- T02 = _mm_loadl_epi64((__m128i*)&pSrc[ 2 * nStrideS]);
- T03 = _mm_loadl_epi64((__m128i*)&pSrc[ 3 * nStrideS]);
- T04 = _mm_loadl_epi64((__m128i*)&pSrc[ 4 * nStrideS]);
- T05 = _mm_loadl_epi64((__m128i*)&pSrc[ 5 * nStrideS]);
- T06 = _mm_loadl_epi64((__m128i*)&pSrc[ 6 * nStrideS]);
- T07 = _mm_loadl_epi64((__m128i*)&pSrc[ 7 * nStrideS]);
+ T00 = _mm_loadl_epi64((__m128i*)&Src[ 0 * StrideS]);
+ T01 = _mm_loadl_epi64((__m128i*)&Src[ 1 * StrideS]);
+ T02 = _mm_loadl_epi64((__m128i*)&Src[ 2 * StrideS]);
+ T03 = _mm_loadl_epi64((__m128i*)&Src[ 3 * StrideS]);
+ T04 = _mm_loadl_epi64((__m128i*)&Src[ 4 * StrideS]);
+ T05 = _mm_loadl_epi64((__m128i*)&Src[ 5 * StrideS]);
+ T06 = _mm_loadl_epi64((__m128i*)&Src[ 6 * StrideS]);
+ T07 = _mm_loadl_epi64((__m128i*)&Src[ 7 * StrideS]);
TRANSPOSE_8X8(T00, T01, T02, T03, T04, T05, T06, T07, T00, T01, T02, T03);
- _mm_storel_epi64((__m128i*)&pDst[ 0 * nStrideD], T00);
- _mm_storeh_pi( (__m64*)&pDst[ 1 * nStrideD], _mm_castsi128_ps(T00));
- _mm_storel_epi64((__m128i*)&pDst[ 2 * nStrideD], T01);
- _mm_storeh_pi( (__m64*)&pDst[ 3 * nStrideD], _mm_castsi128_ps(T01));
- _mm_storel_epi64((__m128i*)&pDst[ 4 * nStrideD], T02);
- _mm_storeh_pi( (__m64*)&pDst[ 5 * nStrideD], _mm_castsi128_ps(T02));
- _mm_storel_epi64((__m128i*)&pDst[ 6 * nStrideD], T03);
- _mm_storeh_pi( (__m64*)&pDst[ 7 * nStrideD], _mm_castsi128_ps(T03));
+ _mm_storel_epi64((__m128i*)&Dst[ 0 * StrideD], T00);
+ _mm_storeh_pi( (__m64*)&Dst[ 1 * StrideD], _mm_castsi128_ps(T00));
+ _mm_storel_epi64((__m128i*)&Dst[ 2 * StrideD], T01);
+ _mm_storeh_pi( (__m64*)&Dst[ 3 * StrideD], _mm_castsi128_ps(T01));
+ _mm_storel_epi64((__m128i*)&Dst[ 4 * StrideD], T02);
+ _mm_storeh_pi( (__m64*)&Dst[ 5 * StrideD], _mm_castsi128_ps(T02));
+ _mm_storel_epi64((__m128i*)&Dst[ 6 * StrideD], T03);
+ _mm_storeh_pi( (__m64*)&Dst[ 7 * StrideD], _mm_castsi128_ps(T03));
- T00 = _mm_loadl_epi64((__m128i*)&pSrc[ 0 * nStrideS + 8]);
- T01 = _mm_loadl_epi64((__m128i*)&pSrc[ 1 * nStrideS + 8]);
- T02 = _mm_loadl_epi64((__m128i*)&pSrc[ 2 * nStrideS + 8]);
- T03 = _mm_loadl_epi64((__m128i*)&pSrc[ 3 * nStrideS + 8]);
- T04 = _mm_loadl_epi64((__m128i*)&pSrc[ 4 * nStrideS + 8]);
- T05 = _mm_loadl_epi64((__m128i*)&pSrc[ 5 * nStrideS + 8]);
- T06 = _mm_loadl_epi64((__m128i*)&pSrc[ 6 * nStrideS + 8]);
- T07 = _mm_loadl_epi64((__m128i*)&pSrc[ 7 * nStrideS + 8]);
+ T00 = _mm_loadl_epi64((__m128i*)&Src[ 0 * StrideS + 8]);
+ T01 = _mm_loadl_epi64((__m128i*)&Src[ 1 * StrideS + 8]);
+ T02 = _mm_loadl_epi64((__m128i*)&Src[ 2 * StrideS + 8]);
+ T03 = _mm_loadl_epi64((__m128i*)&Src[ 3 * StrideS + 8]);
+ T04 = _mm_loadl_epi64((__m128i*)&Src[ 4 * StrideS + 8]);
+ T05 = _mm_loadl_epi64((__m128i*)&Src[ 5 * StrideS + 8]);
+ T06 = _mm_loadl_epi64((__m128i*)&Src[ 6 * StrideS + 8]);
+ T07 = _mm_loadl_epi64((__m128i*)&Src[ 7 * StrideS + 8]);
TRANSPOSE_8X8(T00, T01, T02, T03, T04, T05, T06, T07, T00, T01, T02, T03);
- _mm_storel_epi64((__m128i*)&pDst[ 8 * nStrideD], T00);
- _mm_storeh_pi( (__m64*)&pDst[ 9 * nStrideD], _mm_castsi128_ps(T00));
- _mm_storel_epi64((__m128i*)&pDst[10 * nStrideD], T01);
- _mm_storeh_pi( (__m64*)&pDst[11 * nStrideD], _mm_castsi128_ps(T01));
- _mm_storel_epi64((__m128i*)&pDst[12 * nStrideD], T02);
- _mm_storeh_pi( (__m64*)&pDst[13 * nStrideD], _mm_castsi128_ps(T02));
- _mm_storel_epi64((__m128i*)&pDst[14 * nStrideD], T03);
- _mm_storeh_pi( (__m64*)&pDst[15 * nStrideD], _mm_castsi128_ps(T03));
+ _mm_storel_epi64((__m128i*)&Dst[ 8 * StrideD], T00);
+ _mm_storeh_pi( (__m64*)&Dst[ 9 * StrideD], _mm_castsi128_ps(T00));
+ _mm_storel_epi64((__m128i*)&Dst[10 * StrideD], T01);
+ _mm_storeh_pi( (__m64*)&Dst[11 * StrideD], _mm_castsi128_ps(T01));
+ _mm_storel_epi64((__m128i*)&Dst[12 * StrideD], T02);
+ _mm_storeh_pi( (__m64*)&Dst[13 * StrideD], _mm_castsi128_ps(T02));
+ _mm_storel_epi64((__m128i*)&Dst[14 * StrideD], T03);
+ _mm_storeh_pi( (__m64*)&Dst[15 * StrideD], _mm_castsi128_ps(T03));
- T00 = _mm_loadl_epi64((__m128i*)&pSrc[ 8 * nStrideS]);
- T01 = _mm_loadl_epi64((__m128i*)&pSrc[ 9 * nStrideS]);
- T02 = _mm_loadl_epi64((__m128i*)&pSrc[10 * nStrideS]);
- T03 = _mm_loadl_epi64((__m128i*)&pSrc[11 * nStrideS]);
- T04 = _mm_loadl_epi64((__m128i*)&pSrc[12 * nStrideS]);
- T05 = _mm_loadl_epi64((__m128i*)&pSrc[13 * nStrideS]);
- T06 = _mm_loadl_epi64((__m128i*)&pSrc[14 * nStrideS]);
- T07 = _mm_loadl_epi64((__m128i*)&pSrc[15 * nStrideS]);
+ T00 = _mm_loadl_epi64((__m128i*)&Src[ 8 * StrideS]);
+ T01 = _mm_loadl_epi64((__m128i*)&Src[ 9 * StrideS]);
+ T02 = _mm_loadl_epi64((__m128i*)&Src[10 * StrideS]);
+ T03 = _mm_loadl_epi64((__m128i*)&Src[11 * StrideS]);
+ T04 = _mm_loadl_epi64((__m128i*)&Src[12 * StrideS]);
+ T05 = _mm_loadl_epi64((__m128i*)&Src[13 * StrideS]);
+ T06 = _mm_loadl_epi64((__m128i*)&Src[14 * StrideS]);
+ T07 = _mm_loadl_epi64((__m128i*)&Src[15 * StrideS]);
TRANSPOSE_8X8(T00, T01, T02, T03, T04, T05, T06, T07, T00, T01, T02, T03);
- _mm_storel_epi64((__m128i*)&pDst[ 0 * nStrideD + 8], T00);
- _mm_storeh_pi( (__m64*)&pDst[ 1 * nStrideD + 8], _mm_castsi128_ps(T00));
- _mm_storel_epi64((__m128i*)&pDst[ 2 * nStrideD + 8], T01);
- _mm_storeh_pi( (__m64*)&pDst[ 3 * nStrideD + 8], _mm_castsi128_ps(T01));
- _mm_storel_epi64((__m128i*)&pDst[ 4 * nStrideD + 8], T02);
- _mm_storeh_pi( (__m64*)&pDst[ 5 * nStrideD + 8], _mm_castsi128_ps(T02));
- _mm_storel_epi64((__m128i*)&pDst[ 6 * nStrideD + 8], T03);
- _mm_storeh_pi( (__m64*)&pDst[ 7 * nStrideD + 8], _mm_castsi128_ps(T03));
+ _mm_storel_epi64((__m128i*)&Dst[ 0 * StrideD + 8], T00);
+ _mm_storeh_pi( (__m64*)&Dst[ 1 * StrideD + 8], _mm_castsi128_ps(T00));
+ _mm_storel_epi64((__m128i*)&Dst[ 2 * StrideD + 8], T01);
+ _mm_storeh_pi( (__m64*)&Dst[ 3 * StrideD + 8], _mm_castsi128_ps(T01));
+ _mm_storel_epi64((__m128i*)&Dst[ 4 * StrideD + 8], T02);
+ _mm_storeh_pi( (__m64*)&Dst[ 5 * StrideD + 8], _mm_castsi128_ps(T02));
+ _mm_storel_epi64((__m128i*)&Dst[ 6 * StrideD + 8], T03);
+ _mm_storeh_pi( (__m64*)&Dst[ 7 * StrideD + 8], _mm_castsi128_ps(T03));
- T00 = _mm_loadl_epi64((__m128i*)&pSrc[ 8 * nStrideS + 8]);
- T01 = _mm_loadl_epi64((__m128i*)&pSrc[ 9 * nStrideS + 8]);
- T02 = _mm_loadl_epi64((__m128i*)&pSrc[10 * nStrideS + 8]);
- T03 = _mm_loadl_epi64((__m128i*)&pSrc[11 * nStrideS + 8]);
- T04 = _mm_loadl_epi64((__m128i*)&pSrc[12 * nStrideS + 8]);
- T05 = _mm_loadl_epi64((__m128i*)&pSrc[13 * nStrideS + 8]);
- T06 = _mm_loadl_epi64((__m128i*)&pSrc[14 * nStrideS + 8]);
- T07 = _mm_loadl_epi64((__m128i*)&pSrc[15 * nStrideS + 8]);
+ T00 = _mm_loadl_epi64((__m128i*)&Src[ 8 * StrideS + 8]);
+ T01 = _mm_loadl_epi64((__m128i*)&Src[ 9 * StrideS + 8]);
+ T02 = _mm_loadl_epi64((__m128i*)&Src[10 * StrideS + 8]);
+ T03 = _mm_loadl_epi64((__m128i*)&Src[11 * StrideS + 8]);
+ T04 = _mm_loadl_epi64((__m128i*)&Src[12 * StrideS + 8]);
+ T05 = _mm_loadl_epi64((__m128i*)&Src[13 * StrideS + 8]);
+ T06 = _mm_loadl_epi64((__m128i*)&Src[14 * StrideS + 8]);
+ T07 = _mm_loadl_epi64((__m128i*)&Src[15 * StrideS + 8]);
TRANSPOSE_8X8(T00, T01, T02, T03, T04, T05, T06, T07, T00, T01, T02, T03);
- _mm_storel_epi64((__m128i*)&pDst[ 8 * nStrideD + 8], T00);
- _mm_storeh_pi( (__m64*)&pDst[ 9 * nStrideD + 8], _mm_castsi128_ps(T00));
- _mm_storel_epi64((__m128i*)&pDst[10 * nStrideD + 8], T01);
- _mm_storeh_pi( (__m64*)&pDst[11 * nStrideD + 8], _mm_castsi128_ps(T01));
- _mm_storel_epi64((__m128i*)&pDst[12 * nStrideD + 8], T02);
- _mm_storeh_pi( (__m64*)&pDst[13 * nStrideD + 8], _mm_castsi128_ps(T02));
- _mm_storel_epi64((__m128i*)&pDst[14 * nStrideD + 8], T03);
- _mm_storeh_pi( (__m64*)&pDst[15 * nStrideD + 8], _mm_castsi128_ps(T03));
+ _mm_storel_epi64((__m128i*)&Dst[ 8 * StrideD + 8], T00);
+ _mm_storeh_pi( (__m64*)&Dst[ 9 * StrideD + 8], _mm_castsi128_ps(T00));
+ _mm_storel_epi64((__m128i*)&Dst[10 * StrideD + 8], T01);
+ _mm_storeh_pi( (__m64*)&Dst[11 * StrideD + 8], _mm_castsi128_ps(T01));
+ _mm_storel_epi64((__m128i*)&Dst[12 * StrideD + 8], T02);
+ _mm_storeh_pi( (__m64*)&Dst[13 * StrideD + 8], _mm_castsi128_ps(T02));
+ _mm_storel_epi64((__m128i*)&Dst[14 * StrideD + 8], T03);
+ _mm_storeh_pi( (__m64*)&Dst[15 * StrideD + 8], _mm_castsi128_ps(T03));
}
-void transpose16(pixel* pDst, pixel* pSrc, intptr_t nStrideS)
+void transpose16(pixel* Dst, pixel* Src, intptr_t StrideS)
{
- transpose16_dummy(pDst, 16, pSrc, nStrideS);
+ transpose16_dummy(Dst, 16, Src, StrideS);
}
void transpose32(pixel* dst, pixel* src, intptr_t strideSrc)
diff -r dc13d07919db -r c8c33fdca89b source/common/vec/pixel16.inc
--- a/source/common/vec/pixel16.inc Mon Jul 08 11:28:20 2013 +0530
+++ b/source/common/vec/pixel16.inc Mon Jul 08 12:50:38 2013 +0530
@@ -1,7 +1,7 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
- * Authors: Steve Borho <steve at borho.org>
+ * Authors: Steve Borho <steve at borho.org>
* Mandar Gurav <mandar at multicorewareinc.com>
* Mahesh Pittala <mahesh at multicorewareinc.com>
*
@@ -28,25 +28,25 @@
/* intrinsics for when pixel type is short */
template<int ly>
-int sad_4(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_4(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad);
@@ -55,12 +55,12 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad);
@@ -69,25 +69,25 @@
}
template<int ly>
-int sad_8(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_8(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -96,12 +96,12 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -110,31 +110,31 @@
}
template<int ly>
-int sad_12(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_12(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur + 8);
+ n1.load(Cur + 8);
n1.cutoff(4);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -143,18 +143,18 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur + 8);
+ n1.load(Cur + 8);
n1.cutoff(4);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -163,29 +163,29 @@
}
template<int ly>
-int sad_16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_16(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -194,16 +194,16 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -212,33 +212,33 @@
}
template<int ly>
-int sad_24(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur)
+int sad_24(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -247,20 +247,20 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -269,37 +269,37 @@
}
template<int ly>
-int sad_32(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_32(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur + 24);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -308,24 +308,24 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur + 24);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -334,45 +334,45 @@
}
template<int ly>
-int sad_48(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_48(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur + 24);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur + 32);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 40);
- n1.load(piCur + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur + 40);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -381,32 +381,32 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur + 24);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur + 32);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 40);
- n1.load(piCur + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur + 40);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -415,7 +415,7 @@
}
template<int ly>
-int sad_64(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_64(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec8s m1, n1;
@@ -427,40 +427,40 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur + 8);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur + 24);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur + 32);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 40);
- n1.load(piCur + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur + 40);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 48);
- n1.load(piCur + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur + 48);
sad += abs(m1 - n1);
- m1.load_a(piOrg + 56);
- n1.load(piCur + 56);
+ m1.load_a(Org + 56);
+ n1.load(Cur + 56);
sad += abs(m1 - n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -471,32 +471,32 @@
}
template<int ly>
-void sad_x3_4(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_4(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
Vec8us sad1(0), sad2(0), sad3(0);
Vec4i sum1(0), sum2(0), sum3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -509,19 +509,19 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -534,32 +534,32 @@
}
template<int ly>
-void sad_x3_8(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_8(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
Vec8us sad1(0), sad2(0), sad3(0);
Vec4i sum1(0), sum2(0), sum3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -572,19 +572,19 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -597,45 +597,45 @@
}
template<int ly>
-void sad_x3_12(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_12(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
Vec8us sad1(0), sad2(0), sad3(0);
Vec4i sum1(0), sum2(0), sum3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur1 + 8);
+ n1.load(Cur1 + 8);
n1.cutoff(4);
- n2.load(piCur2 + 8);
+ n2.load(Cur2 + 8);
n2.cutoff(4);
- n3.load(piCur3 + 8);
+ n3.load(Cur3 + 8);
n3.cutoff(4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -648,32 +648,32 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur1 + 8);
+ n1.load(Cur1 + 8);
n1.cutoff(4);
- n2.load(piCur2 + 8);
+ n2.load(Cur2 + 8);
n2.cutoff(4);
- n3.load(piCur3 + 8);
+ n3.load(Cur3 + 8);
n3.cutoff(4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -686,41 +686,41 @@
}
template<int ly>
-void sad_x3_16(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_16(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
Vec8us sad1(0), sad2(0), sad3(0);
Vec4i sum1(0), sum2(0), sum3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -733,28 +733,28 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -767,7 +767,7 @@
}
template<int ly>
-void sad_x3_24(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_24(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
@@ -779,37 +779,37 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -826,7 +826,7 @@
}
template<int ly>
-void sad_x3_32(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_32(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
@@ -838,46 +838,46 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -894,7 +894,7 @@
}
template<int ly>
-void sad_x3_48(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_48(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
@@ -906,64 +906,64 @@
{
for (int i = 0; i < 2; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 40);
- n1.load(piCur1 + 40);
- n2.load(piCur2 + 40);
- n3.load(piCur3 + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur1 + 40);
+ n2.load(Cur2 + 40);
+ n3.load(Cur3 + 40);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -980,7 +980,7 @@
}
template<int ly>
-void sad_x3_64(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_64(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3;
@@ -992,82 +992,82 @@
{
for (int i = 0; i < 2; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 40);
- n1.load(piCur1 + 40);
- n2.load(piCur2 + 40);
- n3.load(piCur3 + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur1 + 40);
+ n2.load(Cur2 + 40);
+ n3.load(Cur3 + 40);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 48);
- n1.load(piCur1 + 48);
- n2.load(piCur2 + 48);
- n3.load(piCur3 + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur1 + 48);
+ n2.load(Cur2 + 48);
+ n3.load(Cur3 + 48);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- m1.load_a(piOrg + 56);
- n1.load(piCur1 + 56);
- n2.load(piCur2 + 56);
- n3.load(piCur3 + 56);
+ m1.load_a(Org + 56);
+ n1.load(Cur1 + 56);
+ n2.load(Cur2 + 56);
+ n3.load(Cur3 + 56);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1084,35 +1084,35 @@
}
template<int ly>
-void sad_x4_4(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_4(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1127,22 +1127,22 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1157,35 +1157,35 @@
}
template<int ly>
-void sad_x4_8(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_8(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1200,22 +1200,22 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1230,39 +1230,39 @@
}
template<int ly>
-void sad_x4_12(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_12(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur1 + 8);
+ n1.load(Cur1 + 8);
n1.cutoff(4);
- n2.load(piCur2 + 8);
+ n2.load(Cur2 + 8);
n2.cutoff(4);
- n3.load(piCur3 + 8);
+ n3.load(Cur3 + 8);
n3.cutoff(4);
- n4.load(piCur4 + 8);
+ n4.load(Cur4 + 8);
n4.cutoff(4);
sad1 += abs(m1 - n1);
@@ -1270,11 +1270,11 @@
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1289,26 +1289,26 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
m1.cutoff(4);
- n1.load(piCur1 + 8);
+ n1.load(Cur1 + 8);
n1.cutoff(4);
- n2.load(piCur2 + 8);
+ n2.load(Cur2 + 8);
n2.cutoff(4);
- n3.load(piCur3 + 8);
+ n3.load(Cur3 + 8);
n3.cutoff(4);
- n4.load(piCur4 + 8);
+ n4.load(Cur4 + 8);
n4.cutoff(4);
sad1 += abs(m1 - n1);
@@ -1316,11 +1316,11 @@
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1335,46 +1335,46 @@
}
template<int ly>
-void sad_x4_16(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_16(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1389,33 +1389,33 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1430,7 +1430,7 @@
}
template<int ly>
-void sad_x4_24(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_24(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
@@ -1442,44 +1442,44 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1499,7 +1499,7 @@
}
template<int ly>
-void sad_x4_32(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_32(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
@@ -1511,55 +1511,55 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
- n4.load(piCur4 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
+ n4.load(Cur4 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1579,7 +1579,7 @@
}
template<int ly>
-void sad_x4_48(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_48(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
@@ -1591,77 +1591,77 @@
{
for (int i = 0; i < 2; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
- n4.load(piCur4 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
+ n4.load(Cur4 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
- n4.load(piCur4 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
+ n4.load(Cur4 + 32);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 40);
- n1.load(piCur1 + 40);
- n2.load(piCur2 + 40);
- n3.load(piCur3 + 40);
- n4.load(piCur4 + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur1 + 40);
+ n2.load(Cur2 + 40);
+ n3.load(Cur3 + 40);
+ n4.load(Cur4 + 40);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1681,7 +1681,7 @@
}
template<int ly>
-void sad_x4_64(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_64(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec8s m1, n1, n2, n3, n4;
@@ -1693,99 +1693,99 @@
{
for (int i = 0; i < 2; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 8);
- n1.load(piCur1 + 8);
- n2.load(piCur2 + 8);
- n3.load(piCur3 + 8);
- n4.load(piCur4 + 8);
+ m1.load_a(Org + 8);
+ n1.load(Cur1 + 8);
+ n2.load(Cur2 + 8);
+ n3.load(Cur3 + 8);
+ n4.load(Cur4 + 8);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 24);
- n1.load(piCur1 + 24);
- n2.load(piCur2 + 24);
- n3.load(piCur3 + 24);
- n4.load(piCur4 + 24);
+ m1.load_a(Org + 24);
+ n1.load(Cur1 + 24);
+ n2.load(Cur2 + 24);
+ n3.load(Cur3 + 24);
+ n4.load(Cur4 + 24);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
- n4.load(piCur4 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
+ n4.load(Cur4 + 32);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 40);
- n1.load(piCur1 + 40);
- n2.load(piCur2 + 40);
- n3.load(piCur3 + 40);
- n4.load(piCur4 + 40);
+ m1.load_a(Org + 40);
+ n1.load(Cur1 + 40);
+ n2.load(Cur2 + 40);
+ n3.load(Cur3 + 40);
+ n4.load(Cur4 + 40);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 48);
- n1.load(piCur1 + 48);
- n2.load(piCur2 + 48);
- n3.load(piCur3 + 48);
- n4.load(piCur4 + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur1 + 48);
+ n2.load(Cur2 + 48);
+ n3.load(Cur3 + 48);
+ n4.load(Cur4 + 48);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- m1.load_a(piOrg + 56);
- n1.load(piCur1 + 56);
- n2.load(piCur2 + 56);
- n3.load(piCur3 + 56);
- n4.load(piCur4 + 56);
+ m1.load_a(Org + 56);
+ n1.load(Cur1 + 56);
+ n2.load(Cur2 + 56);
+ n3.load(Cur3 + 56);
+ n4.load(Cur4 + 56);
sad1 += abs(m1 - n1);
sad2 += abs(m1 - n2);
sad3 += abs(m1 - n3);
sad4 += abs(m1 - n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1804,42 +1804,42 @@
res[3] = horizontal_add(sum4);
}
-int satd_4x4(pixel * piOrg, intptr_t iStrideOrg, pixel * piCur, intptr_t iStrideCur)
+int satd_4x4(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
int satd = 0;
Vec8s v1, v2, m1, m2;
{
- Vec8s temp1, temp2, temp3, temp4, piOrg_v, piCur_v;
- temp1.load(piOrg);
- temp2.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ Vec8s temp1, temp2, temp3, temp4, Org_v, Cur_v;
+ temp1.load(Org);
+ temp2.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- temp3.load(piOrg);
- temp4.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ temp3.load(Org);
+ temp4.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- piOrg_v = blend2q<0, 2>((Vec2q)temp1, (Vec2q)temp3);
- piCur_v = blend2q<0, 2>((Vec2q)temp2, (Vec2q)temp4);
+ Org_v = blend2q<0, 2>((Vec2q)temp1, (Vec2q)temp3);
+ Cur_v = blend2q<0, 2>((Vec2q)temp2, (Vec2q)temp4);
- temp1.load(piOrg);
- temp2.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ temp1.load(Org);
+ temp2.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- temp3.load(piOrg);
- temp4.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ temp3.load(Org);
+ temp4.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- v1 = piOrg_v - piCur_v; //diff
+ v1 = Org_v - Cur_v; //diff
- piOrg_v = blend2q<0, 2>((Vec2q)temp3, (Vec2q)temp1);
- piCur_v = blend2q<0, 2>((Vec2q)temp4, (Vec2q)temp2);
- v2 = piOrg_v - piCur_v; //diff
+ Org_v = blend2q<0, 2>((Vec2q)temp3, (Vec2q)temp1);
+ Cur_v = blend2q<0, 2>((Vec2q)temp4, (Vec2q)temp2);
+ v2 = Org_v - Cur_v; //diff
}
for (int i = 0; i < 2; i++)
@@ -1872,29 +1872,29 @@
return satd;
}
-int sa8d_8x8(pixel * piOrg, intptr_t iStrideOrg, pixel * piCur, intptr_t iStrideCur)
+int sa8d_8x8(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
ALIGN_VAR_16(short, m2[8][8]);
- Vec8s diff_v1, diff_v2, piOrg_v1, piOrg_v2, piCur_v1, piCur_v2;
+ Vec8s diff_v1, diff_v2, Org_v1, Org_v2, Cur_v1, Cur_v2;
Vec8s v1, v2, t1, t2;
int j, satd = 0;
for (j = 0; j < 8; j += 2)
{
- piOrg_v1.load_a(piOrg);
- piCur_v1.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ Org_v1.load_a(Org);
+ Cur_v1.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- piOrg_v2.load_a(piOrg);
- piCur_v2.load(piCur);
- piCur += iStrideCur;
- piOrg += iStrideOrg;
+ Org_v2.load_a(Org);
+ Cur_v2.load(Cur);
+ Cur += strideCur;
+ Org += strideOrg;
- diff_v1 = piOrg_v1 - piCur_v1;
- diff_v2 = piOrg_v2 - piCur_v2;
+ diff_v1 = Org_v1 - Cur_v1;
+ diff_v2 = Org_v2 - Cur_v2;
v1 = blend8s<0, 8, 1, 9, 2, 10, 3, 11>(diff_v1, diff_v2);
v2 = blend8s<4, 12, 5, 13, 6, 14, 7, 15>(diff_v1, diff_v2);
@@ -2039,7 +2039,7 @@
}
template<int lx, int ly>
-int satd(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int satd(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
int uiSum = 0;
@@ -2047,8 +2047,8 @@
{
for (int col = 0; col < lx; col += 4)
{
- uiSum += satd_4x4(piOrg + strideOrg * row + col, strideOrg,
- piCur + strideCur * row + col, strideCur);
+ uiSum += satd_4x4(Org + strideOrg * row + col, strideOrg,
+ Cur + strideCur * row + col, strideCur);
}
}
diff -r dc13d07919db -r c8c33fdca89b source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc Mon Jul 08 11:28:20 2013 +0530
+++ b/source/common/vec/pixel8.inc Mon Jul 08 12:50:38 2013 +0530
@@ -1,7 +1,7 @@
/*****************************************************************************
* Copyright (C) 2013 x265 project
*
- * Authors: Steve Borho <steve at borho.org>
+ * Authors: Steve Borho <steve at borho.org>
* Mandar Gurav <mandar at multicorewareinc.com>
* Mahesh Pittala <mahesh at multicorewareinc.com>
*
@@ -33,25 +33,25 @@
#endif
template<int ly>
-int sad_4(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_4(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec16uc m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad);
@@ -60,12 +60,12 @@
while (row++ < ly)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad);
@@ -73,77 +73,77 @@
}
template<int size>
-ALWAYSINLINE void unrollFunc_8(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_8(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
- unrollFunc_8<1>(piOrg, strideOrg, piCur, strideCur, sad);
- unrollFunc_8<size - 1>(piOrg + strideOrg, strideOrg, piCur + strideCur, strideCur, sad);
+ unrollFunc_8<1>(Org, strideOrg, Cur, strideCur, sad);
+ unrollFunc_8<size - 1>(Org + strideOrg, strideOrg, Cur + strideCur, strideCur, sad);
}
template<>
-ALWAYSINLINE void unrollFunc_8<1>(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_8<1>(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
Vec16uc m1, n1;
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
}
template<int ly>
-int sad_8(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_8(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec4i sum(0);
Vec8us sad;
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
if (ly < 16)
{
sad = 0;
- unrollFunc_8<ly>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_8<ly>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad);
return horizontal_add(sum);
}
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
sad = 0;
- unrollFunc_8<16>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_8<16>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad);
- piOrg += strideOrg * 16;
- piCur += strideCur * 16;
+ Org += strideOrg * 16;
+ Cur += strideCur * 16;
}
if (ly & 8)
{
sad = 0;
- unrollFunc_8<8>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_8<8>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad);
}
return horizontal_add(sum);
}
template<int ly>
-int sad_12(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_12(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec16uc m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur);
+ n1.load(Cur);
n1.cutoff(12);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -152,14 +152,14 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur);
+ n1.load(Cur);
n1.cutoff(12);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -167,48 +167,48 @@
}
template<int size>
-ALWAYSINLINE void unrollFunc_16(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_16(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
- unrollFunc_16<1>(piOrg, strideOrg, piCur, strideCur, sad);
- unrollFunc_16<size - 1>(piOrg + strideOrg, strideOrg, piCur + strideCur, strideCur, sad);
+ unrollFunc_16<1>(Org, strideOrg, Cur, strideCur, sad);
+ unrollFunc_16<size - 1>(Org + strideOrg, strideOrg, Cur + strideCur, strideCur, sad);
}
template<>
-ALWAYSINLINE void unrollFunc_16<1>(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_16<1>(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
Vec16uc m1, n1;
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
}
template<int ly>
-int sad_16(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_16(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row = 0;
if (ly < 16)
{
- unrollFunc_16<ly>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_16<ly>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
return horizontal_add(sum);
}
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
- unrollFunc_16<16>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_16<16>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
sad = 0;
- piOrg += strideOrg * 16;
- piCur += strideCur * 16;
+ Org += strideOrg * 16;
+ Cur += strideCur * 16;
}
if (ly & 8)
{
- unrollFunc_16<8>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_16<8>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
return horizontal_add(sum);
}
@@ -216,31 +216,31 @@
}
template<int ly>
-int sad_24(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur)
+int sad_24(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur)
{
Vec16uc m1, n1;
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur + 16);
+ n1.load(Cur + 16);
n1.cutoff(8);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -249,18 +249,18 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur + 16);
+ n1.load(Cur + 16);
n1.cutoff(8);
sad.addSumAbsDiff(m1, n1);
- piOrg += strideOrg;
- piCur += strideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
sum += extend_low(sad) + extend_high(sad);
@@ -268,182 +268,182 @@
}
template<int size>
-ALWAYSINLINE void unrollFunc_32(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_32(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
- unrollFunc_32<1>(piOrg, strideOrg, piCur, strideCur, sad);
- unrollFunc_32<size - 1>(piOrg + strideOrg, strideOrg, piCur + strideCur, strideCur, sad);
+ unrollFunc_32<1>(Org, strideOrg, Cur, strideCur, sad);
+ unrollFunc_32<size - 1>(Org + strideOrg, strideOrg, Cur + strideCur, strideCur, sad);
}
template<>
-ALWAYSINLINE void unrollFunc_32<1>(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_32<1>(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
Vec16uc m1, n1;
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad.addSumAbsDiff(m1, n1);
}
template<int ly>
-int sad_32(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_32(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec4i sum(0);
Vec8us sad;
- int main_iters = (ly >> 2) << 2;
+ int max_iterators = (ly >> 2) << 2;
int row;
if (ly == 4)
{
sad = 0;
- unrollFunc_32<4>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_32<4>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
return horizontal_add(sum);
}
- for (row = 0; row < main_iters; row += 4)
+ for (row = 0; row < max_iterators; row += 4)
{
sad = 0;
- unrollFunc_32<4>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_32<4>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
- piOrg += strideOrg * 4;
- piCur += strideCur * 4;
+ Org += strideOrg * 4;
+ Cur += strideCur * 4;
}
return horizontal_add(sum);
}
template<int size>
-ALWAYSINLINE void unrollFunc_48(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us *sad)
+ALWAYSINLINE void unrollFunc_48(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us *sad)
{
- unrollFunc_48<1>(piOrg, strideOrg, piCur, strideCur, sad);
- unrollFunc_48<size - 1>(piOrg + strideOrg, strideOrg, piCur + strideCur, strideCur, sad);
+ unrollFunc_48<1>(Org, strideOrg, Cur, strideCur, sad);
+ unrollFunc_48<size - 1>(Org + strideOrg, strideOrg, Cur + strideCur, strideCur, sad);
}
template<>
-ALWAYSINLINE void unrollFunc_48<1>(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us *sad)
+ALWAYSINLINE void unrollFunc_48<1>(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us *sad)
{
Vec16uc m1, n1;
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad[0].addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad[0].addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur + 32);
sad[0].addSumAbsDiff(m1, n1);
}
template<int ly>
-int sad_48(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_48(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec4i sum(0);
Vec8us sad(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
if (ly == 4)
{
- unrollFunc_48<4>(piOrg, strideOrg, piCur, strideCur, &sad);
+ unrollFunc_48<4>(Org, strideOrg, Cur, strideCur, &sad);
sum += extend_low(sad) + extend_high(sad);
return horizontal_add(sum);
}
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
- unrollFunc_48<8>(piOrg, strideOrg, piCur, strideCur, &sad);
+ unrollFunc_48<8>(Org, strideOrg, Cur, strideCur, &sad);
sum += extend_low(sad) + extend_high(sad);
sad = 0;
- piOrg += strideOrg * 8;
- piCur += strideCur * 8;
+ Org += strideOrg * 8;
+ Cur += strideCur * 8;
}
if (ly & 4)
{
- unrollFunc_48<4>(piOrg, strideOrg, piCur, strideCur, &sad);
+ unrollFunc_48<4>(Org, strideOrg, Cur, strideCur, &sad);
sum += extend_low(sad) + extend_high(sad);
}
return horizontal_add(sum);
}
template<int size>
-ALWAYSINLINE void unrollFunc_64(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_64(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
- unrollFunc_64<1>(piOrg, strideOrg, piCur, strideCur, sad);
- unrollFunc_64<size - 1>(piOrg + strideOrg, strideOrg, piCur + strideCur, strideCur, sad);
+ unrollFunc_64<1>(Org, strideOrg, Cur, strideCur, sad);
+ unrollFunc_64<size - 1>(Org + strideOrg, strideOrg, Cur + strideCur, strideCur, sad);
}
template<>
-ALWAYSINLINE void unrollFunc_64<1>(pixel *piOrg, intptr_t strideOrg, pixel *piCur, intptr_t strideCur, Vec8us& sad)
+ALWAYSINLINE void unrollFunc_64<1>(pixel *Org, intptr_t strideOrg, pixel *Cur, intptr_t strideCur, Vec8us& sad)
{
Vec16uc m1, n1;
- m1.load_a(piOrg);
- n1.load(piCur);
+ m1.load_a(Org);
+ n1.load(Cur);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur + 16);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur + 32);
sad.addSumAbsDiff(m1, n1);
- m1.load_a(piOrg + 48);
- n1.load(piCur + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur + 48);
sad.addSumAbsDiff(m1, n1);
}
template<int ly>
-int sad_64(pixel * piOrg, intptr_t strideOrg, pixel * piCur, intptr_t strideCur)
+int sad_64(pixel * Org, intptr_t strideOrg, pixel * Cur, intptr_t strideCur)
{
Vec4i sum(0);
Vec8us sad;
- int main_iters = (ly >> 2) << 2;
+ int max_iterators = (ly >> 2) << 2;
int row;
- for (row = 0; row < main_iters; row += 4)
+ for (row = 0; row < max_iterators; row += 4)
{
sad = 0;
- unrollFunc_64<4>(piOrg, strideOrg, piCur, strideCur, sad);
+ unrollFunc_64<4>(Org, strideOrg, Cur, strideCur, sad);
sum += extend_low(sad) + extend_high(sad);
- piOrg += strideOrg * 4;
- piCur += strideCur * 4;
+ Org += strideOrg * 4;
+ Cur += strideCur * 4;
}
return horizontal_add(sum);
}
template<int ly>
-void sad_x3_4(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_4(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur1);
- n2.fromUint32(*(uint32_t*)piCur2);
- n3.fromUint32(*(uint32_t*)piCur3);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur1);
+ n2.fromUint32(*(uint32_t*)Cur2);
+ n3.fromUint32(*(uint32_t*)Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -456,19 +456,19 @@
while (row++ < ly)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur1);
- n2.fromUint32(*(uint32_t*)piCur2);
- n3.fromUint32(*(uint32_t*)piCur3);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur1);
+ n2.fromUint32(*(uint32_t*)Cur2);
+ n3.fromUint32(*(uint32_t*)Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -482,32 +482,32 @@
/* For performance - This function assumes that the *last load* can access 16 elements. */
template<int ly>
-void sad_x3_8(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_8(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -520,19 +520,19 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1);
@@ -546,36 +546,36 @@
/* For performance - This function assumes that the *last load* can access 16 elements. */
template<int ly>
-void sad_x3_12(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_12(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur1);
+ n1.load(Cur1);
n1.cutoff(12);
- n2.load(piCur2);
+ n2.load(Cur2);
n2.cutoff(12);
- n3.load(piCur3);
+ n3.load(Cur3);
n3.cutoff(12);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -588,23 +588,23 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur1);
+ n1.load(Cur1);
n1.cutoff(12);
- n2.load(piCur2);
+ n2.load(Cur2);
n2.cutoff(12);
- n3.load(piCur3);
+ n3.load(Cur3);
n3.cutoff(12);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -617,32 +617,32 @@
}
template<int ly>
-void sad_x3_16(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_16(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -655,19 +655,19 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -680,45 +680,45 @@
}
template<int ly>
-void sad_x3_24(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_24(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur1 + 16);
+ n1.load(Cur1 + 16);
n1.cutoff(8);
- n2.load(piCur2 + 16);
+ n2.load(Cur2 + 16);
n2.cutoff(8);
- n3.load(piCur3 + 16);
+ n3.load(Cur3 + 16);
n3.cutoff(8);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -731,32 +731,32 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur1 + 16);
+ n1.load(Cur1 + 16);
n1.cutoff(8);
- n2.load(piCur2 + 16);
+ n2.load(Cur2 + 16);
n2.cutoff(8);
- n3.load(piCur3 + 16);
+ n3.load(Cur3 + 16);
n3.cutoff(8);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -769,41 +769,41 @@
}
template<int ly>
-void sad_x3_32(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_32(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -816,28 +816,28 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -850,50 +850,50 @@
}
template<int ly>
-void sad_x3_48(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_48(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
Vec4i sum1(0), sum2(0), sum3(0);
Vec8us sad1(0), sad2(0), sad3(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -906,37 +906,37 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -949,7 +949,7 @@
}
template<int ly>
-void sad_x3_64(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, intptr_t strideCur, int *res)
+void sad_x3_64(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3;
@@ -961,46 +961,46 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- m1.load_a(piOrg + 48);
- n1.load(piCur1 + 48);
- n2.load(piCur2 + 48);
- n3.load(piCur3 + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur1 + 48);
+ n2.load(Cur2 + 48);
+ n3.load(Cur3 + 48);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1017,35 +1017,35 @@
}
template<int ly>
-void sad_x4_4(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_4(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur1);
- n2.fromUint32(*(uint32_t*)piCur2);
- n3.fromUint32(*(uint32_t*)piCur3);
- n4.fromUint32(*(uint32_t*)piCur4);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur1);
+ n2.fromUint32(*(uint32_t*)Cur2);
+ n3.fromUint32(*(uint32_t*)Cur3);
+ n4.fromUint32(*(uint32_t*)Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1060,22 +1060,22 @@
while (row++ < ly)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur1);
- n2.fromUint32(*(uint32_t*)piCur2);
- n3.fromUint32(*(uint32_t*)piCur3);
- n4.fromUint32(*(uint32_t*)piCur4);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur1);
+ n2.fromUint32(*(uint32_t*)Cur2);
+ n3.fromUint32(*(uint32_t*)Cur3);
+ n4.fromUint32(*(uint32_t*)Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1090,35 +1090,35 @@
}
template<int ly>
-void sad_x4_8(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_8(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1133,22 +1133,22 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1);
@@ -1164,28 +1164,28 @@
/* For performance - This function assumes that the *last load* can access 16 elements. */
template<int ly>
-void sad_x4_12(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_12(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur1);
+ n1.load(Cur1);
n1.cutoff(12);
- n2.load(piCur2);
+ n2.load(Cur2);
n2.cutoff(12);
- n3.load(piCur3);
+ n3.load(Cur3);
n3.cutoff(12);
- n4.load(piCur4);
+ n4.load(Cur4);
n4.cutoff(12);
sad1.addSumAbsDiff(m1, n1);
@@ -1193,11 +1193,11 @@
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1212,15 +1212,15 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
+ m1.load_a(Org);
m1.cutoff(12);
- n1.load(piCur1);
+ n1.load(Cur1);
n1.cutoff(12);
- n2.load(piCur2);
+ n2.load(Cur2);
n2.cutoff(12);
- n3.load(piCur3);
+ n3.load(Cur3);
n3.cutoff(12);
- n4.load(piCur4);
+ n4.load(Cur4);
n4.cutoff(12);
sad1.addSumAbsDiff(m1, n1);
@@ -1228,11 +1228,11 @@
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1247,35 +1247,35 @@
}
template<int ly>
-void sad_x4_16(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_16(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1290,22 +1290,22 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1320,39 +1320,39 @@
}
template<int ly>
-void sad_x4_24(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_24(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 4) << 4;
+ int max_iterators = (ly >> 4) << 4;
int row;
- for (row = 0; row < main_iters; row += 16)
+ for (row = 0; row < max_iterators; row += 16)
{
for (int i = 0; i < 16; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur1 + 16);
+ n1.load(Cur1 + 16);
n1.cutoff(8);
- n2.load(piCur2 + 16);
+ n2.load(Cur2 + 16);
n2.cutoff(8);
- n3.load(piCur3 + 16);
+ n3.load(Cur3 + 16);
n3.cutoff(8);
- n4.load(piCur4 + 16);
+ n4.load(Cur4 + 16);
n4.cutoff(8);
sad1.addSumAbsDiff(m1, n1);
@@ -1360,11 +1360,11 @@
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1379,26 +1379,26 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
+ m1.load_a(Org + 16);
m1.cutoff(8);
- n1.load(piCur1 + 16);
+ n1.load(Cur1 + 16);
n1.cutoff(8);
- n2.load(piCur2 + 16);
+ n2.load(Cur2 + 16);
n2.cutoff(8);
- n3.load(piCur3 + 16);
+ n3.load(Cur3 + 16);
n3.cutoff(8);
- n4.load(piCur4 + 16);
+ n4.load(Cur4 + 16);
n4.cutoff(8);
sad1.addSumAbsDiff(m1, n1);
@@ -1406,11 +1406,11 @@
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1425,46 +1425,46 @@
}
template<int ly>
-void sad_x4_32(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_32(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1479,33 +1479,33 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1520,57 +1520,57 @@
}
template<int ly>
-void sad_x4_48(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_48(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
Vec4i sum1(0), sum2(0), sum3(0), sum4(0);
Vec8us sad1(0), sad2(0), sad3(0), sad4(0);
- int main_iters = (ly >> 3) << 3;
+ int max_iterators = (ly >> 3) << 3;
int row;
- for (row = 0; row < main_iters; row += 8)
+ for (row = 0; row < max_iterators; row += 8)
{
for (int i = 0; i < 8; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
- n4.load(piCur4 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
+ n4.load(Cur4 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1585,44 +1585,44 @@
while (row++ < ly)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
- n4.load(piCur4 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
+ n4.load(Cur4 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
@@ -1637,7 +1637,7 @@
}
template<int ly>
-void sad_x4_64(pixel *piOrg, pixel *piCur1, pixel *piCur2, pixel *piCur3, pixel *piCur4, intptr_t strideCur, int *res)
+void sad_x4_64(pixel *Org, pixel *Cur1, pixel *Cur2, pixel *Cur3, pixel *Cur4, intptr_t strideCur, int *res)
{
Vec16uc m1, n1, n2, n3, n4;
@@ -1649,55 +1649,55 @@
{
for (int i = 0; i < 4; i++)
{
- m1.load_a(piOrg);
- n1.load(piCur1);
- n2.load(piCur2);
- n3.load(piCur3);
- n4.load(piCur4);
+ m1.load_a(Org);
+ n1.load(Cur1);
+ n2.load(Cur2);
+ n3.load(Cur3);
+ n4.load(Cur4);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 16);
- n1.load(piCur1 + 16);
- n2.load(piCur2 + 16);
- n3.load(piCur3 + 16);
- n4.load(piCur4 + 16);
+ m1.load_a(Org + 16);
+ n1.load(Cur1 + 16);
+ n2.load(Cur2 + 16);
+ n3.load(Cur3 + 16);
+ n4.load(Cur4 + 16);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 32);
- n1.load(piCur1 + 32);
- n2.load(piCur2 + 32);
- n3.load(piCur3 + 32);
- n4.load(piCur4 + 32);
+ m1.load_a(Org + 32);
+ n1.load(Cur1 + 32);
+ n2.load(Cur2 + 32);
+ n3.load(Cur3 + 32);
+ n4.load(Cur4 + 32);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- m1.load_a(piOrg + 48);
- n1.load(piCur1 + 48);
- n2.load(piCur2 + 48);
- n3.load(piCur3 + 48);
- n4.load(piCur4 + 48);
+ m1.load_a(Org + 48);
+ n1.load(Cur1 + 48);
+ n2.load(Cur2 + 48);
+ n3.load(Cur3 + 48);
+ n4.load(Cur4 + 48);
sad1.addSumAbsDiff(m1, n1);
sad2.addSumAbsDiff(m1, n2);
sad3.addSumAbsDiff(m1, n3);
sad4.addSumAbsDiff(m1, n4);
- piOrg += FENC_STRIDE;
- piCur1 += strideCur;
- piCur2 += strideCur;
- piCur3 += strideCur;
- piCur4 += strideCur;
+ Org += FENC_STRIDE;
+ Cur1 += strideCur;
+ Cur2 += strideCur;
+ Cur3 += strideCur;
+ Cur4 += strideCur;
}
sum1 += extend_low(sad1) + extend_high(sad1);
diff -r dc13d07919db -r c8c33fdca89b source/common/vec/sse.inc
--- a/source/common/vec/sse.inc Mon Jul 08 11:28:20 2013 +0530
+++ b/source/common/vec/sse.inc Mon Jul 08 12:50:38 2013 +0530
@@ -26,62 +26,62 @@
/* intrinsics for when pixel type is uint8_t */
template<int ly>
-int sse_pp4(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp4(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff(0);
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.fromUint32(*(uint32_t*)piOrg);
- n1.fromUint32(*(uint32_t*)piCur);
+ m1.fromUint32(*(uint32_t*)Org);
+ n1.fromUint32(*(uint32_t*)Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += extend_low(diff);
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_pp8(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp8(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff(0);
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += (extend_low(diff) + (extend_high(diff)));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_pp12(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp12(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff(0);
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
+ m1.load(Org);
m1.cutoff(12);
- n1.load(piCur);
+ n1.load(Cur);
n1.cutoff(12);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
@@ -91,50 +91,50 @@
diff = diff * diff;
sum += (extend_low(diff) + (extend_high(diff)));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_pp16(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp16(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_pp24(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp24(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -142,32 +142,32 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff_low = extend_low(m1) - extend_low(n1);
diff_low = diff_low * diff_low;
sum_low += extend_low(diff_low);
sum_high += extend_high(diff_low);
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_pp32(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp32(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -175,33 +175,33 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_pp48(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp48(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -209,8 +209,8 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -218,33 +218,33 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load(Org + 32);
+ n1.load(Cur + 32);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_pp64(pixel* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_pp64(pixel* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec16uc m1, n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -252,8 +252,8 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -261,8 +261,8 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load(Org + 32);
+ n1.load(Cur + 32);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
@@ -270,55 +270,55 @@
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- m1.load(piOrg + 48);
- n1.load(piCur + 48);
+ m1.load(Org + 48);
+ n1.load(Cur + 48);
diff_low = extend_low(m1) - extend_low(n1);
diff_high = extend_high(m1) - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_ss4(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss4(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss8(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss8(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -326,25 +326,25 @@
diff = extend_high(m1) - extend_high(n1);
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss12(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss12(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -352,31 +352,31 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss16(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss16(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -384,8 +384,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -393,25 +393,25 @@
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss24(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss24(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -419,8 +419,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -428,8 +428,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -437,25 +437,25 @@
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss32(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss32(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -463,8 +463,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -472,8 +472,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -481,8 +481,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load(Org + 24);
+ n1.load(Cur + 24);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -490,25 +490,25 @@
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss48(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss48(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -516,8 +516,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -525,8 +525,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -534,8 +534,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load(Org + 24);
+ n1.load(Cur + 24);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -543,8 +543,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load(Org + 32);
+ n1.load(Cur + 32);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -552,8 +552,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 40);
- n1.load(piCur + 40);
+ m1.load(Org + 40);
+ n1.load(Cur + 40);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -561,25 +561,25 @@
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_ss64(short* piOrg, intptr_t iStrideOrg, short* piCur, intptr_t iStrideCur)
+int sse_ss64(short* Org, intptr_t strideOrg, short* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec4i diff(0);
Vec8s m1, n1;
Vec4i sum(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.load(piCur);
+ m1.load(Org);
+ n1.load(Cur);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -587,8 +587,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 8);
- n1.load(piCur + 8);
+ m1.load(Org + 8);
+ n1.load(Cur + 8);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -596,8 +596,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 16);
- n1.load(piCur + 16);
+ m1.load(Org + 16);
+ n1.load(Cur + 16);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -605,8 +605,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 24);
- n1.load(piCur + 24);
+ m1.load(Org + 24);
+ n1.load(Cur + 24);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -614,8 +614,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 32);
- n1.load(piCur + 32);
+ m1.load(Org + 32);
+ n1.load(Cur + 32);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -623,8 +623,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 40);
- n1.load(piCur + 40);
+ m1.load(Org + 40);
+ n1.load(Cur + 40);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -632,8 +632,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 48);
- n1.load(piCur + 48);
+ m1.load(Org + 48);
+ n1.load(Cur + 48);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -641,8 +641,8 @@
diff = diff * diff;
sum += diff;
- m1.load(piOrg + 56);
- n1.load(piCur + 56);
+ m1.load(Org + 56);
+ n1.load(Cur + 56);
diff = extend_low(m1) - extend_low(n1);
diff = diff * diff;
sum += diff;
@@ -650,294 +650,294 @@
diff = diff * diff;
sum += diff;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum);
}
template<int ly>
-int sse_sp4(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp4(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec4i diff_low(0);
Vec4i sum_low(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- m1.load(piOrg);
- n1.fromUint32(*(uint32_t*)piCur);
+ m1.load(Org);
+ n1.fromUint32(*(uint32_t*)Cur);
diff_low = extend_low(m1) - extend_low(extend_low(n1));
diff_low = diff_low * diff_low;
sum_low += diff_low;
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low);
}
template<int ly>
-int sse_sp8(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp8(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
diff_low = diff_low * diff_low;
sum_low += extend_low(diff_low);
sum_high += extend_high(diff_low);
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp12(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp12(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0);
Vec4i diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
+ n1.load(Cur);
n1.cutoff(12);
- m1.load(piOrg);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 8);
+ m1.load(Org + 8);
diff_high = extend_low(m1) - extend_low(extend_high(n1));
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += extend_low(diff_low);
sum_high += (extend_high(diff_low) + diff_high);
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp16(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp16(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 8);
+ m1.load(Org + 8);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp24(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp24(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 8);
+ m1.load(Org + 8);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 16);
- m1.load(piOrg + 16);
+ n1.load(Cur + 16);
+ m1.load(Org + 16);
diff_low = m1 - extend_low(n1);
diff_low = diff_low * diff_low;
sum_low += extend_low(diff_low);
sum_high += extend_high(diff_low);
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp32(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp32(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 8);
+ m1.load(Org + 8);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 16);
- m1.load(piOrg + 16);
+ n1.load(Cur + 16);
+ m1.load(Org + 16);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 24);
+ m1.load(Org + 24);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp48(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp48(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load_a(piOrg + 8);
+ m1.load_a(Org + 8);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 16);
- m1.load(piOrg + 16);
+ n1.load(Cur + 16);
+ m1.load(Org + 16);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 24);
+ m1.load(Org + 24);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 32);
- m1.load(piOrg + 32);
+ n1.load(Cur + 32);
+ m1.load(Org + 32);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 40);
+ m1.load(Org + 40);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
}
template<int ly>
-int sse_sp64(short* piOrg, intptr_t iStrideOrg, pixel* piCur, intptr_t iStrideCur)
+int sse_sp64(short* Org, intptr_t strideOrg, pixel* Cur, intptr_t strideCur)
{
- int iRows = ly;
+ int rows = ly;
Vec8s m1;
Vec16uc n1;
Vec8us diff_low(0), diff_high(0);
Vec4i sum_low(0), sum_high(0);
- for (; iRows != 0; iRows--)
+ for (; rows != 0; rows--)
{
- n1.load(piCur);
- m1.load(piOrg);
+ n1.load(Cur);
+ m1.load(Org);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 8);
+ m1.load(Org + 8);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 16);
- m1.load(piOrg + 16);
+ n1.load(Cur + 16);
+ m1.load(Org + 16);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 24);
+ m1.load(Org + 24);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 32);
- m1.load(piOrg + 32);
+ n1.load(Cur + 32);
+ m1.load(Org + 32);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 40);
+ m1.load(Org + 40);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- n1.load(piCur + 48);
- m1.load(piOrg + 48);
+ n1.load(Cur + 48);
+ m1.load(Org + 48);
diff_low = m1 - extend_low(n1);
- m1.load(piOrg + 56);
+ m1.load(Org + 56);
diff_high = m1 - extend_high(n1);
diff_low = diff_low * diff_low;
diff_high = diff_high * diff_high;
sum_low += (extend_low(diff_low) + extend_low(diff_high));
sum_high += (extend_high(diff_low) + extend_high(diff_high));
- piOrg += iStrideOrg;
- piCur += iStrideCur;
+ Org += strideOrg;
+ Cur += strideCur;
}
return horizontal_add(sum_low) + horizontal_add(sum_high);
More information about the x265-devel
mailing list