[vlc-devel] [PATCH 02/16] stream cipher: refactoring
Serg Chernyavskiy
glenvt18 at gmail.com
Thu Jul 30 14:13:44 CEST 2015
Any suggestions?
2015-07-07 4:22 GMT+03:00 Serg Chernyavskiy <glenvt18 at gmail.com>:
> The stream cipher has 2 phases: initialisation and output. Both share the
> same cipher structure. The original code has two copies of the same function
> with only small differences which is a pain to make changes to. So this .inc
> file is only for that internal use. It is not a header file. What would you
> suggest?
> 1. Rename the file, say to dvbcsa_bs_stream_kernel.int, or any other suffix?
> 2. Add the license header.
> 3. Compile the same file twice with different defines producing two object
> files. Isn't that uglier?
> 4. Something else.
>
>
> 2015-07-06 19:09 GMT+03:00 Jean-Baptiste Kempf <jb at videolan.org>:
>>
>> Do not mame something .inc.
>> Do not add unlicensed files, please.
>>
>> On 26 Jun, glenvt18 wrote :
>> > 1. Move stream cipher kernel code into a separate file included twice.
>> > Don't duplicate code.
>> > 2. Store stream cipher registers in a structure passed as an argument.
>> > 3. Use virtual shift registers for A and B, avoid copying on each round.
>> > ---
>> > src/Makefile.am | 3 +-
>> > src/dvbcsa_bs_stream.c | 410
>> > +++-------------------------------------
>> > src/dvbcsa_bs_stream_kernel.h | 23 +++
>> > src/dvbcsa_bs_stream_kernel.inc | 259 +++++++++++++++++++++++++
>> > 4 files changed, 315 insertions(+), 380 deletions(-)
>> > create mode 100644 src/dvbcsa_bs_stream_kernel.h
>> > create mode 100644 src/dvbcsa_bs_stream_kernel.inc
>> >
>> > diff --git a/src/Makefile.am b/src/Makefile.am
>> > index d01c14e..dec4f55 100644
>> > --- a/src/Makefile.am
>> > +++ b/src/Makefile.am
>> > @@ -7,7 +7,8 @@ libdvbcsa_la_SOURCES = dvbcsa_algo.c dvbcsa_block.c
>> > dvbcsa_bs_algo.c \
>> > dvbcsa_bs_block.c dvbcsa_bs_key.c dvbcsa_bs_stream.c \
>> > dvbcsa_stream.c dvbcsa_bs.h dvbcsa_pv.h dvbcsa_bs_uint64.h \
>> > dvbcsa_bs_uint32.h dvbcsa_bs_mmx.h dvbcsa_bs_sse.h \
>> > - dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c
>> > + dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c \
>> > + dvbcsa_bs_stream_kernel.inc dvbcsa_bs_stream_kernel.h
>> >
>> > if TRANSPOSE_128
>> > libdvbcsa_la_SOURCES += dvbcsa_bs_transpose128.c
>> > diff --git a/src/dvbcsa_bs_stream.c b/src/dvbcsa_bs_stream.c
>> > index 7cb7f09..86e8a6f 100644
>> > --- a/src/dvbcsa_bs_stream.c
>> > +++ b/src/dvbcsa_bs_stream.c
>> > @@ -25,411 +25,63 @@
>> >
>> > #include "dvbcsa/dvbcsa.h"
>> > #include "dvbcsa_bs.h"
>> > +#include "dvbcsa_bs_stream_kernel.h"
>> >
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > -
>> > - tmp0 = BS_XOR (fa, BS_XOR (fb, BS_NOT (BS_OR (BS_XOR (BS_OR (fa, fb),
>> > fc), BS_XOR (fc, fd)))));
>> > - tmp1 = BS_XOR (BS_OR (fa, fb), BS_NOT (BS_AND (fc, BS_OR (fa, BS_XOR
>> > (fb, fd)))));
>> > - tmp2 = BS_XOR (fa, BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd),
>> > fc)));
>> > - tmp3 = BS_XOR (BS_AND (fa, fc), BS_XOR (fa, BS_OR (BS_AND (fa, fb),
>> > fd)));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > -
>> > - tmp0 = BS_XOR (fa, BS_XOR (BS_AND (fb, BS_OR (fc, fd)), BS_XOR (fc,
>> > BS_NOT (fd))));
>> > - tmp1 = BS_OR (BS_AND (fa, BS_XOR (fb, fd)), BS_AND (BS_OR (fa, fb),
>> > fc));
>> > - tmp2 = BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), BS_XOR (fb,
>> > BS_NOT (fc))));
>> > - tmp3 = BS_OR (BS_AND (fa, fd), BS_XOR (fa, BS_XOR (fb, BS_AND (fc,
>> > fd))));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2;
>> > -
>> > - tmp0 = BS_XOR (fa, BS_XOR (fb, BS_XOR (BS_AND (fc, BS_OR (fa, fd)),
>> > fd)));
>> > - tmp1 = BS_XOR (BS_AND (fa, fc), BS_OR (BS_XOR (fa, fd), BS_XOR (BS_OR
>> > (fb, fc), BS_NOT (fd))));
>> > - tmp2 = BS_XOR (fa, BS_XOR (BS_AND (BS_XOR (fb, fc), fd), fc));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (BS_NOT (fe), tmp1));
>> > - *sb = BS_XOR (tmp2, fe);
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2;
>> > -
>> > - tmp0 = BS_XOR (fa, BS_OR (BS_AND (fc, BS_XOR (fa, fd)), BS_XOR (fb,
>> > BS_OR (fc, BS_NOT (fd)))));
>> > - tmp1 = BS_XOR (BS_AND (fa, fb), BS_XOR (fb, BS_XOR (BS_AND (BS_OR
>> > (fa, fc), fd), fc)));
>> > - tmp2 = BS_XOR (fa, BS_OR (BS_AND (fb, fc), BS_XOR (BS_OR (BS_AND (fa,
>> > BS_XOR (fb, fd)), fc), fd)));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, BS_XOR (tmp1, tmp0)));
>> > - *sb = BS_XOR (BS_XOR (*sa, tmp2), fe);
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > -
>> > - tmp0 = BS_OR (BS_XOR (BS_AND (fa, BS_OR (fb, fc)), fb), BS_XOR (BS_OR
>> > (BS_XOR (fa, fc), fd), BS_VAL8(ff)));
>> > - tmp1 = BS_XOR (fb, BS_AND (BS_XOR (fc, fd), BS_XOR (fc, BS_OR (fb,
>> > BS_XOR (fa, fd)))));
>> > - tmp2 = BS_XOR (BS_AND (fa, fc), BS_XOR (fb, BS_AND (BS_OR (fb, BS_XOR
>> > (fa, fc)), fd)));
>> > - tmp3 = BS_OR (BS_AND (BS_XOR (fa, fb), BS_XOR (fc, BS_VAL8(ff))),
>> > fd);
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > -
>> > - tmp0 = BS_XOR (BS_AND (BS_AND (fa, fc), fd), BS_XOR (BS_AND (fb,
>> > BS_OR (fa, fd)), fc));
>> > - tmp1 = BS_NOT (BS_AND (BS_XOR (fa, fc), fd));
>> > - tmp2 = BS_XOR (BS_AND (fa, BS_OR (fb, fc)), BS_XOR (fb, BS_OR (BS_AND
>> > (fb, fc), fd)));
>> > - tmp3 = BS_AND (fc, BS_XOR (BS_AND (fa, BS_XOR (fb, fd)), BS_OR (fb,
>> > fd)));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > -}
>> > -
>> > -static void DVBCSA_INLINE inline
>> > -dvbcsa_bs_stream_sbox7(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > - dvbcsa_bs_word_t fe,
>> > - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > -{
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > -
>> > - tmp0 = BS_XOR (fb, BS_OR (BS_AND (fc, fd), BS_XOR (fa, BS_XOR (fc,
>> > fd))));
>> > - tmp1 = BS_AND (BS_OR (fb, fd), BS_OR (BS_AND (fa, fc), BS_XOR (fb,
>> > BS_XOR (fc, fd))));
>> > - tmp2 = BS_XOR (BS_OR (fa, fb), BS_XOR (BS_AND (fc, BS_OR (fb, fd)),
>> > fd));
>> > - tmp3 = BS_OR (fd, BS_XOR (BS_AND (fa, fc), BS_VAL8(ff)));
>> > -
>> > - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > -}
>> > +#define DVBCSA_BS_STREAM_KERNEL_INIT
>> > +#include "dvbcsa_bs_stream_kernel.inc"
>> > +#undef DVBCSA_BS_STREAM_KERNEL_INIT
>> > +#include "dvbcsa_bs_stream_kernel.inc"
>> >
>> > void
>> > dvbcsa_bs_stream_cipher_batch(const struct dvbcsa_bs_key_s *key,
>> > const struct dvbcsa_bs_batch_s *pcks,
>> > unsigned int maxlen)
>> > {
>> > - dvbcsa_bs_word_t A[10][4];
>> > - dvbcsa_bs_word_t B[10][4];
>> > - dvbcsa_bs_word_t X[4];
>> > - dvbcsa_bs_word_t Y[4];
>> > - dvbcsa_bs_word_t Z[4];
>> > - dvbcsa_bs_word_t D[4];
>> > - dvbcsa_bs_word_t E[4];
>> > - dvbcsa_bs_word_t F[4];
>> > - dvbcsa_bs_word_t p;
>> > - dvbcsa_bs_word_t q;
>> > - dvbcsa_bs_word_t r;
>> > - dvbcsa_bs_word_t in1[4];
>> > - dvbcsa_bs_word_t in2[4];
>> > - dvbcsa_bs_word_t extra_B[4];
>> > - dvbcsa_bs_word_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b, s5a, s5b,
>> > s6a, s6b, s7a, s7b;
>> > - dvbcsa_bs_word_t next_E[4];
>> > - dvbcsa_bs_word_t tmp0, tmp1, tmp3, tmp4;
>> > - dvbcsa_bs_word_t sb[64];
>> > - int h, i, j, k, b;
>> > + struct dvbcsa_bs_stream_regs_s regs __attribute__ ((aligned
>> > (BS_BATCH_BYTES)));
>> >
>> > - dvbcsa_bs_stream_transpose_in(pcks, sb);
>> > + int i, b;
>> > + unsigned int h;
>> > +
>> > + dvbcsa_bs_stream_transpose_in(pcks, regs.sb);
>>
>> >
>> > for (b = 0; b < 4; b++)
>> > {
>> > for (i = 0; i < 8; i++)
>> > {
>> > - A[i][b] = key->stream[b + i * 4];
>> > - B[i][b] = key->stream[b + i * 4 + 32];
>> > + regs.A[32 + i][b] = key->stream[b + i * 4];
>> > + regs.B[32 + i][b] = key->stream[b + i * 4 + 32];
>> > }
>> >
>> > // all other regs = 0
>> > - A[8][b] = BS_VAL8(00);
>> > - A[9][b] = BS_VAL8(00);
>> > - B[8][b] = BS_VAL8(00);
>> > - B[9][b] = BS_VAL8(00);
>> > -
>> > - X[b] = BS_VAL8(00);
>> > - Y[b] = BS_VAL8(00);
>> > - Z[b] = BS_VAL8(00);
>> > - D[b] = BS_VAL8(00);
>> > - E[b] = BS_VAL8(00);
>> > - F[b] = BS_VAL8(00);
>> > + regs.A[32 + 8][b] = BS_VAL8(00);
>> > + regs.A[32 + 9][b] = BS_VAL8(00);
>> > + regs.B[32 + 8][b] = BS_VAL8(00);
>> > + regs.B[32 + 9][b] = BS_VAL8(00);
>> > +
>> > + regs.X[b] = BS_VAL8(00);
>> > + regs.Y[b] = BS_VAL8(00);
>> > + regs.Z[b] = BS_VAL8(00);
>> > + regs.D[b] = BS_VAL8(00);
>> > + regs.E[b] = BS_VAL8(00);
>> > + regs.F[b] = BS_VAL8(00);
>> > }
>> >
>> > - p = BS_VAL8(00);
>> > - q = BS_VAL8(00);
>> > - r = BS_VAL8(00);
>> > + regs.p = BS_VAL8(00);
>> > + regs.q = BS_VAL8(00);
>> > + regs.r = BS_VAL8(00);
>> >
>> > /* Stream INIT */
>> >
>> > - for (i = 0; i < 8; i++)
>> > - {
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - in1[b] = sb[8 * i + 4 + b];
>> > - in2[b] = sb[8 * i + b];
>> > - }
>> > -
>> > - for (j = 0; j < 4; j++)
>> > - {
>> > - dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0],
>> > A[3][0], &s1a, &s1b);
>> > - dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1],
>> > A[1][1], &s2a, &s2b);
>> > - dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2],
>> > A[0][3], &s3a, &s3b);
>> > - dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0],
>> > A[2][3], &s4a, &s4b);
>> > - dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2],
>> > A[4][2], &s5a, &s5b);
>> > - dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3],
>> > A[2][1], &s6a, &s6b);
>> > - dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3],
>> > A[1][2], &s7a, &s7b);
>> > -
>> > - extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]),
>> > B[6][2]), B[8][3]);
>> > - extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]),
>> > B[2][3]), B[3][2]);
>> > - extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]),
>> > B[3][0]), B[4][1]);
>> > - extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]),
>> > B[2][1]), B[7][0]);
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - dvbcsa_bs_word_t A_next;
>> > -
>> > - A_next = BS_XOR (A[9][b], X[b]);
>> > - A_next = BS_XOR (BS_XOR (A_next, D[b]), ((j % 2) ? in2[b] :
>> > in1[b]));
>> > + dvbcsa_bs_stream_cipher_kernel_init(®s);
>> >
>> > - for (k = 9; k > 0; k--)
>> > - A[k][b] = A[k - 1][b];
>> > -
>> > - A[0][b] = A_next;
>> > - }
>> > -
>> > - dvbcsa_bs_word_t B_next[4];
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]);
>> > - B_next[b] = BS_XOR (B_next[b], ((j % 2) ? in1[b] : in2[b]));
>> > - }
>> > -
>> > - tmp3 = B_next[3];
>> > - B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3],
>> > B_next[2]), p));
>> > - B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2],
>> > B_next[1]), p));
>> > - B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1],
>> > B_next[0]), p));
>> > - B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3),
>> > p));
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - for (k = 9; k > 0; k--)
>> > - B[k][b] = B[k - 1][b];
>> > -
>> > - B[0][b] = B_next[b];
>> > - }
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]);
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - next_E[b] = F[b];
>> > -
>> > - tmp0 = BS_XOR (Z[0], E[0]);
>> > - tmp1 = BS_AND (Z[0], E[0]);
>> > - F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r)));
>> > - tmp3 = BS_AND (tmp0, r);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[1], E[1]);
>> > - tmp1 = BS_AND (Z[1], E[1]);
>> > - F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[2], E[2]);
>> > - tmp1 = BS_AND (Z[2], E[2]);
>> > - F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[3], E[3]);
>> > - tmp1 = BS_AND (Z[3], E[3]);
>> > - F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); //
>> > ultimate carry
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - E[b] = next_E[b];
>> > -
>> > - X[0] = s1a;
>> > - X[1] = s2a;
>> > - X[2] = s3b;
>> > - X[3] = s4b;
>> > - Y[0] = s3a;
>> > - Y[1] = s4a;
>> > - Y[2] = s5b;
>> > - Y[3] = s6b;
>> > - Z[0] = s5a;
>> > - Z[1] = s6a;
>> > - Z[2] = s1b;
>> > - Z[3] = s2b;
>> > - p = s7a;
>> > - q = s7b;
>> > -
>> > - }
>> > -
>> > - }
>> >
>> > /* Stream GEN */
>> >
>> > - for (h = 8; h < maxlen; h++)
>> > + for (h = 8; h < maxlen; h += 8)
>> > {
>> > - dvbcsa_bs_word_t cb[8];
>> > -
>> > - for (j = 0; j < 4; j++)
>> > - {
>> > - dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0],
>> > A[3][0], &s1a, &s1b);
>> > - dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1],
>> > A[1][1], &s2a, &s2b);
>> > - dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2],
>> > A[0][3], &s3a, &s3b);
>> > - dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0],
>> > A[2][3], &s4a, &s4b);
>> > - dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2],
>> > A[4][2], &s5a, &s5b);
>> > - dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3],
>> > A[2][1], &s6a, &s6b);
>> > - dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3],
>> > A[1][2], &s7a, &s7b);
>> > -
>> > - // use 4x4 xor to produce extra nibble for T3
>> > -
>> > - extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]),
>> > B[6][2]), B[8][3]);
>> > - extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]),
>> > B[2][3]), B[3][2]);
>> > - extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]),
>> > B[3][0]), B[4][1]);
>> > - extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]),
>> > B[2][1]), B[7][0]);
>> > -
>> > - // T1 = xor all inputs
>> > - // in1, in2, D are only used in T1 during initialisation, not
>> > generation
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - dvbcsa_bs_word_t A_next;
>> > -
>> > - A_next = BS_XOR (A[9][b], X[b]);
>> > -
>> > - for (k = 9; k > 0; k--)
>> > - A[k][b] = A[k - 1][b];
>> > -
>> > - A[0][b] = A_next;
>> > - }
>> > -
>> > - dvbcsa_bs_word_t B_next[4];
>> > -
>> > - // T2 = xor all inputs
>> > - // in1, in2 are only used in T1 during initialisation, not
>> > generation
>> > - // if p=0, use this, if p=1, rotate the result left
>> > - for (b = 0; b < 4; b++)
>> > - B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]);
>> > -
>> > - // if p=1, rotate left (yes, this is what we're doing)
>> > - tmp3 = B_next[3];
>> > - B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3],
>> > B_next[2]), p));
>> > - B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2],
>> > B_next[1]), p));
>> > - B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1],
>> > B_next[0]), p));
>> > - B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3),
>> > p));
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - {
>> > - for (k = 9; k > 0; k--)
>> > - B[k][b] = B[k - 1][b];
>> > -
>> > - B[0][b] = B_next[b];
>> > - }
>> > -
>> > - // T3 = xor all inputs
>> > - for (b = 0; b < 4; b++)
>> > - D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]);
>> > -
>> > - // T4 = sum, carry of Z + E + r
>> > - for (b = 0; b < 4; b++)
>> > - next_E[b] = F[b];
>> > -
>> > - tmp0 = BS_XOR (Z[0], E[0]);
>> > - tmp1 = BS_AND (Z[0], E[0]);
>> > - F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r)));
>> > - tmp3 = BS_AND (tmp0, r);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[1], E[1]);
>> > - tmp1 = BS_AND (Z[1], E[1]);
>> > - F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[2], E[2]);
>> > - tmp1 = BS_AND (Z[2], E[2]);
>> > - F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - tmp4 = BS_OR (tmp1, tmp3);
>> > -
>> > - tmp0 = BS_XOR (Z[3], E[3]);
>> > - tmp1 = BS_AND (Z[3], E[3]);
>> > - F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4)));
>> > - tmp3 = BS_AND (tmp0, tmp4);
>> > - r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); //
>> > ultimate carry
>> > -
>> > - for (b = 0; b < 4; b++)
>> > - E[b] = next_E[b];
>> > -
>> > - X[0] = s1a;
>> > - X[1] = s2a;
>> > - X[2] = s3b;
>> > - X[3] = s4b;
>> > - Y[0] = s3a;
>> > - Y[1] = s4a;
>> > - Y[2] = s5b;
>> > - Y[3] = s6b;
>> > - Z[0] = s5a;
>> > - Z[1] = s6a;
>> > - Z[2] = s1b;
>> > - Z[3] = s2b;
>> > -
>> > - p = s7a;
>> > - q = s7b;
>> > -
>> > - // require 4 loops per output byte
>> > - // 2 output bits are a function of the 4 bits of D
>> > - // xor 2 by 2
>> > - cb[7 - 2 * j] = BS_XOR (D[2], D[3]);
>> > - cb[6 - 2 * j] = BS_XOR (D[0], D[1]);
>> > - } // EXTERNAL LOOP
>> > -
>> > -
>> > ////////////////////////////////////////////////////////////////////////////////
>> > -
>> > - dvbcsa_bs_stream_transpose_out(pcks, h, cb);
>> > + dvbcsa_bs_stream_cipher_kernel(®s);
>> > + for (i = 0; i < 8; i++)
>> > + dvbcsa_bs_stream_transpose_out(pcks, h + i, regs.cb + i * 8);
>> >
>> > }
>> >
>> > diff --git a/src/dvbcsa_bs_stream_kernel.h
>> > b/src/dvbcsa_bs_stream_kernel.h
>> > new file mode 100644
>> > index 0000000..b582028
>> > --- /dev/null
>> > +++ b/src/dvbcsa_bs_stream_kernel.h
>> > @@ -0,0 +1,23 @@
>> > +#ifndef DVBCSA_BS_STREAM_KERNEL_H_
>> > +#define DVBCSA_BS_STREAM_KERNEL_H_
>> > +
>> > +#include "dvbcsa_bs.h"
>> > +
>> > +struct dvbcsa_bs_stream_regs_s {
>> > + dvbcsa_bs_word_t A[32 + 10][4];
>> > + dvbcsa_bs_word_t B[32 + 10][4];
>> > + dvbcsa_bs_word_t X[4];
>> > + dvbcsa_bs_word_t Y[4];
>> > + dvbcsa_bs_word_t Z[4];
>> > + dvbcsa_bs_word_t D[4];
>> > + dvbcsa_bs_word_t E[4];
>> > + dvbcsa_bs_word_t F[4];
>> > + dvbcsa_bs_word_t sb[64];
>> > + dvbcsa_bs_word_t cb[64];
>> > + dvbcsa_bs_word_t p;
>> > + dvbcsa_bs_word_t q;
>> > + dvbcsa_bs_word_t r;
>> > +};
>> > +
>> > +#endif
>> > +
>> > diff --git a/src/dvbcsa_bs_stream_kernel.inc
>> > b/src/dvbcsa_bs_stream_kernel.inc
>> > new file mode 100644
>> > index 0000000..fcb0232
>> > --- /dev/null
>> > +++ b/src/dvbcsa_bs_stream_kernel.inc
>> > @@ -0,0 +1,259 @@
>> > +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > +
>> > + tmp0 = BS_XOR (fa, BS_XOR (fb, BS_NOT (BS_OR (BS_XOR (BS_OR (fa, fb),
>> > fc), BS_XOR (fc, fd)))));
>> > + tmp1 = BS_XOR (BS_OR (fa, fb), BS_NOT (BS_AND (fc, BS_OR (fa, BS_XOR
>> > (fb, fd)))));
>> > + tmp2 = BS_XOR (fa, BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd),
>> > fc)));
>> > + tmp3 = BS_XOR (BS_AND (fa, fc), BS_XOR (fa, BS_OR (BS_AND (fa, fb),
>> > fd)));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > +
>> > + tmp0 = BS_XOR (fa, BS_XOR (BS_AND (fb, BS_OR (fc, fd)), BS_XOR (fc,
>> > BS_NOT (fd))));
>> > + tmp1 = BS_OR (BS_AND (fa, BS_XOR (fb, fd)), BS_AND (BS_OR (fa, fb),
>> > fc));
>> > + tmp2 = BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), BS_XOR (fb,
>> > BS_NOT (fc))));
>> > + tmp3 = BS_OR (BS_AND (fa, fd), BS_XOR (fa, BS_XOR (fb, BS_AND (fc,
>> > fd))));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2;
>> > +
>> > + tmp0 = BS_XOR (fa, BS_XOR (fb, BS_XOR (BS_AND (fc, BS_OR (fa, fd)),
>> > fd)));
>> > + tmp1 = BS_XOR (BS_AND (fa, fc), BS_OR (BS_XOR (fa, fd), BS_XOR (BS_OR
>> > (fb, fc), BS_NOT (fd))));
>> > + tmp2 = BS_XOR (fa, BS_XOR (BS_AND (BS_XOR (fb, fc), fd), fc));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (BS_NOT (fe), tmp1));
>> > + *sb = BS_XOR (tmp2, fe);
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2;
>> > +
>> > + tmp0 = BS_XOR (fa, BS_OR (BS_AND (fc, BS_XOR (fa, fd)), BS_XOR (fb,
>> > BS_OR (fc, BS_NOT (fd)))));
>> > + tmp1 = BS_XOR (BS_AND (fa, fb), BS_XOR (fb, BS_XOR (BS_AND (BS_OR
>> > (fa, fc), fd), fc)));
>> > + tmp2 = BS_XOR (fa, BS_OR (BS_AND (fb, fc), BS_XOR (BS_OR (BS_AND (fa,
>> > BS_XOR (fb, fd)), fc), fd)));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, BS_XOR (tmp1, tmp0)));
>> > + *sb = BS_XOR (BS_XOR (*sa, tmp2), fe);
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > +
>> > + tmp0 = BS_OR (BS_XOR (BS_AND (fa, BS_OR (fb, fc)), fb), BS_XOR (BS_OR
>> > (BS_XOR (fa, fc), fd), BS_VAL8(ff)));
>> > + tmp1 = BS_XOR (fb, BS_AND (BS_XOR (fc, fd), BS_XOR (fc, BS_OR (fb,
>> > BS_XOR (fa, fd)))));
>> > + tmp2 = BS_XOR (BS_AND (fa, fc), BS_XOR (fb, BS_AND (BS_OR (fb, BS_XOR
>> > (fa, fc)), fd)));
>> > + tmp3 = BS_OR (BS_AND (BS_XOR (fa, fb), BS_XOR (fc, BS_VAL8(ff))),
>> > fd);
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > +
>> > + tmp0 = BS_XOR (BS_AND (BS_AND (fa, fc), fd), BS_XOR (BS_AND (fb,
>> > BS_OR (fa, fd)), fc));
>> > + tmp1 = BS_NOT (BS_AND (BS_XOR (fa, fc), fd));
>> > + tmp2 = BS_XOR (BS_AND (fa, BS_OR (fb, fc)), BS_XOR (fb, BS_OR (BS_AND
>> > (fb, fc), fd)));
>> > + tmp3 = BS_AND (fc, BS_XOR (BS_AND (fa, BS_XOR (fb, fd)), BS_OR (fb,
>> > fd)));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > +}
>> > +
>> > +static void DVBCSA_INLINE inline
>> > +dvbcsa_bs_stream_sbox7(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,
>> > + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,
>> > + dvbcsa_bs_word_t fe,
>> > + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)
>> > +{
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;
>> > +
>> > + tmp0 = BS_XOR (fb, BS_OR (BS_AND (fc, fd), BS_XOR (fa, BS_XOR (fc,
>> > fd))));
>> > + tmp1 = BS_AND (BS_OR (fb, fd), BS_OR (BS_AND (fa, fc), BS_XOR (fb,
>> > BS_XOR (fc, fd))));
>> > + tmp2 = BS_XOR (BS_OR (fa, fb), BS_XOR (BS_AND (fc, BS_OR (fb, fd)),
>> > fd));
>> > + tmp3 = BS_OR (fd, BS_XOR (BS_AND (fa, fc), BS_VAL8(ff)));
>> > +
>> > + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));
>> > + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));
>> > +}
>> > +
>> > +static void
>> > +dvbcsa_bs_stream_cipher_kernel_init(struct dvbcsa_bs_stream_regs_s
>> > *regs)
>> > +
>> > +#else
>> > +
>> > +static void
>> > +dvbcsa_bs_stream_cipher_kernel(struct dvbcsa_bs_stream_regs_s *regs)
>> > +
>> > +#endif
>> > +
>> > +{
>> > + dvbcsa_bs_word_t extra_B[4];
>> > + dvbcsa_bs_word_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b, s5a, s5b,
>> > s6a, s6b, s7a, s7b;
>> > + dvbcsa_bs_word_t next_E[4];
>> > + dvbcsa_bs_word_t tmp0, tmp1, tmp3, tmp4;
>> > + dvbcsa_bs_word_t (*A)[4], (*B)[4];
>> > + int i, j, b;
>> > +
>> > + A = regs->A + 32;
>> > + B = regs->B + 32;
>> > +
>> > + for (i = 0; i < 8; i++)
>> > + {
>> > + for (j = 0; j < 4; j++)
>> > + {
>> > + dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0],
>> > A[3][0], &s1a, &s1b);
>> > + dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1],
>> > A[1][1], &s2a, &s2b);
>> > + dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2],
>> > A[0][3], &s3a, &s3b);
>> > + dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0],
>> > A[2][3], &s4a, &s4b);
>> > + dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2],
>> > A[4][2], &s5a, &s5b);
>> > + dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3],
>> > A[2][1], &s6a, &s6b);
>> > + dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3],
>> > A[1][2], &s7a, &s7b);
>> > +
>> > + // use 4x4 xor to produce extra nibble for T3
>> > +
>> > + extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]),
>> > B[6][2]), B[8][3]);
>> > + extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]),
>> > B[2][3]), B[3][2]);
>> > + extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]),
>> > B[3][0]), B[4][1]);
>> > + extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]),
>> > B[2][1]), B[7][0]);
>> > +
>> > + // T1 = xor all inputs
>> > + // in1, in2, D are only used in T1 during initialisation, not
>> > generation
>> > + for (b = 0; b < 4; b++)
>> > + {
>> > + A[-1][b] = BS_XOR (A[9][b], regs->X[b]);
>> > +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT
>> > + //A[-1][b] = BS_XOR (BS_XOR (A[-1][b], D[b]), ((j % 2) ?
>> > in2[b] : in1[b]));
>> > + A[-1][b] = BS_XOR (BS_XOR (A[-1][b], regs->D[b]), ((j %
>> > 2) ? regs->sb[8 * i + b] : regs->sb[8 * i + 4 + b]));
>> > +#endif
>> > + }
>> > +
>> > + // T2 = xor all inputs
>> > + // in1, in2 are only used in T1 during initialisation, not
>> > generation
>> > + // if p=0, use this, if p=1, rotate the result left
>> > + for (b = 0; b < 4; b++)
>> > + {
>> > + B[-1][b] = BS_XOR (BS_XOR (B[6][b], B[9][b]),
>> > regs->Y[b]);
>> > +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT
>> > + //B[-1][b] = BS_XOR (B[-1][b], ((j % 2) ? in1[b] :
>> > in2[b]));
>> > + B[-1][b] = BS_XOR (B[-1][b], ((j % 2) ? regs->sb[8 * i +
>> > 4 + b]: regs->sb[8 * i + b]));
>> > +#endif
>> > + }
>> > +
>> > + // if p=1, rotate left (yes, this is what we're doing)
>> > + tmp3 = B[-1][3];
>> > + B[-1][3] = BS_XOR (B[-1][3], BS_AND (BS_XOR (B[-1][3],
>> > B[-1][2]), regs->p));
>> > + B[-1][2] = BS_XOR (B[-1][2], BS_AND (BS_XOR (B[-1][2],
>> > B[-1][1]), regs->p));
>> > + B[-1][1] = BS_XOR (B[-1][1], BS_AND (BS_XOR (B[-1][1],
>> > B[-1][0]), regs->p));
>> > + B[-1][0] = BS_XOR (B[-1][0], BS_AND (BS_XOR (B[-1][0], tmp3),
>> > regs->p));
>> > +
>> > + // T3 = xor all inputs
>> > + for (b = 0; b < 4; b++)
>> > + regs->D[b] = BS_XOR (BS_XOR (regs->E[b], regs->Z[b]),
>> > extra_B[b]);
>> > +
>> > + // T4 = sum, carry of Z + E + r
>> > + for (b = 0; b < 4; b++)
>> > + next_E[b] = regs->F[b];
>> > +
>> > + tmp0 = BS_XOR (regs->Z[0], regs->E[0]);
>> > + tmp1 = BS_AND (regs->Z[0], regs->E[0]);
>> > + regs->F[0] = BS_XOR (regs->E[0], BS_AND (regs->q, BS_XOR
>> > (regs->Z[0], regs->r)));
>> > + tmp3 = BS_AND (tmp0, regs->r);
>> > + tmp4 = BS_OR (tmp1, tmp3);
>> > +
>> > + tmp0 = BS_XOR (regs->Z[1], regs->E[1]);
>> > + tmp1 = BS_AND (regs->Z[1], regs->E[1]);
>> > + regs->F[1] = BS_XOR (regs->E[1], BS_AND (regs->q, BS_XOR
>> > (regs->Z[1], tmp4)));
>> > + tmp3 = BS_AND (tmp0, tmp4);
>> > + tmp4 = BS_OR (tmp1, tmp3);
>> > +
>> > + tmp0 = BS_XOR (regs->Z[2], regs->E[2]);
>> > + tmp1 = BS_AND (regs->Z[2], regs->E[2]);
>> > + regs->F[2] = BS_XOR (regs->E[2], BS_AND (regs->q, BS_XOR
>> > (regs->Z[2], tmp4)));
>> > + tmp3 = BS_AND (tmp0, tmp4);
>> > + tmp4 = BS_OR (tmp1, tmp3);
>> > +
>> > + tmp0 = BS_XOR (regs->Z[3], regs->E[3]);
>> > + tmp1 = BS_AND (regs->Z[3], regs->E[3]);
>> > + regs->F[3] = BS_XOR (regs->E[3], BS_AND (regs->q, BS_XOR
>> > (regs->Z[3], tmp4)));
>> > + tmp3 = BS_AND (tmp0, tmp4);
>> > + regs->r = BS_XOR (regs->r, BS_AND (regs->q, BS_XOR (BS_OR
>> > (tmp1, tmp3), regs->r))); // ultimate carry
>> > +
>> > + for (b = 0; b < 4; b++)
>> > + regs->E[b] = next_E[b];
>> > +
>> > + A--;
>> > + B--;
>> > +
>> > + regs->X[0] = s1a;
>> > + regs->X[1] = s2a;
>> > + regs->X[2] = s3b;
>> > + regs->X[3] = s4b;
>> > + regs->Y[0] = s3a;
>> > + regs->Y[1] = s4a;
>> > + regs->Y[2] = s5b;
>> > + regs->Y[3] = s6b;
>> > + regs->Z[0] = s5a;
>> > + regs->Z[1] = s6a;
>> > + regs->Z[2] = s1b;
>> > + regs->Z[3] = s2b;
>> > +
>> > + regs->p = s7a;
>> > + regs->q = s7b;
>> > +
>> > + // require 4 loops per output byte
>> > + // 2 output bits are a function of the 4 bits of D
>> > + // xor 2 by 2
>> > + regs->cb[i * 8 + 7 - 2 * j] = BS_XOR (regs->D[2],
>> > regs->D[3]);
>> > + regs->cb[i * 8 + 6 - 2 * j] = BS_XOR (regs->D[0],
>> > regs->D[1]);
>> > + } // INTERNAL LOOP
>> > + } // EXTERNAL LOOP
>> > +
>> > + for (i = 0; i < 10; i++)
>> > + for (b = 0; b < 4; b++)
>> > + regs->A[32 + i][b] = regs->A[i][b];
>> > + for (i = 0; i < 10; i++)
>> > + for (b = 0; b < 4; b++)
>> > + regs->B[32 + i][b] = regs->B[i][b];
>> > +}
>> > +
>> > --
>> > 1.9.1
>> >
>> > _______________________________________________
>> > vlc-devel mailing list
>> > To unsubscribe or modify your subscription options:
>> > https://mailman.videolan.org/listinfo/vlc-devel
>>
>> --
>> With my kindest regards,
>>
>> --
>> Jean-Baptiste Kempf
>> http://www.jbkempf.com/ - +33 672 704 734
>> Sent from my Electronic Device
>> _______________________________________________
>> vlc-devel mailing list
>> To unsubscribe or modify your subscription options:
>> https://mailman.videolan.org/listinfo/vlc-devel
>
>
More information about the vlc-devel
mailing list