<div dir="ltr"><div><div><div><div>The stream cipher has 2 phases: initialisation and output. Both share the same cipher structure. The original code has two copies of the same function with only small differences, which is a pain to make changes to. So this .inc file is only for that internal use. It is not a header file. What would you suggest?<br></div>1. Rename the file, say to dvbcsa_bs_stream_kernel.int, or any other suffix?<br></div>2. Add the license header.<br></div>3. Compile the same file twice with different defines producing two object files. Isn't that uglier?<br></div>4. Something else.<br><br></div><div class="gmail_extra"><br><div class="gmail_quote">2015-07-06 19:09 GMT+03:00 Jean-Baptiste Kempf <span dir="ltr">&lt;<a href="mailto:jb@videolan.org" target="_blank">jb@videolan.org</a>&gt;</span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Do not name something .inc.<br>
Do not add unlicensed files, please.<br>
<br>
On 26 Jun, glenvt18 wrote :<br>
<div><div class="h5">> 1. Move stream cipher kernel code into a separate file included twice.<br>
> Don't duplicate code.<br>
> 2. Store stream cipher registers in a structure passed as an argument.<br>
> 3. Use virtual shift registers for A and B, avoid copying on each round.<br>
> ---<br>
> src/Makefile.am | 3 +-<br>
> src/dvbcsa_bs_stream.c | 410 +++-------------------------------------<br>
> src/dvbcsa_bs_stream_kernel.h | 23 +++<br>
> src/dvbcsa_bs_stream_kernel.inc | 259 +++++++++++++++++++++++++<br>
> 4 files changed, 315 insertions(+), 380 deletions(-)<br>
> create mode 100644 src/dvbcsa_bs_stream_kernel.h<br>
> create mode 100644 src/dvbcsa_bs_stream_kernel.inc<br>
><br>
> diff --git a/src/Makefile.am b/src/Makefile.am<br>
> index d01c14e..dec4f55 100644<br>
> --- a/src/Makefile.am<br>
> +++ b/src/Makefile.am<br>
> @@ -7,7 +7,8 @@ libdvbcsa_la_SOURCES = dvbcsa_algo.c dvbcsa_block.c dvbcsa_bs_algo.c \<br>
> dvbcsa_bs_block.c dvbcsa_bs_key.c dvbcsa_bs_stream.c \<br>
> dvbcsa_stream.c dvbcsa_bs.h dvbcsa_pv.h dvbcsa_bs_uint64.h \<br>
> dvbcsa_bs_uint32.h dvbcsa_bs_mmx.h dvbcsa_bs_sse.h \<br>
> - dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c<br>
> + dvbcsa_bs_altivec.h dvbcsa_bs_transpose.c dvbcsa_key.c \<br>
> + dvbcsa_bs_stream_kernel.inc dvbcsa_bs_stream_kernel.h<br>
><br>
> if TRANSPOSE_128<br>
> libdvbcsa_la_SOURCES += dvbcsa_bs_transpose128.c<br>
> diff --git a/src/dvbcsa_bs_stream.c b/src/dvbcsa_bs_stream.c<br>
> index 7cb7f09..86e8a6f 100644<br>
> --- a/src/dvbcsa_bs_stream.c<br>
> +++ b/src/dvbcsa_bs_stream.c<br>
> @@ -25,411 +25,63 @@<br>
><br>
> #include "dvbcsa/dvbcsa.h"<br>
> #include "dvbcsa_bs.h"<br>
> +#include "dvbcsa_bs_stream_kernel.h"<br>
><br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> -<br>
> - tmp0 = BS_XOR (fa, BS_XOR (fb, BS_NOT (BS_OR (BS_XOR (BS_OR (fa, fb), fc), BS_XOR (fc, fd)))));<br>
> - tmp1 = BS_XOR (BS_OR (fa, fb), BS_NOT (BS_AND (fc, BS_OR (fa, BS_XOR (fb, fd)))));<br>
> - tmp2 = BS_XOR (fa, BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), fc)));<br>
> - tmp3 = BS_XOR (BS_AND (fa, fc), BS_XOR (fa, BS_OR (BS_AND (fa, fb), fd)));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> -<br>
> - tmp0 = BS_XOR (fa, BS_XOR (BS_AND (fb, BS_OR (fc, fd)), BS_XOR (fc, BS_NOT (fd))));<br>
> - tmp1 = BS_OR (BS_AND (fa, BS_XOR (fb, fd)), BS_AND (BS_OR (fa, fb), fc));<br>
> - tmp2 = BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), BS_XOR (fb, BS_NOT (fc))));<br>
> - tmp3 = BS_OR (BS_AND (fa, fd), BS_XOR (fa, BS_XOR (fb, BS_AND (fc, fd))));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2;<br>
> -<br>
> - tmp0 = BS_XOR (fa, BS_XOR (fb, BS_XOR (BS_AND (fc, BS_OR (fa, fd)), fd)));<br>
> - tmp1 = BS_XOR (BS_AND (fa, fc), BS_OR (BS_XOR (fa, fd), BS_XOR (BS_OR (fb, fc), BS_NOT (fd))));<br>
> - tmp2 = BS_XOR (fa, BS_XOR (BS_AND (BS_XOR (fb, fc), fd), fc));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (BS_NOT (fe), tmp1));<br>
> - *sb = BS_XOR (tmp2, fe);<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2;<br>
> -<br>
> - tmp0 = BS_XOR (fa, BS_OR (BS_AND (fc, BS_XOR (fa, fd)), BS_XOR (fb, BS_OR (fc, BS_NOT (fd)))));<br>
> - tmp1 = BS_XOR (BS_AND (fa, fb), BS_XOR (fb, BS_XOR (BS_AND (BS_OR (fa, fc), fd), fc)));<br>
> - tmp2 = BS_XOR (fa, BS_OR (BS_AND (fb, fc), BS_XOR (BS_OR (BS_AND (fa, BS_XOR (fb, fd)), fc), fd)));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, BS_XOR (tmp1, tmp0)));<br>
> - *sb = BS_XOR (BS_XOR (*sa, tmp2), fe);<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> -<br>
> - tmp0 = BS_OR (BS_XOR (BS_AND (fa, BS_OR (fb, fc)), fb), BS_XOR (BS_OR (BS_XOR (fa, fc), fd), BS_VAL8(ff)));<br>
> - tmp1 = BS_XOR (fb, BS_AND (BS_XOR (fc, fd), BS_XOR (fc, BS_OR (fb, BS_XOR (fa, fd)))));<br>
> - tmp2 = BS_XOR (BS_AND (fa, fc), BS_XOR (fb, BS_AND (BS_OR (fb, BS_XOR (fa, fc)), fd)));<br>
> - tmp3 = BS_OR (BS_AND (BS_XOR (fa, fb), BS_XOR (fc, BS_VAL8(ff))), fd);<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> -<br>
> - tmp0 = BS_XOR (BS_AND (BS_AND (fa, fc), fd), BS_XOR (BS_AND (fb, BS_OR (fa, fd)), fc));<br>
> - tmp1 = BS_NOT (BS_AND (BS_XOR (fa, fc), fd));<br>
> - tmp2 = BS_XOR (BS_AND (fa, BS_OR (fb, fc)), BS_XOR (fb, BS_OR (BS_AND (fb, fc), fd)));<br>
> - tmp3 = BS_AND (fc, BS_XOR (BS_AND (fa, BS_XOR (fb, fd)), BS_OR (fb, fd)));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> -}<br>
> -<br>
> -static void DVBCSA_INLINE inline<br>
> -dvbcsa_bs_stream_sbox7(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> - dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> - dvbcsa_bs_word_t fe,<br>
> - dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> -{<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> -<br>
> - tmp0 = BS_XOR (fb, BS_OR (BS_AND (fc, fd), BS_XOR (fa, BS_XOR (fc, fd))));<br>
> - tmp1 = BS_AND (BS_OR (fb, fd), BS_OR (BS_AND (fa, fc), BS_XOR (fb, BS_XOR (fc, fd))));<br>
> - tmp2 = BS_XOR (BS_OR (fa, fb), BS_XOR (BS_AND (fc, BS_OR (fb, fd)), fd));<br>
> - tmp3 = BS_OR (fd, BS_XOR (BS_AND (fa, fc), BS_VAL8(ff)));<br>
> -<br>
> - *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> - *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> -}<br>
> +#define DVBCSA_BS_STREAM_KERNEL_INIT<br>
> +#include "dvbcsa_bs_stream_kernel.inc"<br>
> +#undef DVBCSA_BS_STREAM_KERNEL_INIT<br>
> +#include "dvbcsa_bs_stream_kernel.inc"<br>
><br>
> void<br>
> dvbcsa_bs_stream_cipher_batch(const struct dvbcsa_bs_key_s *key,<br>
> const struct dvbcsa_bs_batch_s *pcks,<br>
> unsigned int maxlen)<br>
> {<br>
> - dvbcsa_bs_word_t A[10][4];<br>
> - dvbcsa_bs_word_t B[10][4];<br>
> - dvbcsa_bs_word_t X[4];<br>
> - dvbcsa_bs_word_t Y[4];<br>
> - dvbcsa_bs_word_t Z[4];<br>
> - dvbcsa_bs_word_t D[4];<br>
> - dvbcsa_bs_word_t E[4];<br>
> - dvbcsa_bs_word_t F[4];<br>
> - dvbcsa_bs_word_t p;<br>
> - dvbcsa_bs_word_t q;<br>
> - dvbcsa_bs_word_t r;<br>
> - dvbcsa_bs_word_t in1[4];<br>
> - dvbcsa_bs_word_t in2[4];<br>
> - dvbcsa_bs_word_t extra_B[4];<br>
> - dvbcsa_bs_word_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b, s5a, s5b, s6a, s6b, s7a, s7b;<br>
> - dvbcsa_bs_word_t next_E[4];<br>
> - dvbcsa_bs_word_t tmp0, tmp1, tmp3, tmp4;<br>
> - dvbcsa_bs_word_t sb[64];<br>
> - int h, i, j, k, b;<br>
> + struct dvbcsa_bs_stream_regs_s regs __attribute__ ((aligned (BS_BATCH_BYTES)));<br>
><br>
> - dvbcsa_bs_stream_transpose_in(pcks, sb);<br>
> + int i, b;<br>
> + unsigned int h;<br>
> +<br>
> + dvbcsa_bs_stream_transpose_in(pcks, regs.sb);<br>
><br>
> for (b = 0; b < 4; b++)<br>
> {<br>
> for (i = 0; i < 8; i++)<br>
> {<br>
> - A[i][b] = key->stream[b + i * 4];<br>
> - B[i][b] = key->stream[b + i * 4 + 32];<br>
> + regs.A[32 + i][b] = key->stream[b + i * 4];<br>
> + regs.B[32 + i][b] = key->stream[b + i * 4 + 32];<br>
> }<br>
><br>
> // all other regs = 0<br>
> - A[8][b] = BS_VAL8(00);<br>
> - A[9][b] = BS_VAL8(00);<br>
> - B[8][b] = BS_VAL8(00);<br>
> - B[9][b] = BS_VAL8(00);<br>
> -<br>
> - X[b] = BS_VAL8(00);<br>
> - Y[b] = BS_VAL8(00);<br>
> - Z[b] = BS_VAL8(00);<br>
> - D[b] = BS_VAL8(00);<br>
> - E[b] = BS_VAL8(00);<br>
> - F[b] = BS_VAL8(00);<br>
> + regs.A[32 + 8][b] = BS_VAL8(00);<br>
> + regs.A[32 + 9][b] = BS_VAL8(00);<br>
> + regs.B[32 + 8][b] = BS_VAL8(00);<br>
> + regs.B[32 + 9][b] = BS_VAL8(00);<br>
> +<br>
> + regs.X[b] = BS_VAL8(00);<br>
> + regs.Y[b] = BS_VAL8(00);<br>
> + regs.Z[b] = BS_VAL8(00);<br>
> + regs.D[b] = BS_VAL8(00);<br>
> + regs.E[b] = BS_VAL8(00);<br>
> + regs.F[b] = BS_VAL8(00);<br>
> }<br>
><br>
> - p = BS_VAL8(00);<br>
> - q = BS_VAL8(00);<br>
> - r = BS_VAL8(00);<br>
> + regs.p = BS_VAL8(00);<br>
> + regs.q = BS_VAL8(00);<br>
> + regs.r = BS_VAL8(00);<br>
><br>
> /* Stream INIT */<br>
><br>
> - for (i = 0; i < 8; i++)<br>
> - {<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - in1[b] = sb[8 * i + 4 + b];<br>
> - in2[b] = sb[8 * i + b];<br>
> - }<br>
> -<br>
> - for (j = 0; j < 4; j++)<br>
> - {<br>
> - dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0], A[3][0], &s1a, &s1b);<br>
> - dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1], A[1][1], &s2a, &s2b);<br>
> - dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2], A[0][3], &s3a, &s3b);<br>
> - dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0], A[2][3], &s4a, &s4b);<br>
> - dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2], A[4][2], &s5a, &s5b);<br>
> - dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3], A[2][1], &s6a, &s6b);<br>
> - dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3], A[1][2], &s7a, &s7b);<br>
> -<br>
> - extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]), B[6][2]), B[8][3]);<br>
> - extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]), B[2][3]), B[3][2]);<br>
> - extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]), B[3][0]), B[4][1]);<br>
> - extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]), B[2][1]), B[7][0]);<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - dvbcsa_bs_word_t A_next;<br>
> -<br>
> - A_next = BS_XOR (A[9][b], X[b]);<br>
> - A_next = BS_XOR (BS_XOR (A_next, D[b]), ((j % 2) ? in2[b] : in1[b]));<br>
> + dvbcsa_bs_stream_cipher_kernel_init(&amp;regs);<br>
><br>
> - for (k = 9; k > 0; k--)<br>
> - A[k][b] = A[k - 1][b];<br>
> -<br>
> - A[0][b] = A_next;<br>
> - }<br>
> -<br>
> - dvbcsa_bs_word_t B_next[4];<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]);<br>
> - B_next[b] = BS_XOR (B_next[b], ((j % 2) ? in1[b] : in2[b]));<br>
> - }<br>
> -<br>
> - tmp3 = B_next[3];<br>
> - B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3], B_next[2]), p));<br>
> - B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2], B_next[1]), p));<br>
> - B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1], B_next[0]), p));<br>
> - B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3), p));<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - for (k = 9; k > 0; k--)<br>
> - B[k][b] = B[k - 1][b];<br>
> -<br>
> - B[0][b] = B_next[b];<br>
> - }<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]);<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - next_E[b] = F[b];<br>
> -<br>
> - tmp0 = BS_XOR (Z[0], E[0]);<br>
> - tmp1 = BS_AND (Z[0], E[0]);<br>
> - F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r)));<br>
> - tmp3 = BS_AND (tmp0, r);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[1], E[1]);<br>
> - tmp1 = BS_AND (Z[1], E[1]);<br>
> - F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[2], E[2]);<br>
> - tmp1 = BS_AND (Z[2], E[2]);<br>
> - F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[3], E[3]);<br>
> - tmp1 = BS_AND (Z[3], E[3]);<br>
> - F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); // ultimate carry<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - E[b] = next_E[b];<br>
> -<br>
> - X[0] = s1a;<br>
> - X[1] = s2a;<br>
> - X[2] = s3b;<br>
> - X[3] = s4b;<br>
> - Y[0] = s3a;<br>
> - Y[1] = s4a;<br>
> - Y[2] = s5b;<br>
> - Y[3] = s6b;<br>
> - Z[0] = s5a;<br>
> - Z[1] = s6a;<br>
> - Z[2] = s1b;<br>
> - Z[3] = s2b;<br>
> - p = s7a;<br>
> - q = s7b;<br>
> -<br>
> - }<br>
> -<br>
> - }<br>
><br>
> /* Stream GEN */<br>
><br>
> - for (h = 8; h < maxlen; h++)<br>
> + for (h = 8; h < maxlen; h += 8)<br>
> {<br>
> - dvbcsa_bs_word_t cb[8];<br>
> -<br>
> - for (j = 0; j < 4; j++)<br>
> - {<br>
> - dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0], A[3][0], &s1a, &s1b);<br>
> - dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1], A[1][1], &s2a, &s2b);<br>
> - dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2], A[0][3], &s3a, &s3b);<br>
> - dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0], A[2][3], &s4a, &s4b);<br>
> - dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2], A[4][2], &s5a, &s5b);<br>
> - dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3], A[2][1], &s6a, &s6b);<br>
> - dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3], A[1][2], &s7a, &s7b);<br>
> -<br>
> - // use 4x4 xor to produce extra nibble for T3<br>
> -<br>
> - extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]), B[6][2]), B[8][3]);<br>
> - extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]), B[2][3]), B[3][2]);<br>
> - extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]), B[3][0]), B[4][1]);<br>
> - extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]), B[2][1]), B[7][0]);<br>
> -<br>
> - // T1 = xor all inputs<br>
> - // in1, in2, D are only used in T1 during initialisation, not generation<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - dvbcsa_bs_word_t A_next;<br>
> -<br>
> - A_next = BS_XOR (A[9][b], X[b]);<br>
> -<br>
> - for (k = 9; k > 0; k--)<br>
> - A[k][b] = A[k - 1][b];<br>
> -<br>
> - A[0][b] = A_next;<br>
> - }<br>
> -<br>
> - dvbcsa_bs_word_t B_next[4];<br>
> -<br>
> - // T2 = xor all inputs<br>
> - // in1, in2 are only used in T1 during initialisation, not generation<br>
> - // if p=0, use this, if p=1, rotate the result left<br>
> - for (b = 0; b < 4; b++)<br>
> - B_next[b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), Y[b]);<br>
> -<br>
> - // if p=1, rotate left (yes, this is what we're doing)<br>
> - tmp3 = B_next[3];<br>
> - B_next[3] = BS_XOR (B_next[3], BS_AND (BS_XOR (B_next[3], B_next[2]), p));<br>
> - B_next[2] = BS_XOR (B_next[2], BS_AND (BS_XOR (B_next[2], B_next[1]), p));<br>
> - B_next[1] = BS_XOR (B_next[1], BS_AND (BS_XOR (B_next[1], B_next[0]), p));<br>
> - B_next[0] = BS_XOR (B_next[0], BS_AND (BS_XOR (B_next[0], tmp3), p));<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - {<br>
> - for (k = 9; k > 0; k--)<br>
> - B[k][b] = B[k - 1][b];<br>
> -<br>
> - B[0][b] = B_next[b];<br>
> - }<br>
> -<br>
> - // T3 = xor all inputs<br>
> - for (b = 0; b < 4; b++)<br>
> - D[b] = BS_XOR (BS_XOR (E[b], Z[b]), extra_B[b]);<br>
> -<br>
> - // T4 = sum, carry of Z + E + r<br>
> - for (b = 0; b < 4; b++)<br>
> - next_E[b] = F[b];<br>
> -<br>
> - tmp0 = BS_XOR (Z[0], E[0]);<br>
> - tmp1 = BS_AND (Z[0], E[0]);<br>
> - F[0] = BS_XOR (E[0], BS_AND (q, BS_XOR (Z[0], r)));<br>
> - tmp3 = BS_AND (tmp0, r);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[1], E[1]);<br>
> - tmp1 = BS_AND (Z[1], E[1]);<br>
> - F[1] = BS_XOR (E[1], BS_AND (q, BS_XOR (Z[1], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[2], E[2]);<br>
> - tmp1 = BS_AND (Z[2], E[2]);<br>
> - F[2] = BS_XOR (E[2], BS_AND (q, BS_XOR (Z[2], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - tmp4 = BS_OR (tmp1, tmp3);<br>
> -<br>
> - tmp0 = BS_XOR (Z[3], E[3]);<br>
> - tmp1 = BS_AND (Z[3], E[3]);<br>
> - F[3] = BS_XOR (E[3], BS_AND (q, BS_XOR (Z[3], tmp4)));<br>
> - tmp3 = BS_AND (tmp0, tmp4);<br>
> - r = BS_XOR (r, BS_AND (q, BS_XOR (BS_OR (tmp1, tmp3), r))); // ultimate carry<br>
> -<br>
> - for (b = 0; b < 4; b++)<br>
> - E[b] = next_E[b];<br>
> -<br>
> - X[0] = s1a;<br>
> - X[1] = s2a;<br>
> - X[2] = s3b;<br>
> - X[3] = s4b;<br>
> - Y[0] = s3a;<br>
> - Y[1] = s4a;<br>
> - Y[2] = s5b;<br>
> - Y[3] = s6b;<br>
> - Z[0] = s5a;<br>
> - Z[1] = s6a;<br>
> - Z[2] = s1b;<br>
> - Z[3] = s2b;<br>
> -<br>
> - p = s7a;<br>
> - q = s7b;<br>
> -<br>
> - // require 4 loops per output byte<br>
> - // 2 output bits are a function of the 4 bits of D<br>
> - // xor 2 by 2<br>
> - cb[7 - 2 * j] = BS_XOR (D[2], D[3]);<br>
> - cb[6 - 2 * j] = BS_XOR (D[0], D[1]);<br>
> - } // EXTERNAL LOOP<br>
> -<br>
> - ////////////////////////////////////////////////////////////////////////////////<br>
> -<br>
> - dvbcsa_bs_stream_transpose_out(pcks, h, cb);<br>
> + dvbcsa_bs_stream_cipher_kernel(&amp;regs);<br>
> + for (i = 0; i < 8; i++)<br>
> + dvbcsa_bs_stream_transpose_out(pcks, h + i, regs.cb + i * 8);<br>
><br>
> }<br>
><br>
> diff --git a/src/dvbcsa_bs_stream_kernel.h b/src/dvbcsa_bs_stream_kernel.h<br>
> new file mode 100644<br>
> index 0000000..b582028<br>
> --- /dev/null<br>
> +++ b/src/dvbcsa_bs_stream_kernel.h<br>
> @@ -0,0 +1,23 @@<br>
> +#ifndef DVBCSA_BS_STREAM_KERNEL_H_<br>
> +#define DVBCSA_BS_STREAM_KERNEL_H_<br>
> +<br>
> +#include "dvbcsa_bs.h"<br>
> +<br>
> +struct dvbcsa_bs_stream_regs_s {<br>
> + dvbcsa_bs_word_t A[32 + 10][4];<br>
> + dvbcsa_bs_word_t B[32 + 10][4];<br>
> + dvbcsa_bs_word_t X[4];<br>
> + dvbcsa_bs_word_t Y[4];<br>
> + dvbcsa_bs_word_t Z[4];<br>
> + dvbcsa_bs_word_t D[4];<br>
> + dvbcsa_bs_word_t E[4];<br>
> + dvbcsa_bs_word_t F[4];<br>
> + dvbcsa_bs_word_t sb[64];<br>
> + dvbcsa_bs_word_t cb[64];<br>
> + dvbcsa_bs_word_t p;<br>
> + dvbcsa_bs_word_t q;<br>
> + dvbcsa_bs_word_t r;<br>
> +};<br>
> +<br>
> +#endif<br>
> +<br>
> diff --git a/src/dvbcsa_bs_stream_kernel.inc b/src/dvbcsa_bs_stream_kernel.inc<br>
> new file mode 100644<br>
> index 0000000..fcb0232<br>
> --- /dev/null<br>
> +++ b/src/dvbcsa_bs_stream_kernel.inc<br>
> @@ -0,0 +1,259 @@<br>
> +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox1(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> +<br>
> + tmp0 = BS_XOR (fa, BS_XOR (fb, BS_NOT (BS_OR (BS_XOR (BS_OR (fa, fb), fc), BS_XOR (fc, fd)))));<br>
> + tmp1 = BS_XOR (BS_OR (fa, fb), BS_NOT (BS_AND (fc, BS_OR (fa, BS_XOR (fb, fd)))));<br>
> + tmp2 = BS_XOR (fa, BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), fc)));<br>
> + tmp3 = BS_XOR (BS_AND (fa, fc), BS_XOR (fa, BS_OR (BS_AND (fa, fb), fd)));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox2(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> +<br>
> + tmp0 = BS_XOR (fa, BS_XOR (BS_AND (fb, BS_OR (fc, fd)), BS_XOR (fc, BS_NOT (fd))));<br>
> + tmp1 = BS_OR (BS_AND (fa, BS_XOR (fb, fd)), BS_AND (BS_OR (fa, fb), fc));<br>
> + tmp2 = BS_XOR (BS_AND (fb, fd), BS_OR (BS_AND (fa, fd), BS_XOR (fb, BS_NOT (fc))));<br>
> + tmp3 = BS_OR (BS_AND (fa, fd), BS_XOR (fa, BS_XOR (fb, BS_AND (fc, fd))));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox3(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2;<br>
> +<br>
> + tmp0 = BS_XOR (fa, BS_XOR (fb, BS_XOR (BS_AND (fc, BS_OR (fa, fd)), fd)));<br>
> + tmp1 = BS_XOR (BS_AND (fa, fc), BS_OR (BS_XOR (fa, fd), BS_XOR (BS_OR (fb, fc), BS_NOT (fd))));<br>
> + tmp2 = BS_XOR (fa, BS_XOR (BS_AND (BS_XOR (fb, fc), fd), fc));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (BS_NOT (fe), tmp1));<br>
> + *sb = BS_XOR (tmp2, fe);<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox4(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2;<br>
> +<br>
> + tmp0 = BS_XOR (fa, BS_OR (BS_AND (fc, BS_XOR (fa, fd)), BS_XOR (fb, BS_OR (fc, BS_NOT (fd)))));<br>
> + tmp1 = BS_XOR (BS_AND (fa, fb), BS_XOR (fb, BS_XOR (BS_AND (BS_OR (fa, fc), fd), fc)));<br>
> + tmp2 = BS_XOR (fa, BS_OR (BS_AND (fb, fc), BS_XOR (BS_OR (BS_AND (fa, BS_XOR (fb, fd)), fc), fd)));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, BS_XOR (tmp1, tmp0)));<br>
> + *sb = BS_XOR (BS_XOR (*sa, tmp2), fe);<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox5(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> +<br>
> + tmp0 = BS_OR (BS_XOR (BS_AND (fa, BS_OR (fb, fc)), fb), BS_XOR (BS_OR (BS_XOR (fa, fc), fd), BS_VAL8(ff)));<br>
> + tmp1 = BS_XOR (fb, BS_AND (BS_XOR (fc, fd), BS_XOR (fc, BS_OR (fb, BS_XOR (fa, fd)))));<br>
> + tmp2 = BS_XOR (BS_AND (fa, fc), BS_XOR (fb, BS_AND (BS_OR (fb, BS_XOR (fa, fc)), fd)));<br>
> + tmp3 = BS_OR (BS_AND (BS_XOR (fa, fb), BS_XOR (fc, BS_VAL8(ff))), fd);<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox6(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> +<br>
> + tmp0 = BS_XOR (BS_AND (BS_AND (fa, fc), fd), BS_XOR (BS_AND (fb, BS_OR (fa, fd)), fc));<br>
> + tmp1 = BS_NOT (BS_AND (BS_XOR (fa, fc), fd));<br>
> + tmp2 = BS_XOR (BS_AND (fa, BS_OR (fb, fc)), BS_XOR (fb, BS_OR (BS_AND (fb, fc), fd)));<br>
> + tmp3 = BS_AND (fc, BS_XOR (BS_AND (fa, BS_XOR (fb, fd)), BS_OR (fb, fd)));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> +}<br>
> +<br>
> +static void DVBCSA_INLINE inline<br>
> +dvbcsa_bs_stream_sbox7(dvbcsa_bs_word_t fa, dvbcsa_bs_word_t fb,<br>
> + dvbcsa_bs_word_t fc, dvbcsa_bs_word_t fd,<br>
> + dvbcsa_bs_word_t fe,<br>
> + dvbcsa_bs_word_t *sa, dvbcsa_bs_word_t *sb)<br>
> +{<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp2, tmp3;<br>
> +<br>
> + tmp0 = BS_XOR (fb, BS_OR (BS_AND (fc, fd), BS_XOR (fa, BS_XOR (fc, fd))));<br>
> + tmp1 = BS_AND (BS_OR (fb, fd), BS_OR (BS_AND (fa, fc), BS_XOR (fb, BS_XOR (fc, fd))));<br>
> + tmp2 = BS_XOR (BS_OR (fa, fb), BS_XOR (BS_AND (fc, BS_OR (fb, fd)), fd));<br>
> + tmp3 = BS_OR (fd, BS_XOR (BS_AND (fa, fc), BS_VAL8(ff)));<br>
> +<br>
> + *sa = BS_XOR (tmp0, BS_AND (fe, tmp1));<br>
> + *sb = BS_XOR (tmp2, BS_AND (fe, tmp3));<br>
> +}<br>
> +<br>
> +static void<br>
> +dvbcsa_bs_stream_cipher_kernel_init(struct dvbcsa_bs_stream_regs_s *regs)<br>
> +<br>
> +#else<br>
> +<br>
> +static void<br>
> +dvbcsa_bs_stream_cipher_kernel(struct dvbcsa_bs_stream_regs_s *regs)<br>
> +<br>
> +#endif<br>
> +<br>
> +{<br>
> + dvbcsa_bs_word_t extra_B[4];<br>
> + dvbcsa_bs_word_t s1a, s1b, s2a, s2b, s3a, s3b, s4a, s4b, s5a, s5b, s6a, s6b, s7a, s7b;<br>
> + dvbcsa_bs_word_t next_E[4];<br>
> + dvbcsa_bs_word_t tmp0, tmp1, tmp3, tmp4;<br>
> + dvbcsa_bs_word_t (*A)[4], (*B)[4];<br>
> + int i, j, b;<br>
> +<br>
> + A = regs->A + 32;<br>
> + B = regs->B + 32;<br>
> +<br>
> + for (i = 0; i < 8; i++)<br>
> + {<br>
> + for (j = 0; j < 4; j++)<br>
> + {<br>
> + dvbcsa_bs_stream_sbox1(A[0][2], A[5][1], A[6][3], A[8][0], A[3][0], &s1a, &s1b);<br>
> + dvbcsa_bs_stream_sbox2(A[2][2], A[5][3], A[6][0], A[8][1], A[1][1], &s2a, &s2b);<br>
> + dvbcsa_bs_stream_sbox3(A[1][0], A[4][1], A[4][3], A[5][2], A[0][3], &s3a, &s3b);<br>
> + dvbcsa_bs_stream_sbox4(A[0][1], A[1][3], A[3][2], A[7][0], A[2][3], &s4a, &s4b);<br>
> + dvbcsa_bs_stream_sbox5(A[3][3], A[5][0], A[7][1], A[8][2], A[4][2], &s5a, &s5b);<br>
> + dvbcsa_bs_stream_sbox6(A[3][1], A[4][0], A[6][2], A[8][3], A[2][1], &s6a, &s6b);<br>
> + dvbcsa_bs_stream_sbox7(A[2][0], A[6][1], A[7][2], A[7][3], A[1][2], &s7a, &s7b);<br>
> +<br>
> + // use 4x4 xor to produce extra nibble for T3<br>
> +<br>
> + extra_B[3] = BS_XOR (BS_XOR (BS_XOR (B[2][0], B[5][1]), B[6][2]), B[8][3]);<br>
> + extra_B[2] = BS_XOR (BS_XOR (BS_XOR (B[5][0], B[7][1]), B[2][3]), B[3][2]);<br>
> + extra_B[1] = BS_XOR (BS_XOR (BS_XOR (B[4][3], B[7][2]), B[3][0]), B[4][1]);<br>
> + extra_B[0] = BS_XOR (BS_XOR (BS_XOR (B[8][2], B[5][3]), B[2][1]), B[7][0]);<br>
> +<br>
> + // T1 = xor all inputs<br>
> + // in1, in2, D are only used in T1 during initialisation, not generation<br>
> + for (b = 0; b < 4; b++)<br>
> + {<br>
> + A[-1][b] = BS_XOR (A[9][b], regs->X[b]);<br>
> +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT<br>
> + //A[-1][b] = BS_XOR (BS_XOR (A[-1][b], D[b]), ((j % 2) ? in2[b] : in1[b]));<br>
> + A[-1][b] = BS_XOR (BS_XOR (A[-1][b], regs->D[b]), ((j % 2) ? regs->sb[8 * i + b] : regs->sb[8 * i + 4 + b]));<br>
> +#endif<br>
> + }<br>
> +<br>
> + // T2 = xor all inputs<br>
> + // in1, in2 are only used in T1 during initialisation, not generation<br>
> + // if p=0, use this, if p=1, rotate the result left<br>
> + for (b = 0; b < 4; b++)<br>
> + {<br>
> + B[-1][b] = BS_XOR (BS_XOR (B[6][b], B[9][b]), regs->Y[b]);<br>
> +#ifdef DVBCSA_BS_STREAM_KERNEL_INIT<br>
> + //B[-1][b] = BS_XOR (B[-1][b], ((j % 2) ? in1[b] : in2[b]));<br>
> + B[-1][b] = BS_XOR (B[-1][b], ((j % 2) ? regs->sb[8 * i + 4 + b]: regs->sb[8 * i + b]));<br>
> +#endif<br>
> + }<br>
> +<br>
> + // if p=1, rotate left (yes, this is what we're doing)<br>
> + tmp3 = B[-1][3];<br>
> + B[-1][3] = BS_XOR (B[-1][3], BS_AND (BS_XOR (B[-1][3], B[-1][2]), regs->p));<br>
> + B[-1][2] = BS_XOR (B[-1][2], BS_AND (BS_XOR (B[-1][2], B[-1][1]), regs->p));<br>
> + B[-1][1] = BS_XOR (B[-1][1], BS_AND (BS_XOR (B[-1][1], B[-1][0]), regs->p));<br>
> + B[-1][0] = BS_XOR (B[-1][0], BS_AND (BS_XOR (B[-1][0], tmp3), regs->p));<br>
> +<br>
> + // T3 = xor all inputs<br>
> + for (b = 0; b < 4; b++)<br>
> + regs->D[b] = BS_XOR (BS_XOR (regs->E[b], regs->Z[b]), extra_B[b]);<br>
> +<br>
> + // T4 = sum, carry of Z + E + r<br>
> + for (b = 0; b < 4; b++)<br>
> + next_E[b] = regs->F[b];<br>
> +<br>
> + tmp0 = BS_XOR (regs->Z[0], regs->E[0]);<br>
> + tmp1 = BS_AND (regs->Z[0], regs->E[0]);<br>
> + regs->F[0] = BS_XOR (regs->E[0], BS_AND (regs->q, BS_XOR (regs->Z[0], regs->r)));<br>
> + tmp3 = BS_AND (tmp0, regs->r);<br>
> + tmp4 = BS_OR (tmp1, tmp3);<br>
> +<br>
> + tmp0 = BS_XOR (regs->Z[1], regs->E[1]);<br>
> + tmp1 = BS_AND (regs->Z[1], regs->E[1]);<br>
> + regs->F[1] = BS_XOR (regs->E[1], BS_AND (regs->q, BS_XOR (regs->Z[1], tmp4)));<br>
> + tmp3 = BS_AND (tmp0, tmp4);<br>
> + tmp4 = BS_OR (tmp1, tmp3);<br>
> +<br>
> + tmp0 = BS_XOR (regs->Z[2], regs->E[2]);<br>
> + tmp1 = BS_AND (regs->Z[2], regs->E[2]);<br>
> + regs->F[2] = BS_XOR (regs->E[2], BS_AND (regs->q, BS_XOR (regs->Z[2], tmp4)));<br>
> + tmp3 = BS_AND (tmp0, tmp4);<br>
> + tmp4 = BS_OR (tmp1, tmp3);<br>
> +<br>
> + tmp0 = BS_XOR (regs->Z[3], regs->E[3]);<br>
> + tmp1 = BS_AND (regs->Z[3], regs->E[3]);<br>
> + regs->F[3] = BS_XOR (regs->E[3], BS_AND (regs->q, BS_XOR (regs->Z[3], tmp4)));<br>
> + tmp3 = BS_AND (tmp0, tmp4);<br>
> + regs->r = BS_XOR (regs->r, BS_AND (regs->q, BS_XOR (BS_OR (tmp1, tmp3), regs->r))); // ultimate carry<br>
> +<br>
> + for (b = 0; b < 4; b++)<br>
> + regs->E[b] = next_E[b];<br>
> +<br>
> + A--;<br>
> + B--;<br>
> +<br>
> + regs->X[0] = s1a;<br>
> + regs->X[1] = s2a;<br>
> + regs->X[2] = s3b;<br>
> + regs->X[3] = s4b;<br>
> + regs->Y[0] = s3a;<br>
> + regs->Y[1] = s4a;<br>
> + regs->Y[2] = s5b;<br>
> + regs->Y[3] = s6b;<br>
> + regs->Z[0] = s5a;<br>
> + regs->Z[1] = s6a;<br>
> + regs->Z[2] = s1b;<br>
> + regs->Z[3] = s2b;<br>
> +<br>
> + regs->p = s7a;<br>
> + regs->q = s7b;<br>
> +<br>
> + // require 4 loops per output byte<br>
> + // 2 output bits are a function of the 4 bits of D<br>
> + // xor 2 by 2<br>
> + regs->cb[i * 8 + 7 - 2 * j] = BS_XOR (regs->D[2], regs->D[3]);<br>
> + regs->cb[i * 8 + 6 - 2 * j] = BS_XOR (regs->D[0], regs->D[1]);<br>
> + } // INTERNAL LOOP<br>
> + } // EXTERNAL LOOP<br>
> +<br>
> + for (i = 0; i < 10; i++)<br>
> + for (b = 0; b < 4; b++)<br>
> + regs->A[32 + i][b] = regs->A[i][b];<br>
> + for (i = 0; i < 10; i++)<br>
> + for (b = 0; b < 4; b++)<br>
> + regs->B[32 + i][b] = regs->B[i][b];<br>
> +}<br>
> +<br>
> --<br>
> 1.9.1<br>
><br>
</div></div>> _______________________________________________<br>
> vlc-devel mailing list<br>
> To unsubscribe or modify your subscription options:<br>
> <a href="https://mailman.videolan.org/listinfo/vlc-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/vlc-devel</a><br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
With my kindest regards,<br>
<br>
--<br>
Jean-Baptiste Kempf<br>
<a href="http://www.jbkempf.com/" rel="noreferrer" target="_blank">http://www.jbkempf.com/</a> - +33 672 704 734<br>
Sent from my Electronic Device<br>
_______________________________________________<br>
vlc-devel mailing list<br>
To unsubscribe or modify your subscription options:<br>
<a href="https://mailman.videolan.org/listinfo/vlc-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/vlc-devel</a><br>
</font></span></blockquote></div><br></div>