/***********************************************************************
 * ratpoints-2.1.2                                                     *
 *  - A program to find rational points on hyperelliptic curves        *
 * Copyright (C) 2008, 2009  Michael Stoll                             *
 *                                                                     *
 * This program is free software: you can redistribute it and/or       *
 * modify it under the terms of the GNU General Public License         *
 * as published by the Free Software Foundation, either version 2 of   *
 * the License, or (at your option) any later version.                 *
 *                                                                     *
 * This program is distributed in the hope that it will be useful,     *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of      *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the       *
 * GNU General Public License for more details.                        *
 *                                                                     *
 * You should have received a copy of version 2 of the GNU General     *
 * Public License along with this program.                             *
 * If not, see <http://www.gnu.org/licenses/>.                         *
 ***********************************************************************/

/***********************************************************************
 * init.c                                                              *
 *                                                                     *
 * Macro definitions for the sieve_init functions                      *
 *                                                                     *
 * Michael Stoll, Apr 14, 2009                                         *
 ***********************************************************************/

#include "rp-private.h"

#ifdef USE_SSE
/*
 * CODE_INIT_SIEVE1(prime): SSE build, for primes < LONG_LENGTH.
 *
 * Expands to the definition of
 *
 *   static ratpoints_bit_array *sieve_init_<prime>(void *se1, long b1,
 *                                                  void *args1)
 *
 * se1   is used as a ratpoints_sieve_entry pointer; its is_f_square[] and
 *       inverses[] members are read and its sieve[b1] slot is written.
 * b1    indexes inverses[] and sieve[].
 * args1 is used as a ratpoints_args pointer; the output is placed at
 *       ba_next, which is then advanced by
 *       prime * sizeof(ratpoints_bit_array).
 *
 * Returns the address the words were written to (the same value that is
 * stored in se->sieve[b1]).
 *
 * Output layout: bit k (0 <= k < prime) of the first word is set when
 * is_f_square[r_k] is nonzero, where r_0 = 0 and r_(k+1) is
 * r_k + inverses[b1], reduced by one subtraction of prime once it reaches
 * prime (so inverses[b1] is presumably already in 0..prime-1 -- confirm
 * against the code that fills inverses[]).  That prime-bit pattern is
 * continued without gaps through `prime` consecutive unsigned long words;
 * this relies on LONG_MASK being LONG_LENGTH - 1, which is not visible
 * here.  The SSE build then repeats those `prime` words once more directly
 * behind the first copy.
 */
#define CODE_INIT_SIEVE1(prime) \
static ratpoints_bit_array *sieve_init_##prime(void *se1, long b1, void *args1) \
{ \
  ratpoints_sieve_entry *entry = se1; \
  ratpoints_args *rp_args = args1; \
  int *sq_flags = entry->is_f_square; \
  long b_idx = b1; \
  /* pattern bits left over after packing whole copies into one word */ \
  long rest_bits = LONG_LENGTH % (prime); \
  long whole_copies = LONG_LENGTH / (prime); \
  /* (whole_copies + 1) * prime is the first multiple of prime above \
     LONG_LENGTH; masking it gives the number of bits carried from one \
     output word into the next */ \
  long span_bits = (whole_copies + 1) * (prime); \
  long carry_len = span_bits & LONG_MASK; \
  long keep_len = LONG_LENGTH - carry_len; \
  unsigned long cur_word = 0UL; \
  unsigned long carry_word; \
  unsigned long *out_words; \
  long k; \
  \
  /* one period: bit k is set when the is_f_square[] entry of the k-th \
     residue is nonzero */ \
  { long residue = 0; \
    long step = entry->inverses[b_idx]; \
    for(k = 0; k < (prime); k++) \
    { if(sq_flags[residue]) { cur_word |= 1UL << k; } \
      residue += step; \
      if(residue >= (prime)) { residue -= (prime); } \
    } \
  } \
  \
  /* the tail of a period that spills over into the following word: \
     drop the rest_bits low bits that still fit into the current one */ \
  carry_word = cur_word >> rest_bits; \
  /* replicate the pattern, doubling the filled length each round, \
     until the whole word is covered */ \
  for(k = (prime); k < LONG_LENGTH; k += k) \
  { cur_word |= cur_word << k; } \
  \
  /* claim the output area, then emit the first word */ \
  out_words = (unsigned long *)rp_args->ba_next; \
  rp_args->ba_next += (prime)*sizeof(ratpoints_bit_array); \
  out_words[0] = cur_word; \
  /* every further word is the previous one shifted up by carry_len, \
     with the bits saved in carry_word filling the low end */ \
  for(k = 1; k < (prime); k++) \
  { unsigned long next_carry = cur_word >> keep_len; \
    cur_word = carry_word | (cur_word << carry_len); \
    out_words[k] = cur_word; \
    carry_word = next_carry; \
  } \
  /* duplicate the prime words into the next prime words */ \
  for(k = 0; k < (prime); k++) \
  { out_words[(prime) + k] = out_words[k]; } \
  /* publish the array in the sieve entry and hand it back */ \
  entry->sieve[b_idx] = (ratpoints_bit_array *)out_words; \
  return (ratpoints_bit_array *)out_words; \
}

/* This is for p > LONG_LENGTH */
#define CODE_INIT_SIEVE2(prime) \
static ratpoints_bit_array *sieve_init_##prime(void *se1, long b1, void *args1) \
{ \
  ratpoints_sieve_entry *se = se1; \
  ratpoints_args *args = args1; \
  register long p = (prime); \
  register int *isfs = se->is_f_square; \
  register long b = b1; \
  /* long ldp = 0;  = LONG_LENGTH / p */ \
  /* long p1 = p; = (ldp + 1) * p; */ \
  long wp = p >> LONG_SHIFT; \
  long diff_shift = p & LONG_MASK; \
  long diff = LONG_LENGTH - diff_shift; \
  unsigned long help[(p>>LONG_SHIFT) + 2]; \
  \
  /* initialize help */ \
  { register unsigned long *he = &help[0]; \
    register unsigned long *he1 = &he[(p>>LONG_SHIFT) + 2]; \
    while(he1 != he) { he1--; *he1 = 0UL; } \
  } \
  { register unsigned long work = 0UL; \
    register long a; \
    register long ab = 0; /* a/b mod p */ \
    register long d = se->inverses[b]; \
    register long n = 0; \
    register unsigned long test = 1UL;  \
    for(a = 0; a < p; ) \
    { if(isfs[ab]) { work |= test; } \
      ab += d; \
      if(ab >= p) ab -= p; \
      test <<= 1; \
      a++; \
      if((a & LONG_MASK) == 0) \
      { help[n] = work; n++; work = 0UL; test = 1UL; } \
    } \
    help[n] = work; \
  } \
  \
  { /* fill the bit pattern from help[] into sieve[b][]. \
       sieve[b][a0] has the same semantics as help[b][a0], \
       but here, a0 runs from 0 to p-1 and all bits are filled. */ \
    register unsigned long *si = (unsigned long *)args->ba_next; \
    register long a1; \
    register long a; \
    \
    args->ba_next += p*sizeof(ratpoints_bit_array); \
    /* copy the first chunk from help[] into sieve[num][b][] */ \
    for(a = 0; a < wp; a++) si[a] = help[a]; \
    /* now keep repeating the bit pattern, rotating it in help */ \
    for(a1 = a ; a < p; a++) \
    { register long t = (a1 == wp) ? 0 : a1+1; \
      help[a1] |= help[t]<<diff_shift; \
      si[a] = help[a1]; \
      a1 = t; \
      help[a1] >>= diff; \
    } \
    /* copy into the next p long words */ \
    for(a = 0; a < p; a++) \
    { si[a+p] = si[a]; } \
    /* set sieve array */ \
    se->sieve[b] = (ratpoints_bit_array *)si; \
    return((ratpoints_bit_array *)si); \
  } \
}

#else

/*
 * CODE_INIT_SIEVE1(prime): non-SSE build, for primes < LONG_LENGTH.
 *
 * Expands to the definition of
 *
 *   static ratpoints_bit_array *sieve_init_<prime>(void *se1, long b1,
 *                                                  void *args1)
 *
 * se1   is used as a ratpoints_sieve_entry pointer; its is_f_square[] and
 *       inverses[] members are read and its sieve[b1] slot is written.
 * b1    indexes inverses[] and sieve[].
 * args1 is used as a ratpoints_args pointer; the output is placed at
 *       ba_next, which is then advanced by
 *       prime * sizeof(ratpoints_bit_array).
 *
 * Returns the address the words were written to (the same value that is
 * stored in se->sieve[b1]).
 *
 * Output layout: bit k (0 <= k < prime) of the first word is set when
 * is_f_square[r_k] is nonzero, where r_0 = 0 and r_(k+1) is
 * r_k + inverses[b1], reduced by one subtraction of prime once it reaches
 * prime (so inverses[b1] is presumably already in 0..prime-1 -- confirm
 * against the code that fills inverses[]).  That prime-bit pattern is
 * continued without gaps through `prime` consecutive unsigned long words;
 * this relies on LONG_MASK being LONG_LENGTH - 1, which is not visible
 * here.
 */
#define CODE_INIT_SIEVE1(prime) \
static ratpoints_bit_array *sieve_init_##prime(void *se1, long b1, void *args1) \
{ \
  ratpoints_sieve_entry *entry = se1; \
  ratpoints_args *rp_args = args1; \
  int *sq_flags = entry->is_f_square; \
  long b_idx = b1; \
  /* pattern bits left over after packing whole copies into one word */ \
  long rest_bits = LONG_LENGTH % (prime); \
  long whole_copies = LONG_LENGTH / (prime); \
  /* (whole_copies + 1) * prime is the first multiple of prime above \
     LONG_LENGTH; masking it gives the number of bits carried from one \
     output word into the next */ \
  long span_bits = (whole_copies + 1) * (prime); \
  long carry_len = span_bits & LONG_MASK; \
  long keep_len = LONG_LENGTH - carry_len; \
  unsigned long cur_word = 0UL; \
  unsigned long carry_word; \
  unsigned long *out_words; \
  long k; \
  \
  /* one period: bit k is set when the is_f_square[] entry of the k-th \
     residue is nonzero */ \
  { long residue = 0; \
    long step = entry->inverses[b_idx]; \
    for(k = 0; k < (prime); k++) \
    { if(sq_flags[residue]) { cur_word |= 1UL << k; } \
      residue += step; \
      if(residue >= (prime)) { residue -= (prime); } \
    } \
  } \
  \
  /* the tail of a period that spills over into the following word: \
     drop the rest_bits low bits that still fit into the current one */ \
  carry_word = cur_word >> rest_bits; \
  /* replicate the pattern, doubling the filled length each round, \
     until the whole word is covered */ \
  for(k = (prime); k < LONG_LENGTH; k += k) \
  { cur_word |= cur_word << k; } \
  \
  /* claim the output area, then emit the first word */ \
  out_words = (unsigned long *)rp_args->ba_next; \
  rp_args->ba_next += (prime)*sizeof(ratpoints_bit_array); \
  out_words[0] = cur_word; \
  /* every further word is the previous one shifted up by carry_len, \
     with the bits saved in carry_word filling the low end */ \
  for(k = 1; k < (prime); k++) \
  { unsigned long next_carry = cur_word >> keep_len; \
    cur_word = carry_word | (cur_word << carry_len); \
    out_words[k] = cur_word; \
    carry_word = next_carry; \
  } \
  /* publish the array in the sieve entry and hand it back */ \
  entry->sieve[b_idx] = (ratpoints_bit_array *)out_words; \
  return (ratpoints_bit_array *)out_words; \
}

/* This is for p > LONG_LENGTH */
#define CODE_INIT_SIEVE2(prime) \
static ratpoints_bit_array *sieve_init_##prime(void *se1, long b1, void *args1) \
{ \
  ratpoints_sieve_entry *se = se1; \
  ratpoints_args *args = args1; \
  register long p = (prime); \
  register int *isfs = se->is_f_square; \
  register long b = b1; \
  /* long ldp = 0;  = LONG_LENGTH / p */ \
  /* long p1 = p; = (ldp + 1) * p; */ \
  long wp = p >> LONG_SHIFT; \
  long diff_shift = p & LONG_MASK; \
  long diff = LONG_LENGTH - diff_shift; \
  unsigned long help[(p>>LONG_SHIFT) + 2]; \
  \
  /* initialize help */ \
  { register unsigned long *he = &help[0]; \
    register unsigned long *he1 = &he[(p>>LONG_SHIFT) + 2]; \
    while(he1 != he) { he1--; *he1 = 0UL; } \
  } \
  { register unsigned long work = 0UL; \
    register long a; \
    register long ab = 0; /* a/b mod p */ \
    register long d = se->inverses[b]; \
    register long n = 0; \
    register unsigned long test = 1UL;  \
    for(a = 0; a < p; ) \
    { if(isfs[ab]) { work |= test; } \
      ab += d; \
      if(ab >= p) ab -= p; \
      test <<= 1; \
      a++; \
      if((a & LONG_MASK) == 0) \
      { help[n] = work; n++; work = 0UL; test = 1UL; } \
    } \
    help[n] = work; \
  } \
  \
  { /* fill the bit pattern from help[] into sieve[b][]. \
       sieve[b][a0] has the same semantics as help[b][a0], \
       but here, a0 runs from 0 to p-1 and all bits are filled. */ \
    register unsigned long *si = (unsigned long *)args->ba_next; \
    register long a1; \
    register long a; \
    \
    args->ba_next += p*sizeof(ratpoints_bit_array); \
    /* copy the first chunk from help[] into sieve[num][b][] */ \
    for(a = 0; a < wp; a++) si[a] = help[a]; \
    /* now keep repeating the bit pattern, rotating it in help */ \
    for(a1 = a ; a < p; a++) \
    { register long t = (a1 == wp) ? 0 : a1+1; \
      help[a1] |= help[t]<<diff_shift; \
      si[a] = help[a1]; \
      a1 = t; \
      help[a1] >>= diff; \
    } \
    /* set sieve array */ \
    se->sieve[b] = (ratpoints_bit_array *)si; \
    return((ratpoints_bit_array *)si); \
  } \
}

#endif

#include "init_sieve.h"
