Main Page   Reference Manual   Compound List   File List  

libecc/polynomial.h

Go to the documentation of this file.
00001 //
00012 //
00013 // This file is part of the libecc package.
00014 // Copyright (C) 2002 - 2004 by
00015 //
00016 // Carlo Wood, Run on IRC <carlo@alinoe.com>
00017 // RSA-1024 0x624ACAD5 1997-01-26                    Sign & Encrypt
00018 // Fingerprint16 = 32 EC A7 B6 AC DB 65 A6  F6 F6 55 DD 1C DC FF 61
00019 //
00020 // This program is free software; you can redistribute it and/or
00021 // modify it under the terms of the GNU General Public License
00022 // as published by the Free Software Foundation; either version 2
00023 // of the License, or (at your option) any later version.
00024 //
00025 // This program is distributed in the hope that it will be useful,
00026 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00027 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00028 // GNU General Public License for more details.
00029 //
00030 // You should have received a copy of the GNU General Public License
00031 // along with this program; if not, write to the Free Software
00032 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00033 //
00034 
00035 #ifndef LIBECC_POLYNOMIAL_H
00036 #define LIBECC_POLYNOMIAL_H
00037 
00038 #include <stdexcept>
00039 #include <libecc/bitset.h>
00040 #include <libecc/debug.h>
00041 #if ECC_DEBUGOUTPUT
00042 #include <libcwd/cwprint.h>
00043 #endif
00044 
00045 #if ECC_DEBUG
00046 #define LIBECC_AUGMENTED 1
00047 #define LIBECC_INPLACE (1 || !LIBECC_AUGMENTED)
00048 #define LIBECC_SWAPCOLUMNS (1 || LIBECC_INPLACE)
00049 #else
00050 // Don't change these.
00051 #define LIBECC_AUGMENTED 0
00052 #define LIBECC_INPLACE 1
00053 #define LIBECC_SWAPCOLUMNS 1
00054 #endif
00055 
00056 namespace libecc {
00057 
00058 // Forward declarations: the class template and its non-member operator templates.
00059 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00060   class polynomial;
00061 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00062   polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00063 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00064   polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00065 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00066   bool operator==(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00067 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00068   bool operator!=(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00069 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00070   std::ostream& operator<<(std::ostream&, polynomial<m, k, k1, k2> const&);
00071 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00072   std::ostream& operator<<(std::ostream&, typename polynomial<m, k, k1, k2>::xor_type const&);
00073 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00074   typename polynomial<m, k, k1, k2>::xor_type operator+(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00075 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00076   typename polynomial<m, k, k1, k2>::xor_type operator-(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00077 
00091 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00092   class polynomial {    // Polynomial with m one-bit coefficients; the reduction polynomial is M(t) = t^m + t^k [+ t^k1 + t^k2] + 1.
00093     public:
00097       typedef Operator::bitsetExpression<m, false, false, Operator::bitsetXOR> xor_type;      // Type returned by operator+ and operator-.
00098
00099       // Fix this if you add members in front of M_coefficients.
00100       static size_t const offsetof_vector = bitset<m>::offsetof_vector;
00101
00102     private:
00103       bitset<m> M_coefficients;                         // The coefficients, one bit per power of t.
00104       static polynomial<m, k, k1, k2> const one;        // Returned by unity().
00105       static bool S_normal_initialized;                 // normal() calls calculate_normal() as long as this is false.
00106       static bitset<m> S_normal;                        // Returned by normal().
00107
00108     public:
00112       static polynomial const& unity(void) { return one; }
00113
00114     public:
00118       polynomial(void) { }
00119
00123       explicit polynomial(bitset_digit_t coefficients) : M_coefficients(coefficients) { }
00124
00128       polynomial(polynomial const& p) : M_coefficients(p.M_coefficients) { }
00129
00133       explicit polynomial(bitset<m> const& coefficients) : M_coefficients(coefficients) { }
00134
00138       polynomial(std::string const& coefficients) : M_coefficients(coefficients) { }
00139
00180       polynomial(xor_type const& expression) : M_coefficients(expression) { }
00181
00185       polynomial& operator=(polynomial const& p) { M_coefficients = p.M_coefficients; return *this; }
00186
00190       polynomial& operator=(bitset<m> const& coefficients) { M_coefficients = coefficients; return *this; }
00191
00196       polynomial& operator=(xor_type const& expression);
00197
00201       polynomial(polynomial const& b, polynomial const& c);
00202
00206       static unsigned int const square_digits = 2 * bitset_base<m>::digits + 4;
00207
00223       polynomial& square(bitset_digit_t* tmpbuf) const; // tmpbuf must be an array of `square_digits' bitset_digit_t.
00224
00232       bool sqrt(void);
00233
00234       // The field arithmetic is implemented in terms of operations on the bits.
00238       polynomial& operator+=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00239
00243       polynomial& operator-=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }        // Same XOR as operator+=.
00244
00248       polynomial& operator*=(polynomial const& p);
00249 #ifdef LIBECC_DOXYGEN
00250       // Stupid doxygen.
00262       polynomial& operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00263 #else
00264       // The real prototype.
00265       polynomial& operator*=(xor_type const& expr);
00266 #endif
00267
00271       polynomial& operator/=(polynomial const& p);
00272 #ifdef LIBECC_DOXYGEN
00273       // Stupid doxygen.
00285       polynomial& operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00286 #else
00287       // The real prototype.
00288       polynomial& operator/=(xor_type const& expr);
00289 #endif
00290
00299       static bitset<m> const& normal(void) { if (!S_normal_initialized) calculate_normal(); return S_normal; }
00300
00312       int trace(void) const
00313      {
00314         // This method was invented by me, so give me credit for it when you use it somewhere. Thank you.
00315         // Carlo Wood <carlo@alinoe.com> -- 4 December 2004.
00316         int tr = 0;
00317         if ((m & 1))
00318           tr = M_coefficients.template test<0>();
00319         if (((m - k) & 1))
00320           tr ^= M_coefficients.template test<m - k>();
00321         if (k1)                 // k1 and k2 are only looked at when k1 is non-zero.
00322         {
00323           if (((m - k1) & 1))
00324             tr ^= M_coefficients.template test<m - k1>();
00325           if (((m - k2) & 1))
00326             tr ^= M_coefficients.template test<m - k2>();
00327         }
00328         return tr;
00329       }
00330
00363       friend xor_type operator+ <>(polynomial const& p1, polynomial const& p2);
00364
00373       friend xor_type operator- <>(polynomial const& p1, polynomial const& p2);
00374
00378       friend polynomial operator* <>(polynomial const& p1, polynomial const& p2);
00379 #ifdef LIBECC_DOXYGEN
00380       // Only added for documentational reasons.  The result type matches the real operator* above.
00386       friend polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00392       friend polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00393 #endif
00394
00398       friend polynomial operator/ <>(polynomial const& p1, polynomial const& p2);
00399 #ifdef LIBECC_DOXYGEN
00400       // Only added for documentational reasons.  The result type matches the real operator/ above.
00406       friend polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00412       friend polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00413 #endif
00414
00418       friend bool operator== <>(polynomial const& p1, polynomial const& p2);
00419 #ifdef LIBECC_DOXYGEN
00420       // Only added for documentational reasons.
00428       friend bool operator==(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00436       friend bool operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00437 #endif
00438
00442       friend bool operator!= <>(polynomial const& p1, polynomial const& p2);
00443 #ifdef LIBECC_DOXYGEN
00444       // Only added for documentational reasons.
00452       friend bool operator!=(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00460       friend bool operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00461 #endif
00462
00468       friend std::ostream& operator<< <>(std::ostream& os, polynomial const& p);
00469 #ifdef LIBECC_DOXYGEN
00470       // Only added for documentational reasons.
00476       friend std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2>::xor_type const& expr);
00477 #endif
00478
00482       bitset<m> const& get_bitset(void) const{ return M_coefficients; }
00483
00487       bitset<m>& get_bitset(void) { return M_coefficients; }
00488
00489     private:
00490       static void reduce(bitset_digit_t* buf);
00491       static bitset_digit_t reducea(bitset_digit_t* a);
00492       static void calculate_normal(void);
00493
00494       void multiply_with(polynomial const& p1, bitset<m>& result) const;
00495 #if ECC_DEBUG
00496 #if LIBECC_AUGMENTED
00497       void print_matrix(bitset<2 * m> const* matrix, bitset<m> const& pivotted);
00498 #else
00499       void print_matrix(bitset<m> const* matrix, bitset<m> const& pivotted);
00500 #endif
00501 #endif
00502   };
00503 
00504 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00505   polynomial<m, k, k1, k2> const polynomial<m, k, k1, k2>::one(1);     // Constructed from the digit value 1; this is what unity() returns.
00506 
00507 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00508   bool polynomial<m, k, k1, k2>::sqrt(void)     // Rewrites *this in place; returns false (leaving *this untouched) only in the `m and k both even' case below.
00509   {
00510     if (!k1)                    // k1 == 0: only m and k are used in this branch.
00511     {
00512       bitset<m> highbits;       // Collects the bits that end up in the upper part of the result (see the second loop at the end).
00513       highbits.reset();
00514
00515       // First convert all odd powers into even powers
00516       if ((m & 1) == 1)
00517       {
00518         if ((k & 1) == 1)               // m and k are odd?
00519         {
00520           for(unsigned int bit = 1; bit < m; bit += 2)
00521           {
00522             if (M_coefficients.test(bit))
00523             {
00524               if (bit >= m - k)
00525                 highbits.flip(bit + k - m);
00526               else
00527                 M_coefficients.flip(bit + k);
00528               highbits.flip(bit);
00529             }
00530           }
00531         }
00532         else                    // m is odd and k is even
00533         {
00534           for(unsigned int bit = 1; bit < m; bit += 2)
00535           {
00536             if (M_coefficients.test(bit))
00537             {
00538               if (bit >= m - k)
00539               {
00540                 M_coefficients.flip(bit + 2 * k - m);
00541                 M_coefficients.flip(bit + k - m);
00542               }
00543               else
00544                 M_coefficients.flip(bit + k);
00545               highbits.flip(bit);
00546             }
00547           }
00548         }
00549       }
00550       else if ((k & 1) == 1)    // m is even and k is odd
00551       {
00552         for(unsigned int bit = 1; bit < m; bit += 2)
00553         {
00554           if (M_coefficients.test(bit))
00555           {
00556             if (bit < k)
00557             {
00558               M_coefficients.flip(bit + k);
00559               M_coefficients.flip(bit + m - k);
00560               highbits.flip(bit + m - k);
00561             }
00562             else
00563             {
00564               M_coefficients.flip(bit - k);
00565               highbits.flip(bit - k);
00566             }
00567           }
00568         }
00569       }
00570       else                      // m and k are both even (actually, this should never be used as reduction polynomial).
00571       {
00572         for(unsigned int bit = 1; bit < m; bit += 2)
00573           if (M_coefficients.test(bit))
00574             return false;               // This can't be a square
00575       }
00576
00577       // Next handle the remaining even powers
00578       unsigned int bit_to = 1;  // Bit 0 stays where it is; even bit 2i is copied to bit i.
00579       for(unsigned int bit = 2; bit < m; bit += 2)
00580       {
00581         if (M_coefficients.test(bit))
00582           M_coefficients.set(bit_to);
00583         else
00584           M_coefficients.clear(bit_to);
00585         ++bit_to;
00586       }
00587       for(unsigned int bit = m % 2; bit < m; bit += 2)  // Every second bit of highbits fills the rest of the result.
00588       {
00589         if (highbits.test(bit))
00590           M_coefficients.set(bit_to);
00591         else
00592           M_coefficients.clear(bit_to);
00593         ++bit_to;
00594       }
00595     }
00596     else
00597     {
00598       struct Root {             // Helper that computes its `value' once, in the constructor.
00599         polynomial<m, k, k1, k2> value;
00600         Root(polynomial<m, k, k1, k2> const& p) : value(p)
00601         {
00602           // Two squarings here plus two per loop iteration; p2 or p4 is then picked by the parity of m.
00603           // NOTE(review): presumably this yields p^(2^(m-1)), assuming square() returns a polynomial stored in the given buffer -- confirm.
00602           bitset_digit_t p2buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00603           polynomial<m, k, k1, k2>& p2 = value.square(p2buf);
00604           bitset_digit_t p4buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00605           polynomial<m, k, k1, k2>& p4 = p2.square(p4buf);
00606           for (unsigned int i = 1; i < m / 2; ++i)      // Unsigned, like m.
00607           {
00608             p4.square(p2buf);
00609             p2.square(p4buf);
00610           }
00611           value = (m % 2 == 0) ? p2 : p4;
00612         }
00613       };
00614       static Root const root_of_t(polynomial<m, k, k1, k2>(2));        // Built once, from the polynomial with digit value 2.
00615       polynomial<m, k, k1, k2> tmp(0);
00616       bitset<m> tmp2;
00617       tmp2.reset();
00618       for(unsigned int bit = 0; bit < m / 2; ++bit)     // tmp2 gets the even-numbered bits, tmp the odd-numbered ones, both packed downwards.
00619       {
00620         if (M_coefficients.test(2 * bit))
00621           tmp2.set(bit);
00622         if (M_coefficients.test(2 * bit + 1))
00623           tmp.get_bitset().set(bit);
00624       }
00625       if (m % 2 == 1 && M_coefficients.test(m - 1))     // For odd m the top bit is even-numbered and not covered by the loop.
00626         tmp2.set(m / 2);
00627       M_coefficients = tmp2;
00628       *this += tmp * root_of_t.value;
00629     }
00630     return true;
00631   }
00632 
00633 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00634   inline polynomial<m, k, k1, k2>&
00635   polynomial<m, k, k1, k2>::operator*=(polynomial const& p)     // Multiplies *this by p in place and returns *this.
00636   {
00637     multiply_with(p, M_coefficients);   // The result is written back into our own coefficients.
00638     return *this;
00639   }
00640 
00641 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00642   inline polynomial<m, k, k1, k2>&
00643   polynomial<m, k, k1, k2>::operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
00644   {
00645     return (*this *= polynomial<m, k, k1, k2>(expr));   // Convert the expression to a polynomial first, then use operator*=(polynomial const&).
00646   }
00647 
00648 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00649   inline polynomial<m, k, k1, k2>&
00650   polynomial<m, k, k1, k2>::operator=(xor_type const& expression)       // Assigns the given xor expression to the coefficients and returns *this.
00651   {
00652     M_coefficients = expression;
00653     return *this;
00654   }
00655 
00656 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00657   void
00658   polynomial<m, k, k1, k2>::multiply_with(polynomial const& p1, bitset<m>& result) const       // Stores the reduced product of *this and p1 in `result'.
00659  {
00660     bitset_digit_t output[bitset<m>::digits * 2] __attribute__ ((aligned (8)));         // Unreduced product, twice as many digits as one operand.
00661
00662     // Find the first non-zero digit in the input polynomial of this object.
00663     unsigned int digit = 0;
00664     while(M_coefficients.digit(digit) == 0)             // Still zero?
00665     {
00666       output[digit] = 0;                                // That means that the output will end on zero too.
00667       if (++digit == bitset<m>::digits)
00668       {
00669         result.reset();                                 // The whole polynomial is zero, the result will be zero too.
00670         return;
00671       }
00672     }
00673     unsigned int uninitialized_digit = digit;           // The next digit of `output' that has not yet been initialized.
00674     // Find the first digit in the input polynomial of this object whose first bit is set.
00675     for(; digit < bitset<m>::digits; ++digit)
00676     {
00677       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00678       {
00679         // Set the output to p1 times this bit.
00680         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00681           output[d + digit] = p1.get_bitset().digit(d);
00682         uninitialized_digit = bitset<m>::digits + digit;
00683         ++digit;                                        // Set to the next input digit.
00684         break;
00685       }
00686       output[digit] = 0;                                // Initialize this digit of the output to 0.
00687       ++uninitialized_digit;
00688     }
00689     // Set the remaining digits to zero, if any.
00690     for(unsigned int remaining_digit = uninitialized_digit; remaining_digit < sizeof(output) / sizeof(bitset_digit_t); ++remaining_digit)
00691       output[remaining_digit] = 0;
00692     // Find for the remaining input digits the ones that have their first bit set.
00693     for(; digit < bitset<m>::digits; ++digit)
00694       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00695       {
00696         // Add p1 times this bit to the output.
00697         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00698           output[d + digit] ^= p1.get_bitset().digit(d);
00699       }
00700     // Create a bitset that will contain p1, shifted at most bitset_digit_bits - 1 to the left.
00701     bitset<m + bitset_digit_bits - 1> shifted_p1;
00702     // Start with having it shifted 1 bit to the left.
00703     bitset_digit_t carry = 0;
00704     unsigned int d = 0;
00705     for(bitset_digit_t const* ptr = p1.get_bitset().digits_ptr(); ptr < p1.get_bitset().digits_ptr() + bitset<m>::digits; ++ptr, ++d)
00706     {
00707       shifted_p1.rawdigit(d) = (*ptr << 1) | carry;
00708       carry = *ptr >> (8 * sizeof(bitset_digit_t) - 1);        // The top bit of this digit moves into the next digit.
00709     }
00710     if (d < bitset<m + bitset_digit_bits - 1>::digits)
00711       shifted_p1.rawdigit(d) = carry;
00712     for(bitset_digit_t bitmask = 2;;)                   // Handle bit 1 and up of every input digit; bit 0 was handled above.
00713     {
00714       for(unsigned int digit = 0; digit < bitset<m>::digits; ++digit)  // Note: this `digit' (and `d' below) hides the outer variable of the same name.
00715         if ((M_coefficients.digit(digit) & bitmask))
00716         {
00717           for (unsigned int d = 0; d < shifted_p1.digits; ++d)
00718             output[d + digit] ^= shifted_p1.digit(d);
00719         }
00720       bitmask <<= 1;            // Next bit.
00721       if (bitmask == 0)         // Done?
00722         break;
00723       // Shift p1 one bit further to the left.
00724       shifted_p1.template shift_op<1, left, assign>(shifted_p1);
00725     }
00726     // Reduce the resulting output of the multiplication.
00727     reduce(output);
00728     // Copy the reduced output to `result'.
00729     std::memcpy(result.digits_ptr(), output, bitset<m>::digits * sizeof(bitset_digit_t));      // `result' is written only here (and by the reset() above), so operator*= can pass M_coefficients itself.
00730   }
00731 
00732 #if ECC_DEBUG
00733 template<unsigned int m>
00734 struct div_tct {                // Debug helper: prints a range of bits as '0'/'1', with the part from M_deg down to M_low in red.
00735   bitset_digit_t const* M_p;    // Digits of the bitset to print.
00736   int M_deg;                    // Bit index at which the red color is switched on.
00737   int M_low;                    // Bit index after which the color is switched off again.
00738   div_tct(bitset<m> const& b, int deg, int low) : M_p(b.digits_ptr()), M_deg(deg), M_low(low) { }
00739   void print_on(std::ostream& os) const
00740  {
00741     int lowbit = (M_low >> bitset_digit_bits_log2) * bitset_digit_bits;        // Start of the digit that contains M_low.
00742     if (lowbit > 0)
00743       lowbit = 0;               // Always print down to bit 0, further only when M_low is negative.
00744     for (int b = 2 * m - 1; b >= lowbit; --b)
00745     {
00746       if (b == M_deg)
00747         os << "\e[31m";
00748       int digitoffset = (b >> bitset_digit_bits_log2);  // Negative for negative b: the digits in front of M_p are read then.
00749       bitset_digit_t mask = static_cast<bitset_digit_t>(1) << (b & (bitset_digit_bits - 1));     // Shift in the digit type; an int 1 overflows at the top bit.
00750       if (M_p[digitoffset] & mask)
00751         os << '1';
00752       else
00753         os << '0';
00754       if (b == M_low)
00755         os << "\e[0m";
00756       if (b == 0)
00757         os << '.';              // Marks the position between exponent 0 and the negative exponents.
00758     }
00759   }
00760 };
00761 #endif
00762 
00763 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00764   polynomial<m, k, k1, k2>&
00765   polynomial<m, k, k1, k2>::operator/=(polynomial const& p)     // Divides *this by p modulo M(t); the quotient replaces *this, which is returned.
00766   {
00767 #if ECC_DEBUG
00768     LibEccDout(dc::polynomial|noprefix_cf, "");
00769     LibEccDout(dc::polynomial, "Entering polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()");
00770     polynomial<m, k, k1, k2> x(p.get_bitset());
00771     polynomial<m, k, k1, k2> y(M_coefficients);
00772     LibEccDout(dc::polynomial, "x(t) = " << x);
00773     LibEccDout(dc::polynomial|flush_cf, "y(t) = " << y);
00774 #endif
00775
00776     // The following algorithm is based on the algorithm
00777     // described in http://research.sun.com/techrep/2001/smli_tr-2001-95.ps
00778     // with significant optimization changes by Carlo Wood.
00779     // NOTE(review): p == 0 is not checked here; confirm what find1() does on an all-zero A.
00780     // Make sure that there is enough space for a full bitset object
00781     // and align the bitsets on a multiple of bitset_digit_t.
00782     static unsigned int const digit_offset_UV = ((sizeof(bitset<m>) * 8 - 1) / bitset_digit_bits + 1);
00783     static unsigned int const offset_UV = digit_offset_UV * bitset_digit_bits;
00784     // Make room for exponents from at least t^-m till t^2m.
00785     static unsigned int const digit_size_UV = 3 * digit_offset_UV;
00786     // Variables A and B do not need this much space.
00787     static unsigned int const digit_size_AB = bitset<m>::digits;
00788     // One digit of padding, needed for assembly routine.
00789     static unsigned int const padding_digit_size = 1;
00790
00791     // Declare stack space for four variables.
00792     bitset_digit_t bitpool [5 * padding_digit_size + 2 * digit_size_AB + 2 * digit_size_UV]
00793         __attribute__ ((__aligned__ (32)));
00794     std::memset((char*)bitpool, 0, sizeof(bitpool));
00795
00796     bitset<m>& A(*(bitset<m>*)&bitpool[padding_digit_size]);
00797     bitset<m>& B(*(bitset<m>*)&bitpool[2 * padding_digit_size + digit_size_AB]);
00798     bitset<m>& U(*(bitset<m>*)&bitpool[3 * padding_digit_size + 2 * digit_size_AB + digit_offset_UV]);
00799     bitset<m>& V(*(bitset<m>*)&bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV + digit_offset_UV]);
00800
00801     // The representation of U and V will be done with bitsets of size `digit_size_UV * bitset_digit_bits'.
00802     // This means that they contain powers of t with a negative exponent.
00803     // That is not a problem as those are well defined: t^(-n) = 1 / t^n.
00804
00805     // Let rp = M(t) = t^m + t^k [+ t^k1 + t^k2] + 1.
00806 #if ECC_DEBUG
00807     bitset<m + 1> rp("1");
00808     rp.template set<m>();
00809     rp.template set<k>();
00810     if (k1)
00811     {
00812       rp.template set<k1>();
00813       rp.template set<k2>();
00814     }
00815 #endif
00816
00817     // Let U(t) = y(t) (= M_coefficients).
00818     LibEccDout(dc::polynomial|flush_cf, "U <- y");
00819     U = M_coefficients;
00820
00821     // Guess the maximum and minimum powers to be the possible limits.
00822     int degU = m - 1;
00823     int lowU = 0;
00824
00825     // Let A(t) = x(t).
00826     LibEccDout(dc::polynomial|flush_cf, "A <- x");
00827     A = p.get_bitset();
00828
00829     // Then
00830     //
00831     // A(t) * y(t) = U(t) * x(t)  [mod M(t)].
00832
00833     // Let V(t) = 0
00834     // Let B = M(t)
00835     //
00836     // Then
00837     //
00838     // B(t) * y(t) = V(t) * x(t)  [mod M(t)].
00839     //
00840     // Let degA be the highest power of t in A.
00841     typename bitset<m>::const_reverse_iterator degA = A.rbegin();
00842     degA.find1();
00843     LibEccDout(dc::polynomial|flush_cf, "deg(A) == " << degA);
00844
00845     // Let lowA be the lowest power of t in A.
00846     typename bitset<m>::const_iterator lowA = A.begin();
00847     lowA.find1();
00848     LibEccDout(dc::polynomial|flush_cf, "low(A) == " << lowA);
00849
00850     unsigned int sizeA = degA.get_index() - lowA.get_index();   // Zero when A has a single bit set.
00851
00852     // Let n = m - deg(A).
00853     unsigned int n = m - degA.get_index();
00854     //
00855     // Then B'(t) = B(t) - A(t) * t^n will have a degree less than m.
00856     // And
00857     //
00858     // B'(t) * y(t) = B(t) * y(t) - A(t) * y(t) * t^n =
00859     //              = V(t) * x(t) - U(t) * x(t) * t^n =
00860     //              = (V(t) - U(t) * t^n) * x(t) =
00861     //              = V'(t) * x(t)                      [mod M(t)].
00862     //
00863     // B <- B'
00864     LibEccDout(dc::polynomial|flush_cf, "B <- A * t^" << n << " + " << cwprint_using(rp, &bitset<m+1>::base2_print_on));
00865     B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), n);
00866     B.template flip<m>();
00867     B.template flip<k>();
00868     if (k1)
00869     {
00870       B.template flip<k1>();
00871       B.template flip<k2>();
00872     }
00873     B.template flip<0>();
00874
00875     // Let degB be the highest power of t in B.
00876     typename bitset<m>::const_reverse_iterator degB = B.rbegin();
00877     degB.find1();
00878     LibEccDout(dc::polynomial|flush_cf, "deg(B) == " << degB);
00879
00880     // Let lowB be the lowest power of t in B.
00881     typename bitset<m>::const_iterator lowB = B.begin();
00882     lowB.find1();
00883     LibEccDout(dc::polynomial|flush_cf, "low(B) == " << lowB);
00884
00885     // V <- V'
00886     LibEccDout(dc::polynomial|flush_cf, "V <- U * t^" << n <<
00887         "  [mod " << cwprint_using(rp, &bitset<m + 1>::base2_print_on) << "]");
00888     V.xor_with_zero_padded(U, 0, m - 1, n);
00889
00890     int degV = degU + n;
00891     int lowV = lowU + n;
00892
00893     unsigned int sizeB = degB.get_index() - lowB.get_index();
00894
00895     if (sizeA > 0 && sizeB > 0)         // Both are unsigned, so when this is false one of them is already zero.
00896       for(;;)                           // Only left through one of the two `break's, that is, with sizeA == 0 or sizeB == 0.
00897       {
00898         LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00899         LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00900         LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00901         LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00902         if (sizeA < sizeB)
00903         {
00904           int left_shift = lowB.get_index() - lowA.get_index();
00905           LibEccDout(dc::polynomial|flush_cf, "B <- B + A * t^" << left_shift);
00906           B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), left_shift);
00907           degB.find1();
00908           lowB.find1();
00909           sizeB = degB.get_index() - lowB.get_index();
00910           LibEccDout(dc::polynomial|flush_cf, "V <- V + U * t^" << left_shift);
00911           V.xor_with_zero_padded(U, lowU, degU, left_shift);
00912           degV = std::max(degV, degU + left_shift);
00913           lowV = std::min(lowV, lowU + left_shift);
00914           if (sizeB == 0)
00915             break;
00916         }
00917         else
00918         {
00919           int left_shift = lowA.get_index() - lowB.get_index();
00920           LibEccDout(dc::polynomial|flush_cf, "A <- A + B * t^" << left_shift);
00921           A.xor_with_zero_padded(B, lowB.get_index(), degB.get_index(), left_shift);
00922           degA.find1();
00923           lowA.find1();
00924           sizeA = degA.get_index() - lowA.get_index();
00925           LibEccDout(dc::polynomial|flush_cf, "U <- U + V * t^" << left_shift);
00926           U.xor_with_zero_padded(V, lowV, degV, left_shift);
00927           degU = std::max(degU, degV + left_shift);
00928           lowU = std::min(lowU, lowV + left_shift);
00929           if (sizeA == 0)
00930             break;
00931         }
00932       }
00933
00934     LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00935     LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00936     LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00937     LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00938
00939     bitset<m>* R;
00940     // 'F' (Floating-point polynomial) will be shifted to the right and
00941     // is therefore defined to run from t^-2m till t^2m.  This means it will
00942     // be shifted OVER the other bitsets, but we don't need those anymore anyway.
00943     static unsigned int const offset_F = 2 * offset_UV;
00944     static unsigned int const size_F = 2 * m + offset_F;
00945     bitset<size_F>* F;
00946     int low1, lowR;
00947 #if ECC_DEBUG
00948     int degR;
00949 #endif
00950     if (sizeA == 0)
00951     {
00952       LibEccDout(dc::polynomial|flush_cf, "R = U");
00953       R = &U;
00954       F = (bitset<size_F>*)&bitpool[3 * padding_digit_size + 2 * digit_size_AB - digit_offset_UV];
00955       low1 = lowA.get_index();
00956       lowR = lowU;
00957 #if ECC_DEBUG
00958       degR = degU;
00959 #endif
00960     }
00961     else if (sizeB == 0)                // Always true when reached, see the loop above.
00962     {
00963       LibEccDout(dc::polynomial|flush_cf, "R = V");
00964       R = &V;
00965       F = (bitset<size_F>*)&bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV - digit_offset_UV];
00966       low1 = lowB.get_index();
00967       lowR = lowV;
00968 #if ECC_DEBUG
00969       degR = degV;
00970 #endif
00971     }
00972
00973     *F >>= low1;
00974     lowR -= low1;
00975 #if ECC_DEBUG
00976     degR -= low1;
00977 #endif
00978     // Get rid of negative exponents.
00979     LibEccDout(dc::polynomial|flush_cf, "lowR = " << lowR);
00980     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
00981     if ((!k1 && k >= bitset_digit_bits) || k2 >= bitset_digit_bits)     // Whole digits can only be folded when no term lands back in the digit being read (presumably k2 is the smallest non-zero exponent).
00982     {
00983       static int const digit_shift_k2 = k2 >> bitset_digit_bits_log2;
00984       static int const bit_shift_k2 = k2 & (bitset_digit_bits  - 1);
00985       static int const digit_shift_k1 = k1 >> bitset_digit_bits_log2;
00986       static int const bit_shift_k1 = k1 & (bitset_digit_bits  - 1);
00987       static int const digit_shift_k = k >> bitset_digit_bits_log2;
00988       static int const bit_shift_k = k & (bitset_digit_bits  - 1);
00989       static int const digit_shift_m = m >> bitset_digit_bits_log2;
00990       static int const bit_shift_m = m & (bitset_digit_bits  - 1);
00991       static int const thirtytwo_minus_bit_shift_k2_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k2) & (bitset_digit_bits  - 1);       // Digit width minus the bit shift (the name dates from 32-bit digits); the mask keeps the count in range when the shift is 0.
00992       static int const thirtytwo_minus_bit_shift_k1_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k1) & (bitset_digit_bits  - 1);
00993       static int const thirtytwo_minus_bit_shift_k_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_k) & (bitset_digit_bits  - 1);
00994       static int const thirtytwo_minus_bit_shift_m_with_compile_warning_evasion = (bitset_digit_bits - bit_shift_m) & (bitset_digit_bits  - 1);
00995       int first_digit = (lowR + offset_F) >> bitset_digit_bits_log2;
00996       bitset_digit_t* ptr = F->digits_ptr() + first_digit;
00997       bitset_digit_t* ptr1 = R->digits_ptr();
00998       while(ptr < ptr1)         // Every digit in front of R, that is, every digit with negative exponents.
00999       {
01000         if (k1)
01001         {
01002           ptr[digit_shift_k2] ^= (*ptr) << bit_shift_k2;
01003           if (bit_shift_k2 != 0)
01004             ptr[digit_shift_k2 + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k2_with_compile_warning_evasion;
01005           ptr[digit_shift_k1] ^= (*ptr) << bit_shift_k1;
01006           if (bit_shift_k1 != 0)
01007             ptr[digit_shift_k1 + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k1_with_compile_warning_evasion;
01008         }
01009         ptr[digit_shift_k] ^= (*ptr) << bit_shift_k;
01010         if (bit_shift_k != 0)
01011           ptr[digit_shift_k + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k_with_compile_warning_evasion;
01012         ptr[digit_shift_m] ^= (*ptr) << bit_shift_m;
01013         if (bit_shift_m != 0)
01014           ptr[digit_shift_m + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_m_with_compile_warning_evasion;
01015         ++ptr;
01016       }
01017     }
01018     else
01019     {
01020       for (int i = lowR + offset_F; i < offset_F; ++i)  // Bit by bit: add M(t) * t^i for every set bit with a negative exponent.
01021       {
01022         if (F->test(i))
01023         {
01024 #if ECC_DEBUG
01025           F->flip(i);           // This is not really needed, but prints nicer output below.
01026 #endif
01027           if (k1)
01028           {
01029             F->flip(i + k2);
01030             F->flip(i + k1);
01031           }
01032           F->flip(i + k);
01033           F->flip(i + m);
01034         }
01035       }
01036     }
01037 #if ECC_DEBUG
01038     lowR = 0;
01039     degR = 2 * m - 1;
01040 #endif
01041     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01042     reduce(R->digits_ptr());
01043 #if ECC_DEBUG
01044     degR = m - 1;
01045 #endif
01046     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01047     M_coefficients = *R;
01048
01049     return *this;
01050   }
01051 
01052 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01053   inline polynomial<m, k, k1, k2>&
01054   polynomial<m, k, k1, k2>::operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
01055   {
01056     return (*this /= polynomial<m, k, k1, k2>(expr));
01057   }
01058 
01059 // Solve x^2 + b x = c.
01060 // Assuming that b != 0, there are 2 solutions: x1 and x1 + b.
01061 // This means that during the 'wiping' of the matrix in order
01062 // to solve x, one bit of x will stay undetermined.  We need
01063 // to take special care to make sure that this will be a bit
01064 // for which a bit of 'b' is set, otherwise we'd return a wrong
01065 // value.
01066 //
01067 // If b equals zero, then the solution is sqrt(c).  Otherwise
01068 // we can divide both sides of the equation by b^2 and solve
01069 // y^2 + y = c/b^2, and set x = b * y.
01070 //
01071 // There will only be a solution to this equation iff 0 = Tr(c/b^2).
01072 // (simply square the equation m-1 times and add them all up).
01073 //
01074 // Note that if y1 is a solution, then so is y1 + 1, hence we
01075 // cannot determine the least significant bit of y.
01076 //
01077 // It is possible to compose a matrix A such that Ax = x^2 + x
01078 // because squaring is an automorphism of the field:
01079 // x is a sum of basis elements, ie x = b1 + b2 + b3 and
01080 // x^2 = b1^2 + b2^2 + b3^2.  Therefore, if there exists a
01081 // matrix S such that Sb_i = b_i^2 for any basis element then
01082 // A = (S + I).  Moreover, such a matrix S must exist because
01083 // there are exactly m basis elements, and a matrix of mxm
01084 // will always be able to satisfy that.
01085 
#if ECC_DEBUG
// Debug helper: write the work matrix to debug channel dc::gaussj.
//
// matrix   - pointer to the first of m rows (2m bits wide when LIBECC_AUGMENTED, else m bits).
// pivotted - bit i is set for every row/column i that the caller flagged as pivotted.
//
// Output layout:
//  * A column header, one line per decimal digit of the column index (least
//    significant digit first).  Columns 1 .. (m + 1) / 2 - 1 show '+', pivotted
//    columns show their digit, all other columns are left blank.
//  * One line per row, prefixed with "+ " (rows 1 .. (m + 1) / 2 - 1), "* "
//    (pivotted rows) or two spaces, followed by the bits of that row.
//  * When LIBECC_INPLACE is set and a row is wider than m bits, the bits in
//    '+' or pivotted columns are colored: green when the bit equals the bit m
//    positions further (modulo 2m), red otherwise.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  void polynomial<m, k, k1, k2>::print_matrix(
#if LIBECC_AUGMENTED
       bitset<2 * m> const* matrix,
#else
       bitset<m> const* matrix,
#endif
       bitset<m> const& pivotted)
  {
    // Rows and columns below this index (except 0) get the '+' marker.
    unsigned int const plus_end = (m + 1) / 2;

    // Column header.
    for (unsigned int power = 1; power < m; power *= 10)
    {
      LibEccDout(dc::gaussj|continued_cf, "  ");
      for (unsigned int pos = 0; pos < matrix->number_of_bits; ++pos)
      {
        unsigned int const col = pos % m;
        if (pos == m)
        {
          // Separate the two halves of an augmented matrix.
          LibEccDout(dc::continued, ' ');
        }
        if (col >= 1 && col < plus_end)
        {
          LibEccDout(dc::continued, "+ ");
        }
        else if (pivotted.test(col))
        {
          LibEccDout(dc::continued, ((col / power) % 10) << ' ');
        }
        else
        {
          LibEccDout(dc::continued, "  ");
        }
      }
      LibEccDout(dc::finish, "");
    }

    // The rows themselves.
    for (unsigned int r = 0; r < m; ++r)
    {
      std::string text(r >= 1 && r < plus_end ? "+ " : pivotted.test(r) ? "* " : "  ");
      for (unsigned int pos = 0; pos < matrix->number_of_bits; ++pos)
      {
        unsigned int const col = pos % m;
        if (pos == m)
          text += ' ';
        bool const bit_is_set = matrix[r].test(pos);
        bool const colored = LIBECC_INPLACE && (matrix->number_of_bits > m) &&
            ((col >= 1 && col < plus_end) || pivotted.test(col));
        if (colored)
        {
          // Compare with the bit in the other half of the augmented row.
          bool const halves_agree = (bit_is_set == matrix[r].test((pos + m) % (2 * m)));
          text += halves_agree ? "\e[32m" : "\e[31m";
        }
        text += bit_is_set ? '1' : '0';
        if (colored)
          text += "\e[0m";
        text += ' ';
      }
      LibEccDout(dc::gaussj, text);
    }
    LibEccDout(dc::gaussj|noprefix_cf, "");
  }
#endif
01148 
// Construct the polynomial x that solves x^2 + b x = c.
//
// b, c - the coefficients of the equation above.
//
// If b is zero, the result is sqrt(c).  Otherwise c/b^2 is computed, the
// equation y^2 + y = c/b^2 is solved by multiplying c/b^2 with a matrix that
// is built only on the first call (it is kept in function-local statics, one
// set per template instantiation), and finally x = b * y.
//
// Throws std::domain_error when the trace of c/b^2 equals 1.
//
// NOTE(review): the one-time matrix initialization is guarded by a plain
// 'static bool'; nothing in this function synchronizes it, so concurrent
// first calls look unsafe -- confirm whether callers are single-threaded.
01149 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01150   polynomial<m, k, k1, k2>::polynomial(polynomial<m, k, k1, k2> const& b, polynomial<m, k, k1, k2> const& c) :
01151       M_coefficients(0)
01152   {
01153     // If b == 0, then x = sqrt(c).
01154     if (!b.M_coefficients.any())
01155     {
01156       M_coefficients = c.M_coefficients;
01157       sqrt();
01158       return;
01159     }
01160 
01161     // Calculate c/b^2.
01162     bitset_digit_t b2buf[square_digits];
            // b2 is whatever square() returns for this buffer; presumably the square of b stored in b2buf.
01163     polynomial<m, k, k1, k2>& b2 = b.square(b2buf);
01164     polynomial<m, k, k1, k2> cdb2(c);
01165     cdb2 /= b2;
            // No solution exists when Tr(c/b^2) == 1.
01166     if (cdb2.trace() == 1)
01167       throw std::domain_error("x^2 + bx = c has no solution");
01168 
01169 #if LIBECC_AUGMENTED
01170     typedef bitset<2 * m> matrixrow_type;
01171 #else
01172     typedef bitset<m> matrixrow_type;
01173 #endif
            // The matrix and its 'initialized' flag are static: they are computed on the
            // first call that reaches this point and reused by every later call.
01174     static matrixrow_type matrix[m];            // A mx2m or mxm matrix.
01175     static bool matrix_initialized;
01176     if (!matrix_initialized)
01177     {
              // NOTE(review): std::memset is declared in <cstring>, which this header does not
              // include directly; presumably it arrives via <libecc/bitset.h> -- confirm.
01178       std::memset(matrix, 0, sizeof(matrix));
01179       // Fill this matrix with either the augmented matrix (A|I) or with just A,
01180       // where A is the matrix such that Ax = x^2 + x.
01181       for (unsigned int bit = 0; bit < m; ++bit)
01182       {
01183         matrix[bit].set(bit);           // The I of A = (S + I).
01184 #if LIBECC_AUGMENTED
01185         matrix[bit].set(bit + m);               // The I of (A|I).
01186 #endif
01187       }
01188       for (unsigned int bit = 0; bit < (m + 1) / 2; ++bit)
01189         matrix[2 * bit].flip(bit);      // The square of low exponents.
01190       for (unsigned int bit = (m + 1) / 2; bit < m; ++bit)
01191         matrix[2 * bit - m].set(bit);   // Reduction with m.
01192       for (unsigned int bit = (m + 1) / 2; bit < m - k / 2; ++bit)
01193         matrix[2 * bit - m + k].flip(bit);      // Reduction with m - k.
01194       if (k1)
01195       {
01196         for (unsigned int bit = (m + 1) / 2; bit < m - k1 / 2; ++bit)
01197           matrix[2 * bit - m + k1].flip(bit);   // Reduction with m - k1.
01198         for (unsigned int bit = (m + 1) / 2; bit < m - k2 / 2; ++bit)
01199           matrix[2 * bit - m + k2].flip(bit);   // Reduction with m - k2.
01200       }
              // Exponents whose first reduction with k still lands at or above m need a second reduction.
01201       for (unsigned int bit = m - k / 2; bit < m; ++bit)
01202       {
01203         matrix[2 * bit - m + k - m].flip(bit);
01204         matrix[2 * bit - m + k - m + k].flip(bit);
01205         if (k1)
01206         {
01207           matrix[2 * bit - m + k - m + k1].flip(bit);
01208           matrix[2 * bit - m + k - m + k2].flip(bit);
01209         }
01210       }
01211       if (k1)
01212       {
                // Same second reduction step, now for the k1 and k2 terms.
01213         for (unsigned int bit = m - k1 / 2; bit < m; ++bit)
01214         {
01215           matrix[2 * bit - m + k1 - m].flip(bit);
01216           matrix[2 * bit - m + k1 - m + k].flip(bit);
01217           matrix[2 * bit - m + k1 - m + k1].flip(bit);
01218           matrix[2 * bit - m + k1 - m + k2].flip(bit);
01219         }
01220         for (unsigned int bit = m - k2 / 2; bit < m; ++bit)
01221         {
01222           matrix[2 * bit - m + k2 - m].flip(bit);
01223           matrix[2 * bit - m + k2 - m + k].flip(bit);
01224           matrix[2 * bit - m + k2 - m + k1].flip(bit);
01225           matrix[2 * bit - m + k2 - m + k2].flip(bit);
01226         }
01227       }
01228 
              // Bit i is set once row i has been chosen as a pivot row (or once column i was skipped, see below).
01229       bitset<m> pivotted;
01230       pivotted.reset();
01231 
01232       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01233 
01234       // Next, wipe it, so that the left half becomes I.
01235       // The first half is easy.
01236       for (unsigned int wipecol = 1; wipecol < (m + 1) / 2; ++wipecol)
01237       {
01238         matrix[2 * wipecol] ^= matrix[wipecol];
01239 #if LIBECC_INPLACE
01240         matrix[2 * wipecol].set(wipecol);               // Store the inverse in-place, destroying the original.
01241 #endif
01242       }
01243 
01244       // The second half is not.  Use Gauss-Jordan here.
01245       // Note that pivotting is hardly necessary because our arithmetic is infinitely accurate,
01246       // but we still need to find a '1' when we encounter a '0' on the main diagonal of course.
01247       // There will always be at least one '1' in every column, so that partial pivotting suffices
01248       // (speeding up things obviously), with the exception of the case where that '1' is only
01249       // found in row 0 (which is our 'singular' row and needs some special attention).
01250       // The row swapping is done because it is needed if we want to do our work "in-place",
01251       // reducing the amount of memory needed with a factor of two.
01252 
01253       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01254 
              // rowswaps[i] records which row currently holds the data that belongs in row i;
              // only entry 0 is initialized here, the others are assigned inside the main loop.
01255       unsigned int rowswaps[m];
01256       rowswaps[0] = 0;
              // colswaps and colswaps_inverse start out as the identity permutation and are kept
              // each other's inverse while columns are exchanged.
01257       unsigned int colswaps[m], colswaps_inverse[m];
01258       for (int row = 0; row < m; ++row)
01259       {
01260         colswaps[row] = row;
01261         colswaps_inverse[row] = row;
01262       }
01263 
01264       // Run over all remaining columns and wipe them, immediately replacing them with the result
01265       // since once a column is wiped we don't need its contents anymore.  Moreover, while wiping
01266       // the column it is optionally swapped with another column at the same time.  This, of course,
01267       // is only done to make the code not understandable anymore for you.
01268 #if LIBECC_SWAPCOLUMNS
01269       for (unsigned int colcnt = (m + 1) / 2; colcnt < m; ++colcnt)
01270 #else
01271       for (unsigned int wipecol = (m + 1) / 2; wipecol < m; ++wipecol)
01272 #endif
01273       {
01274 #if LIBECC_SWAPCOLUMNS
01275         // Find the next row that wasn't already wiped.
01276         unsigned int wipecol = colswaps[colcnt];
01277 #if ECC_DEBUG
01278         LibEccDout(dc::gaussj, "colcnt = " << colcnt);
01279         for (int row = 0; row < m; ++row)
01280         {
01281           LibEccDout(dc::gaussj, "colswaps[" << row << "] = " << colswaps[row] << "\t\tcolswaps_inverse[" << row << "] = " << colswaps_inverse[row]);
01282           assert(colswaps[colswaps_inverse[row]] == row);
01283           assert(colswaps_inverse[colswaps[row]] == row);
01284         }
01285         LibEccDout(dc::polynomial|noprefix_cf, "");
01286 #endif
01287 #endif
01288 
01289         // First find a suitable row to wipe with.
01290         // This searching is called 'pivotting'.
01291         LibEccDout(dc::gaussj, "Searching for suitable row to wipe with in column " << wipecol);
01292         unsigned int pivotrow;
01293         if (!matrix[wipecol].test(wipecol) || pivotted.test(wipecol))
01294         {
                  // Search order: rows wipecol + 1 .. m - 1, then row 0, then rows (m + 1) / 2 .. wipecol - 1.
01295           for (pivotrow = wipecol;;)
01296           {
01297             if (++pivotrow == m)
01298             {
01299               if (matrix[0].test(wipecol) && !pivotted.template test<0>())
01300                 pivotrow = 0;
01301               else
01302               {
01303                 for (pivotrow = (m + 1) / 2; pivotrow < wipecol; ++pivotrow)
01304                   if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01305                     break;
01306               }
01307               if (pivotrow == wipecol)
01308               {
01309                 // This happens when we swapped with column 0 (which is all zeroes), for example when m == 14.
01310                 // Just ignore this column.
01311                 pivotrow = m;                   // Flag that we need to continue the main loop.
01312                 pivotted.set(wipecol);
01313                 matrix[wipecol].set(wipecol);   // Copy identity matrix over.
01314                 break;
01315               }
01316             }
01317             if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01318               break;
01319           }
                  // NOTE(review): on this 'continue' path rowswaps[] is not assigned for the current
                  // column, yet the rotation loop further down reads rowswaps[i] for every
                  // i >= (m + 1) / 2 -- confirm this cannot read an indeterminate value.
01320           if (pivotrow == m)
01321             continue;
01322         }
01323         else
01324           pivotrow = wipecol;
01325         LibEccDout(dc::gaussj, "Using row " << pivotrow << " to wipe column " << wipecol);
01326         LibEccDout(dc::gaussj, "Before:");
01327         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01328         pivotted.set(pivotrow);
01329 #if LIBECC_SWAPCOLUMNS
01330         rowswaps[colcnt] = pivotrow;
01331         LibEccDout(dc::gaussj, "Setting rowswaps[" << colcnt << "] to " << pivotrow);
01332 #else
01333         rowswaps[wipecol] = pivotrow;                   // We temporarily use row 'pivotrow' to store row 'wipecol'.
01334         LibEccDout(dc::gaussj, "Setting rowswaps[" << wipecol << "] to " << pivotrow);
01335 #endif
01336         if (pivotrow == wipecol)
01337         {
                  // The pivot sits on the diagonal: plain elimination, no column swap needed.
01338 #if LIBECC_INPLACE
01339           matrix[pivotrow].set(wipecol);                // Store the inverse in-place, destroying the original.
01340 #endif
01341           for (unsigned int row = 0; row < m; ++row)
01342           {
01343             if (row == pivotrow)
01344               continue;
01345             if (matrix[row].test(wipecol))
01346             {
01347 #if LIBECC_INPLACE
01348               matrix[row].clear(wipecol);               // Store the inverse in-place, destroying the original.
01349 #endif
01350               matrix[row] ^= matrix[pivotrow]; 
01351             }
01352           }
01353         }
01354         else
01355         {
01356           // This block contains the main magic.  It's hard to understand I am afraid.
01357           // Basically this does the same as the code block above, but at the same time
01358           // swaps the columns 'wipecol' and 'pivotrow'.
01359 
01360 #if LIBECC_SWAPCOLUMNS
01361           // Swap pivot row bits, and set the bit in pivotrow (that's the identity matrix bit).
01362           if (matrix[pivotrow].test(pivotrow) != matrix[pivotrow].test(wipecol))
01363           {
01364             matrix[pivotrow].flip(wipecol);
01365 #if !LIBECC_INPLACE
01366             matrix[pivotrow].flip(pivotrow);    // No need to flip the 'pivotrow' column when we set it in the next line.
01367 #endif
01368           }
01369 #endif
01370 #if LIBECC_INPLACE
01371           matrix[pivotrow].set(pivotrow);               // Store the inverse in-place, destroying the original.
01372 #endif
01373           for (unsigned int row = 0; row < m; ++row)
01374           {
01375             if (row == pivotrow)                        // Don't wipe the row that we use to wipe.
01376               continue;
01377             matrixrow_type& mrow = matrix[row];
01378             if (mrow.test(wipecol))
01379             {
01380 #if LIBECC_SWAPCOLUMNS
01381               if (!mrow.test(pivotrow))         // If the value in the two columns differ,
01382               {
01383                 mrow.clear(wipecol);            // swap the two values.
01384 #if !LIBECC_INPLACE
01385                 mrow.set(pivotrow);             // No need to set pivotrow when it is overwritten in the next line.
01386 #endif
01387               }
01388 #endif
01389 #if LIBECC_INPLACE
01390               mrow.clear(pivotrow);             // Store the inverse in-place, destroying the original.
01391                                                 // This represents a 0 from the identity matrix.
01392 #endif
01393               mrow ^= matrix[pivotrow];         // Ok, now the columns have been swapped and to-be-wiped column
01394                                                 // has been replaced with a clean identity matrix bit. Perform
01395                                                 // the actual wiping.
01396             }
01397 #if LIBECC_SWAPCOLUMNS
01398             else if (mrow.test(pivotrow))       // Are the pivotrow and wipecol different?
01399             {
01400               mrow.set(wipecol);                // Then flip both, exchanging them effectively. If they
01401               mrow.clear(pivotrow);             //   were the same, consider them exchanged anyway.
01402             }
01403 #endif
01404           }
01405 #if LIBECC_SWAPCOLUMNS
01406           LibEccDout(dc::gaussj, "Also swapped columns " << pivotrow << " and " << wipecol);
01407           // Keep colswaps up to date.  We need colswaps_inverse to do that, therefore
01408           // we need to keep colswaps_inverse up to date too.
01409           std::swap(colswaps[colswaps_inverse[wipecol]], colswaps[colswaps_inverse[pivotrow]]);
01410           std::swap(colswaps_inverse[wipecol], colswaps_inverse[pivotrow]);
01411 #endif
01412         }
01413         LibEccDout(dc::gaussj, "After:");
01414         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01415       }
01416 
01417 #if ECC_DEBUG
              // Debug only: list the pending row moves for row 0 and rows (m + 1) / 2 .. m - 1.
01418       for (int i = 0; i < m; ++i)
01419       {
01420         if (rowswaps[i] != i)
01421           LibEccDout(dc::gaussj, i << " : " << rowswaps[i]);
01422         // Skip the first half of the matrix.
01423         if (i == 0)
01424           i = (m + 1) / 2 - 1;
01425       }
01426       LibEccDout(dc::gaussj|noprefix_cf, "");
01427 #endif
01428 
              // If row 0 was used as a pivot row, take the first not-yet-pivotted row at or
              // after (m + 1) / 2 as the source for row 0.
              // NOTE(review): the while loop has no upper bound; it relies on such a row existing -- confirm.
01429       if (pivotted.test(0))
01430       {
01431         int row0 = (m + 1) / 2;
01432         while (pivotted.test(row0))
01433           ++row0;
01434         rowswaps[0] = row0;
01435         pivotted.set(row0);
01436       }
01437 
01438       // Next perform some row rotations, in order to get all rows on their correct places again.
01439       for (int i = 0; i < m; ++i)
01440       {
01441         if (rowswaps[i] != i)
01442         {
                  // Follow the cycle that starts at i: each row receives the row that rowswaps names for it.
01443           int j = i;
                  // NOTE(review): temp is bitset<2 * m> even when matrixrow_type is bitset<m>;
                  // matrixrow_type looks like the intended type -- confirm.
01444           bitset<2 * m> temp = matrix[j];
01445           LibEccDout(dc::gaussj|continued_cf, j);
01446           do
01447           {
01448             matrix[j] = matrix[rowswaps[j]];
01449             LibEccDout(dc::continued, " <-- " << rowswaps[j]);
01450             j = rowswaps[j];
01451           }
01452           while (rowswaps[j] != i);
01453           matrix[j] = temp;
01454           LibEccDout(dc::finish, " <-- " << i);
01455           j = i;
01456           do
01457           {
01458             int pj = j;
01459             j = rowswaps[pj];
01460             // Update the administration so that we won't try to rotate them again.
01461             rowswaps[pj] = pj;
01462           }
01463           while (j != i);
01464         }
01465         // Skip the first half of the matrix.
01466         if (i == 0)
01467           i = (m + 1) / 2 - 1;
01468       }
01469 
01470       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01471       matrix_initialized = true;
01472     }
01473 
01474     // Multiply the matrix with cdb2.
            // Coefficient 'row' of the result is set when (matrix row AND c/b^2) reports odd();
            // presumably odd() means an odd number of set bits -- confirm in bitset.h.
01475     for (unsigned int row = 0; row < m; ++row)
01476     {
01477 #if LIBECC_AUGMENTED
01478 #if LIBECC_INPLACE
01479       bitset<m> tmp = matrix[row];
01480 #else
01481       bitset<2 * m> tmp2;
01482       matrix[row].template shift_op<m, right, assign>(tmp2);
01483       bitset<m> tmp = tmp2;
01484 #endif
01485       tmp &= cdb2.get_bitset();
01486 #else
01487       bitset<m> tmp = matrix[row] & cdb2.get_bitset();
01488 #endif
01489       if (tmp.odd())
01490         M_coefficients.set(row);
01491     }
01492 
01493     // Finally, multiply with b to get x.
01494     *this *= b;
01495   }
01496 
01497 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01498   inline bool
01499   operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01500   {
01501     return p1.M_coefficients == p2.M_coefficients;
01502   }
01503 
01504 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01505   inline bool
01506   operator==(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01507   {
01508     return polynomial<m, k, k1, k2>(expr) == p2;
01509   }
01510 
01511 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01512   inline bool
01513   operator==(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01514   {
01515     return p1 == polynomial<m, k, k1, k2>(expr);
01516   }
01517 
01518 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01519   inline bool
01520   operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01521   {
01522     return p1.M_coefficients != p2.M_coefficients;
01523   }
01524 
01525 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01526   inline bool
01527   operator!=(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01528   {
01529     return polynomial<m, k, k1, k2>(expr) != p2;
01530   }
01531 
01532 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01533   inline bool
01534   operator!=(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01535   {
01536     return p1 != polynomial<m, k, k1, k2>(expr);
01537   }
01538 
01539 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01540   inline typename polynomial<m, k, k1, k2>::xor_type
01541   operator+(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01542   {
01543     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01544   }
01545 
01546 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01547   inline typename polynomial<m, k, k1, k2>::xor_type
01548   operator-(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01549   {
01550     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01551   }
01552 
01553 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01554   inline polynomial<m, k, k1, k2>
01555   operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01556   {
01557     polynomial<m, k, k1, k2> result;
01558     p1.multiply_with(p2, result.M_coefficients);
01559     return result;
01560   }
01561 
01562 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01563   inline polynomial<m, k, k1, k2>
01564   operator*(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01565   {
01566     return polynomial<m, k, k1, k2>(expr) * p2;
01567   }
01568 
01569 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01570   inline polynomial<m, k, k1, k2>
01571   operator*(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01572   {
01573     return p1 * polynomial<m, k, k1, k2>(expr);
01574   }
01575 
01576 
01577 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01578   inline polynomial<m, k, k1, k2>
01579   operator/(polynomial<m, k, k1, k2> const& e1, polynomial<m, k, k1, k2> const& e2)
01580   {
01581     polynomial<m, k, k1, k2> tmp(e1);
01582     tmp /= e2;
01583     return tmp;
01584   }
01585 
01586 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01587   inline polynomial<m, k, k1, k2>
01588   operator/(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01589   {
01590     return polynomial<m, k, k1, k2>(expr) / p2;
01591   }
01592 
01593 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01594   inline polynomial<m, k, k1, k2>
01595   operator/(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01596   {
01597     return p1 / polynomial<m, k, k1, k2>(expr);
01598   }
01599 
01600 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01601   std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2> const& p)
01602   {
01603     p.M_coefficients.base2_print_on(os);
01604     return os;
01605   }
01606 
01607 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01608   std::ostream& operator<<(std::ostream& os, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01609   {
01610     polynomial<m, k, k1, k2> p(expr);
01611     p.M_coefficients.base2_print_on(os);
01612     return os;
01613   }
01614 
// Definition of the static flag that calculate_normal() sets to true once it
// has filled S_normal.  There is one flag per template instantiation.
01615 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01616   bool polynomial<m, k, k1, k2>::S_normal_initialized;
01617 
// Definition of the static m-bit set that calculate_normal() fills in.
// There is one bitset per template instantiation.
01618 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01619   bitset<m> polynomial<m, k, k1, k2>::S_normal;
01620  
01621 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01622   void polynomial<m, k, k1, k2>::calculate_normal(void)
01623   {
01624 #if 0
01625     bitset<m> single_bit(1);
01626     polynomial trace;
01627     bitset_digit_t nextfrob1_buf[square_digits];
01628     bitset_digit_t nextfrob2_buf[square_digits];
01629     polynomial* nextfrob1;
01630     polynomial* nextfrob2;
01631     for (int bit = 0; bit < m; ++bit)
01632     {
01633       trace = single_bit;
01634       nextfrob1 = &trace.square(nextfrob1_buf);
01635       for (int i = 0; i < (m - 1) / 2; ++i)
01636       {
01637         nextfrob2 = &nextfrob1->square(nextfrob2_buf);
01638         trace += *nextfrob1 + *nextfrob2;
01639         if ((m & 1) && i == (m - 3) / 2)
01640           break;
01641         nextfrob1 = &nextfrob2->square(nextfrob1_buf);
01642       }
01643       if (!(m & 1))
01644         trace += *nextfrob1;
01645       if (trace.get_bitset().template test<0>())
01646         S_normal.set(bit);
01647       single_bit.template shift_op<1, libecc::left, libecc::assign>(single_bit);
01648     }
01649 #else
01650     // We can do that faster... I didn't prove this yet, but it works.
01651     if ((m & 1))
01652       S_normal.template set<0>();
01653     if (((m - k) & 1))
01654       S_normal.template set<m - k>();
01655     if (k1)
01656     {
01657       if (((m - k1) & 1))
01658         S_normal.template set<m - k1>();
01659       if (((m - k2) & 1))
01660         S_normal.template set<m - k2>();
01661     }
01662 #endif
01663     S_normal_initialized = true;
01664   }
01665 
01666 } // namespace libecc
01667 
01668 #include <libecc/square.hcc>    // File with different copyright.
01669 
01670 #endif // LIBECC_POLYNOMIAL_H
Copyright © 2002-2004 Carlo Wood.  All rights reserved.