Re: convert 32bit numbers to 64bit (or float to double)

From:

Victor Bazarov <v.Abazarov@comAcast.net>

Newsgroups:

comp.lang.c++

Date:

Sat, 26 Jun 2010 14:57:02 -0400

Message-ID:

<i05iib$f20$1@news.datemas.de>

On 6/26/2010 2:15 PM, Sebastian Gibb wrote:

#include<cmath>
#include<iomanip>
#include<iostream>
#include<vector>

using namespace std;

// some constants from IEEE 754
const int nBitsSingleMantissa = 23;
const int nBitsSingleExzess = 8;
const int nBitsDoubleMantissa = 52;
const int nBitsDoubleExzess = 11;

// old method using by another cpp application
// it is my reference method
double convertWithCast(double value) {
   float x = value;
   return (double)x;
}

// try to simulate the same behaviour without using floats
struct IEEEBinary {
   int signedBit;
   vector<int> exzess;
   vector<int> mantissa;
};

vector<int> swapVectorOrder(const vector<int>& x) {
   vector<int> y;
   for (int i=x.size()-1; i>= 0; --i) {
     y.push_back(x[i]);
   }
   return y;
}

double calcExzess(int nEBits) {
   return pow(2, nEBits-1)-1;
}

IEEEBinary double2binary(double x, int nMBits, int nEBits) {
   // calculate mantissa
   // before point
   int pre = floor(abs(x));
   vector<int> preMantissa;

   while (pre != 0) {
     preMantissa.push_back(pre % 2);
     pre = floor(pre/2.0);
   }

   if (preMantissa.size()> 1)
     preMantissa = swapVectorOrder(preMantissa);

   // after point
   double post = x - floor(x);
   vector<int> postMantissa;
   for (unsigned int i=0; i<2*nMBits; ++i) {
     post = post * 2;
     int pre = floor(post);
     postMantissa.push_back(pre);
     post -= pre;
   }

   vector<int> mantissa = preMantissa;
   mantissa.insert(mantissa.end(), postMantissa.begin(), postMantissa.end());

   // normalize
   vector<int>::iterator it;

   for (it = mantissa.begin(); it != mantissa.end(); ++it) {
     if (*it == 1)
       break;
   }
   // save size for exzess calc
   unsigned int sMantissa = mantissa.size();
   // remove leading zeros and first 1
   it = mantissa.erase(mantissa.begin(), (it+1));
   // save new size for exzess calc
   unsigned int sMantissa2 = mantissa.size();

   // round
   if (mantissa.at(nMBits+1) == 1) {
     mantissa.at(nMBits) = 1;
   }

   // cut
   mantissa.erase(it+nMBits, mantissa.end());
   //mantissa.erase(mantissa.end());

   // exzess
   int ex = calcExzess(nEBits) + preMantissa.size() - (sMantissa-sMantissa2);

   vector<int> exzess;

   while (ex != 0) {
     exzess.push_back(ex % 2);
     ex = floor(ex/2.0);
   }

   // append zeros to exzess
   if (exzess.size()< nEBits) {
     for (unsigned int i=exzess.size(); i<nEBits; ++i)
       exzess.push_back(0);
   }

   exzess = swapVectorOrder(exzess);

   // signed bit
   int signedBit = 0;

   if (x< 0) {
     signedBit = 1;
   }

   // build binary struct
   IEEEBinary bin;
   bin.signedBit = signedBit;
   bin.mantissa = mantissa;
   bin.exzess = exzess;

   return bin;
}

double binary2double(const IEEEBinary& binary) {
   int exzess = 0;

   for (unsigned int i = 0; i< binary.exzess.size(); ++i)
     exzess += binary.exzess[i]*pow(2, binary.exzess.size()-(i+1));

   exzess -= calcExzess(binary.exzess.size());

   double value = pow(2, exzess);

   for (unsigned int i = 0; i< binary.mantissa.size(); ++i) {
     value += binary.mantissa[i]*pow(2, exzess-(int)(i+1));
   }

   if (binary.signedBit == 1)
     value *= (-1);

   return value;
}

// wrapper function
double convertWithoutCast(double value) {
   return binary2double(double2binary(value, nBitsSingleMantissa,
nBitsSingleExzess));
}

int main() {
   vector<double> testValues;
   testValues.push_back(1.0/3.0);
   testValues.push_back(18.4);
   testValues.push_back(0.1);
   testValues.push_back(999.4813232421875);

   for (vector<double>::iterator it=testValues.begin(); it !=
testValues.end(); ++it) {
     double oldConv = convertWithCast(*it);
     double newConv = convertWithoutCast(*it);

     if (oldConv != newConv) {
       cout<< setprecision(22)<< *it<< ": "<< oldConv<< " != "<<
newConv<< endl;
     }
   }

   return 0;
}

Apparently it either contains hardware-specific code (which I don't see
right away) or contains a logical error (for which, while on vacation, I
really don't care to search) - when I took your code and tried debugging
it with VC10, I got first of all some errors I needed to correct (mostly
the use of an ambiguous 'pow'), and second of all, a debugging assertion
failed in one of the functions, the iterator was out of bounds.

Your code is overly complex, I believe. And it doesn't seem to contain
any test cases. Consider writing test cases, like expecting a zeroed
mantissa with a power of 2, and a particular mantissa. When you split
your number into the mantissa and "exzess" (exponent), you really need
to make sure your splitting code works right before relying on it for
your "conversion".

Good luck!

V
--
Please remove capital 'A's when replying by e-mail
I do not respond to top-posted replies, please don't ask