Re: convert 32bit numbers to 64bit (or float to double)
Hello,
Shorten it to the minimum and post it here. Most of us don't click on
links. At least I don't.
Sorry, I thought nobody would read long code without syntax highlighting.
#include <cmath>
#include <iomanip>
#include <iostream>
#include <vector>
using namespace std;
// some constants from IEEE 754
const int nBitsSingleMantissa = 23;
const int nBitsSingleExzess = 8;
const int nBitsDoubleMantissa = 52;
const int nBitsDoubleExzess = 11;
// old method using by another cpp application
// it is my reference method
double convertWithCast(double value) {
float x = value;
return (double)x;
}
// try to simulate the same behaviour without using floats
struct IEEEBinary {
int signedBit;
vector<int > exzess;
vector<int > mantissa;
};
vector<int > swapVectorOrder(const vector<int >& x) {
vector<int > y;
for (int i=x.size()-1; i >= 0; --i) {
y.push_back(x[i]);
}
return y;
}
double calcExzess(int nEBits) {
return pow(2, nEBits-1)-1;
}
IEEEBinary double2binary(double x, int nMBits, int nEBits) {
// calculate mantissa
// before point
int pre = floor(abs(x));
vector<int > preMantissa;
while (pre != 0) {
preMantissa.push_back(pre % 2);
pre = floor(pre/2.0);
}
if (preMantissa.size() > 1)
preMantissa = swapVectorOrder(preMantissa);
// after point
double post = x - floor(x);
vector<int > postMantissa;
for (unsigned int i=0; i<2*nMBits; ++i) {
post = post * 2;
int pre = floor(post);
postMantissa.push_back(pre);
post -= pre;
}
vector<int > mantissa = preMantissa;
mantissa.insert(mantissa.end(), postMantissa.begin(), postMantissa.end());
// normalize
vector<int >::iterator it;
for (it = mantissa.begin(); it != mantissa.end(); ++it) {
if (*it == 1)
break;
}
// save size for exzess calc
unsigned int sMantissa = mantissa.size();
// remove leading zeros and first 1
it = mantissa.erase(mantissa.begin(), (it+1));
// save new size for exzess calc
unsigned int sMantissa2 = mantissa.size();
// round
if (mantissa.at(nMBits+1) == 1) {
mantissa.at(nMBits) = 1;
}
// cut
mantissa.erase(it+nMBits, mantissa.end());
//mantissa.erase(mantissa.end());
// exzess
int ex = calcExzess(nEBits) + preMantissa.size() - (sMantissa-sMantissa2);
vector<int > exzess;
while (ex != 0) {
exzess.push_back(ex % 2);
ex = floor(ex/2.0);
}
// append zeros to exzess
if (exzess.size() < nEBits) {
for (unsigned int i=exzess.size(); i<nEBits; ++i)
exzess.push_back(0);
}
exzess = swapVectorOrder(exzess);
// signed bit
int signedBit = 0;
if (x < 0) {
signedBit = 1;
}
// build binary struct
IEEEBinary bin;
bin.signedBit = signedBit;
bin.mantissa = mantissa;
bin.exzess = exzess;
return bin;
}
double binary2double(const IEEEBinary& binary) {
int exzess = 0;
for (unsigned int i = 0; i < binary.exzess.size(); ++i)
exzess += binary.exzess[i]*pow(2, binary.exzess.size()-(i+1));
exzess -= calcExzess(binary.exzess.size());
double value = pow(2, exzess);
for (unsigned int i = 0; i < binary.mantissa.size(); ++i) {
value += binary.mantissa[i]*pow(2, exzess-(int)(i+1));
}
if (binary.signedBit == 1)
value *= (-1);
return value;
}
// wrapper function
double convertWithoutCast(double value) {
return binary2double(double2binary(value, nBitsSingleMantissa,
nBitsSingleExzess));
}
int main() {
vector<double > testValues;
testValues.push_back(1.0/3.0);
testValues.push_back(18.4);
testValues.push_back(0.1);
testValues.push_back(999.4813232421875);
for (vector<double >::iterator it=testValues.begin(); it !=
testValues.end(); ++it) {
double oldConv = convertWithCast(*it);
double newConv = convertWithoutCast(*it);
if (oldConv != newConv) {
cout << setprecision(22) << *it << ": " << oldConv << " != " <<
newConv << endl;
}
}
return 0;
}
// the output:
0.3333333333333333148296:
0.3333333432674407958984 != 0.3333333134651184082031
0.1000000000000000055511:
0.1000000014901161193848 != 0.09999999403953552246094
// it works for
18.4 and 999.4813232421875
I think, I do something wrong because the old method with typical c-cast
returns a different value in comparison to my new method without c-cast.
Kind regards,
Sebastian