Re: max<float>(NaN, 42.0f)?

From:

Ulrich Eckhardt <doomster@knuut.de>

Newsgroups:

comp.lang.c++.moderated

Date:

Tue, 15 Feb 2011 04:42:15 CST

Message-ID:

<8rul5rFfq7U1@mid.uni-berlin.de>

Ulrich Eckhardt wrote:

I've been wondering about how to most efficiently find the maximum of a
sequence of floating point values.

Firstly, thanks to all who responded. Since I promised to provide a
benchmark, here it comes. The results on my machine[1] are interesting,
they show that the na?ve approach is marginally slower (probably just my
imagination) than the two using less or less-equal comparisons. The
mentioned approach that uses pairs of values, sorts them and then
compares them to the existing limits does save a fraction of a
comparison per element but doesn't give any time benefits at all, it
performs worst.

Here's a typical run from the benchmark:

float (size=4):
get_area_naive took 0.124s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_inclusive took 0.159s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_exclusive took 0.133s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_pairs took 0.245s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
double (size=8):
get_area_naive took 0.199s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_inclusive took 0.270s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_exclusive took 0.189s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_pairs took 0.291s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
long double (size=16):
get_area_naive took 0.418s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_inclusive took 0.366s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_compare_exclusive took 0.335s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}
get_area_pairs took 0.516s
{{0.000163453, 999.999}, {5.97146e-05, 1000}}

The parts in curly brackets are the results. Not outputting them causes
the optimizer to remove large parts of the code and changes the timing
_a lot_.

The code[2] uses a POSIX timer to determine the runtime with a
millisecond resolution, on win32 I'd use timeGetTime() to get a similar
resolution. I've tried the same at work on a virtualized x86 machine and
it performs much differently, there the exclusive comparison performed
best, IIRC.

Cheers

Uli

[1] PowerPC-based MacMini running Debian and with GCC 4.4.5.

[2] #include <iostream>
#include <ostream>
#include <iomanip>
#include <algorithm>
#include <vector>
#include <limits>
#include <stdexcept>
#include <errno.h>

/* stopclock
Helper class to measure the time spent in a function.
TODO: Port to win32 using e.g. timeGetTime(). */
struct timer
{
    static timespec gettime()
    {
        timespec ts;
        if(clock_gettime(CLOCK_MONOTONIC, &ts))
            throw std::runtime_error("clock_gettime");
        return ts;
    }

    explicit timer(char const* where):
        m_where(where)
    {
        m_start = gettime();
    }
    ~timer()
    {
        timespec const end = gettime();
        time_t dsec = (end.tv_sec - m_start.tv_sec);
        long dnsec = (end.tv_nsec - m_start.tv_nsec);
        if(dnsec < 0)
        {
            dsec -= 1;
            dnsec += 1000000000L;
        }
        std::cout << m_where << " took "
            << dsec << '.'
            << std::setfill('0') << std::setw(3) << (dnsec + 500L) / 1000000L
            << "s"
            << std::endl;
    }
    char const* m_where;
    timespec m_start;
};

/* scalar range, delimited by a min and max value */
template<typename scalar_type>
struct range
{
    scalar_type min;
    scalar_type max;

    static range
    null()
    {
        range res =
        {
            std::numeric_limits<scalar_type>::infinity(),
            -std::numeric_limits<scalar_type>::infinity()
        };
        return res;
    }

    friend std::ostream&
    operator<<(std::ostream& out, range const& r)
    {
        return out << '{' << r.min << ", " << r.max << '}';
    }
};

/* 2D point */
template<typename scalar_type>
struct point
{
    scalar_type x;
    scalar_type y;

    static point
    create(scalar_type x, scalar_type y)
    {
        point const res = {x, y};
        return res;
    }
};

/* 2D area, delimited by min and max values on two axis' */
template<typename scalar_type>
struct area
{
    range<scalar_type> x;
    range<scalar_type> y;

    static area
    null()
    {
        area const res =
        {
            range<scalar_type>::null(),
            range<scalar_type>::null(),
        };
        return res;
    }

    friend std::ostream&
    operator<<(std::ostream& out, area const& a)
    {
        return out << '{' << a.y << ", " << a.x << '}';
    }
};

template<typename scalar_type>
area<scalar_type>
get_area_naive(std::vector<point<scalar_type> > const& points)
{
    timer t(__FUNCTION__);

    area<scalar_type> res = area<scalar_type>::null();
    for(size_t i=0; i!=points.size(); ++i)
    {
        res.x.min = std::min(res.x.min, points[i].x);
        res.x.max = std::max(res.x.max, points[i].x);
        res.y.min = std::min(res.y.min, points[i].y);
        res.y.max = std::max(res.y.max, points[i].y);
    }

    return res;
}

template<typename scalar_type>
area<scalar_type>
get_area_compare_exclusive(std::vector<point<scalar_type> > const& points)
{
    timer t(__FUNCTION__);

    if(points.empty())
        return area<scalar_type>::null();
    area<scalar_type> res = {{points[0].x, points[0].x}, {points[0].y, points[0].y}};
    for(size_t i=1, size=points.size(); i!=size; ++i)
    {
        point<scalar_type> const p = points[i];
        if(p.x < res.x.min)
            res.x.min = p.x;
        else if(p.x > res.x.max)
            res.x.max = p.x;
        if(p.y < res.y.min)
            res.y.min = p.y;
        else if(p.y > res.y.max)
            res.y.max = p.y;
    }

    return res;
}

template<typename scalar_type>
area<scalar_type>
get_area_compare_inclusive(std::vector<point<scalar_type> > const& points)
{
    timer t(__FUNCTION__);

    if(points.empty())
        return area<scalar_type>::null();
    area<scalar_type> res = {{points[0].x, points[0].x}, {points[0].y, points[0].y}};
    for(size_t i=1, size=points.size(); i!=size; ++i)
    {
        point<scalar_type> const p = points[i];
        if(p.x <= res.x.min)
            res.x.min = p.x;
        else if(p.x >= res.x.max)
            res.x.max = p.x;
        if(p.y <= res.y.min)
            res.y.min = p.y;
        else if(p.y >= res.y.max)
            res.y.max = p.y;
    }

    return res;
}

template<typename scalar_type>
area<scalar_type>
get_area_pairs(std::vector<point<scalar_type> > const& points)
{
    timer t(__FUNCTION__);

    area<scalar_type> res = area<scalar_type>::null();
    for(size_t i=0, size=points.size() / 2; i!=size; ++i)
    {
        point<scalar_type> const p0 = points[2 * i];
        point<scalar_type> const p1 = points[2 * i + 1];

        scalar_type xmin = std::min(p0.x, p1.x);
        scalar_type xmax = std::max(p0.x, p1.x);
        res.x.min = std::min(res.x.min, xmin);
        res.x.max = std::max(res.x.max, xmax);

        scalar_type ymin = std::min(p0.y, p1.y);
        scalar_type ymax = std::max(p0.y, p1.y);
        res.y.min = std::min(res.y.min, ymin);
        res.y.max = std::max(res.y.max, ymax);
    }

    return res;
}

template<typename scalar_type>
void
test()
{
    srand48(0);

    size_t const num_points = 0x00400000;
    scalar_type const scale = 1000.0f;

    std::vector<point<scalar_type> > points;
    points.reserve(num_points);
    for(size_t i=0; i!=num_points; ++i)
    {
        scalar_type const x = scale * drand48();
        scalar_type const y = scale * drand48();
        points.push_back(point<scalar_type>::create(x, y));
    }

    area<scalar_type> tmp;

    tmp = get_area_naive(points);
    std::cout << tmp << std::endl;
    tmp = get_area_compare_inclusive(points);
    std::cout << tmp << std::endl;
    tmp = get_area_compare_exclusive(points);
    std::cout << tmp << std::endl;
    tmp = get_area_pairs(points);
    std::cout << tmp << std::endl;
}

int
main()
{
    std::cout << "float (size=" << sizeof (float) << "):\n";
    test<float>();

    std::cout << "double (size=" << sizeof (double) << "):\n";
    test<double>();

    std::cout << "long double (size=" << sizeof (long double) << "):\n";
    test<long double>();
}

--
      [ See http://www.gotw.ca/resources/clcm.htm for info about ]
      [ comp.lang.c++.moderated. First time posters: Do this! ]