Problem with STL vector performance, benchmarks included
I found that old post:
http://groups.google.com/group/comp.lang.c++/browse_frm/thread/3a2562c9a5f8998/15519204726d01e8?lnk=gst&q=vector+no+surprise&rnum=2#15519204726d01e8
I just erased the #include <kubux.....> lines.
****** old post for your convenience ********
You are right:
#include <algorithm>
#include <ctime>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <vector>
#include <kubux/bits/allocator.cc>
#include <kubux/bits/new_delete_allocator.cc>
#include <kubux/bits/malloc_free_allocator.cc>
// Minimal fixed-size array that obtains its storage from an allocator and
// default-constructs every element.  It exists only as a bare-bones
// comparison point for std::vector in the benchmark: no resizing, no bounds
// checking, just operator[] on a contiguous buffer.
//
// T     - element type; must be default-constructible.
// Alloc - allocator type supplying the storage and the member typedefs.
template < typename T, typename Alloc = std::allocator<T> >
class stupid {
public:
  typedef Alloc allocator;
  typedef typename allocator::value_type value_type;
  typedef typename allocator::size_type size_type;
  typedef typename allocator::difference_type difference_type;
  typedef typename allocator::pointer pointer;
  typedef typename allocator::const_pointer const_pointer;
  typedef typename allocator::reference reference;
  typedef typename allocator::const_reference const_reference;
  typedef pointer iterator;
  typedef const_pointer const_iterator;
  typedef typename std::reverse_iterator< iterator > reverse_iterator;
  typedef typename std::reverse_iterator< const_iterator >
    const_reverse_iterator;

private:
  pointer ptr;         // start of the allocator-provided buffer
  size_type the_size;  // number of elements the buffer was allocated for

  // Copying is disabled (declared, never defined): the implicit copy
  // operations would duplicate `ptr` and release the buffer twice.
  stupid ( const stupid & );
  stupid & operator= ( const stupid & );

  // Destroys the first `constructed` elements in reverse order, then returns
  // the whole buffer to the allocator.
  void destroy_and_release ( size_type constructed ) {
    iterator iter = ptr + constructed;
    while ( iter != ptr ) {
      -- iter;
      iter->~T();
    }
    allocator alloc;
    alloc.deallocate( ptr, the_size );
  }

public:
  // Allocates raw storage for `length` elements and default-constructs each
  // one in place.  The storage must come from the allocator (not new[]):
  // the destructor releases it through allocator::deallocate, and new[]
  // would already have constructed the elements a first time.
  // If an element constructor throws, the elements built so far are
  // destroyed, the storage is released, and the exception is rethrown.
  stupid ( size_type length ) :
    ptr ( allocator().allocate( length ) ),
    the_size ( length )
  {
    size_type built = 0;
    try {
      for ( ; built != the_size; ++ built ) {
        ::new( static_cast<void*>( ptr + built ) ) T();
      }
    } catch ( ... ) {
      destroy_and_release( built );
      throw;
    }
  }

  // Destroys all elements in reverse order of construction and releases the
  // buffer.
  ~stupid ( void ) {
    destroy_and_release( the_size );
    the_size = 0;
  }

  // Unchecked element access; `index` must be less than the constructed
  // length.
  reference operator[] ( size_type index ) {
    return( this->ptr[ index ] );
  }

  // Unchecked read-only element access; `index` must be less than the
  // constructed length.
  const_reference operator[] ( size_type index ) const {
    return( this->ptr[ index ] );
  }
}; // stupid
// Benchmark driver: writes the value 5 into 50,000,000 ints through five
// different access paths and prints the elapsed std::clock() ticks for each
// loop.  Only the write loop is timed; allocation, initialisation and
// clean-up happen outside the measured region.
int main ( void ) {
  const unsigned long l = 50000000;
  {
    // std::vector accessed through operator[].
    std::vector< int > v ( l );
    std::clock_t loop_start = std::clock();
    for ( unsigned long i = 0; i < l; ++i ) {
      v[i] = 5;
    }
    std::clock_t loop_end = std::clock();
    std::cout << "vector: " << loop_end - loop_start << std::endl;
  }
  {
    // Raw heap array accessed by index.  It is zero-filled first so that,
    // like the vector case, the memory has been written before timing starts.
    int* v = new int [ l ];
    std::fill_n(v, l, 0);
    std::clock_t loop_start = std::clock();
    for ( unsigned long i = 0; i < l; ++i ) {
      v[i] = 5;
    }
    std::clock_t loop_end = std::clock();
    std::cout << "array: " << loop_end - loop_start << std::endl;
    delete [] v;  // was leaked in the original
  }
  {
    // Hand-written allocator-backed array accessed through operator[].
    stupid< int, std::allocator<int> > v ( l );
    std::clock_t loop_start = std::clock();
    for ( unsigned long i = 0; i < l; ++i ) {
      v[i] = 5;
    }
    std::clock_t loop_end = std::clock();
    std::cout << "stupid: " << loop_end - loop_start << std::endl;
  }
  {
    // std::vector traversed with iterators; v.end() is deliberately
    // re-evaluated on every iteration, as in the original measurement.
    std::vector<int> v ( l );
    std::clock_t loop_start = std::clock();
    for ( std::vector<int>::iterator i = v.begin();
          i != v.end(); ++i ) {
      *i = 5;
    }
    std::clock_t loop_end = std::clock();
    // Labelled "iterator" so it can be told apart from the raw-pointer case
    // below (the original printed "ptr: " for both).
    std::cout << "iterator: " << loop_end - loop_start << std::endl;
  }
  {
    // Raw heap array traversed with a pointer.
    int* v = new int [ l ];
    std::fill_n(v, l, 0);
    std::clock_t loop_start = std::clock();
    for ( int* i = v; i < v+l; ++i ) {
      *i = 5;
    }
    std::clock_t loop_end = std::clock();
    std::cout << "ptr: " << loop_end - loop_start << std::endl;
    delete [] v;  // was leaked in the original
  }
  return 0;
}
a.out
vector: 320000
array: 320000
stupid: 350000
iterator: 340000
ptr: 340000
No surprises anymore.
Thanks
Kai-Uwe Bux
***************************************************
I ran the reported test on Visual Studio Professional 2005 with its
standard STL implementation, which should be supplied by Dinkumware.
My CPU is a dual-core T2500 with 2 GB of DDR2.
I tried both the Intel 9.1 compiler and the Microsoft one.
In both cases I used the O3 optimizations, release mode, and with the
Intel one I also tried the /Qansi_alias /Qipo options.
Results:
Microsoft:
vector: 141
array: 94
stupid: 93
ptr: 172
ptr: 78
Intel:
vector: 312
array: 156 // becomes 45 if I require P4 extensions; the other values
remain nearly the same
stupid: 157
ptr: 1047
ptr: 156
I admit I'm quite disappointed with the results obtained with the Intel
compiler.
Is there any fault in the way the test was conducted or with the
source code I posted?
If everything is correct, how could I investigate where the problem
is?
Cheers
StephQ