Re: how space-efficient is bit_vector? bitset?
This is a multi-part message in MIME format.
--------------050609020504000603020101
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Markus Dehmann wrote:
I am planning to have tens of thousands of bit vectors (or bitsets).
Each of them has about 20,000 bits, but it's sparse: In a given
vector, only 20-30 bits will be set. What data structure would be best
for space (and probably also time) efficiency? Is std::bitset better
than std::bit_vector (vector<bool>)? Can I trust that the used space
for each of the bitvectors will be in the order of 20-30 bits (i.e.
the expected number of bits set), or rather 20,000 bits (the constant
number of bits that each vector has)? Or are there other, better
classes to use? Sparse Matrix classes?
You could use map. I have attached the Austria C++ "rangemap" that can
be used as a sparse bitmap.
What kind of operations do you need to have on your bitmap ?
--------------050609020504000603020101
Content-Type: text/plain;
name="at_rangemap.h"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="at_rangemap.h"
//
// The Austria library is copyright (c) Gianni Mariani 2004.
//
// Grant Of License. Grants to LICENSEE the non-exclusive right to use the Austria
// library subject to the terms of the LGPL.
//
// A copy of the license is available in this directory or one may be found at this URL:
// http://www.gnu.org/copyleft/lesser.txt
//
/**
* at_rangemap.h
*
*/
#ifndef x_at_rangemap_h_x
#define x_at_rangemap_h_x 1
#include "at_exports.h"
#include "at_os.h"
#include "at_assert.h"
#include <map>
// Austria namespace
namespace at
{
// ======== TypeRange =================================================
/**
* TypeRange describes the range of a particular type
*
*/
template <typename w_RangeType>
class TypeRange
{
public:
// range type
typedef w_RangeType t_RangeType;
// ======== Adjacent ==============================================
/**
* Adjacent returns true if the two parameters are "one apart"
*
* @param i_lesser is the lesser of the two values
* @param i_greater is the greater of the two
* @return true is no other elements exist between i_lesser and i_greater
*/
static bool Adjacent(
const t_RangeType & i_lesser,
const t_RangeType & i_greater
) {
t_RangeType l_greater_less( i_greater );
-- l_greater_less; // go to the earlier element
// deal with wrapping
if ( i_greater < l_greater_less )
{
return false;
}
return !( i_lesser < l_greater_less );
}
};
// ======== RangeMap ==================================================
/**
* RangeMap is a template that defines ranges.
*
*/
template <typename w_RangeType, typename w_RangeTraits=TypeRange<w_RangeType> >
class RangeMap
{
public:
// range type
typedef w_RangeType t_RangeType;
typedef w_RangeTraits t_RangeTraits;
// index on the end of the range
typedef std::map< t_RangeType, t_RangeType > t_Map;
typedef typename t_Map::iterator t_Iterator;
// ======== AddRange ==============================================
/**
* Add a segment to the range.
*
* @param i_begin The beginning of the range (inclusive)
* @param i_end The end of the range (inclusive)
* @return nothing
*/
void AddRange( const t_RangeType & i_begin, const t_RangeType & i_end )
{
const bool l_less_than( i_end < i_begin );
const t_RangeType & l_begin = ! l_less_than ? i_begin : i_end;
const t_RangeType & l_end = l_less_than ? i_begin : i_end;
// deal with an empty map here
if ( m_map.empty() )
{
// shorthand adding the first element into the map
m_map[ l_end ] = l_begin;
return;
}
// see if there is a segment to merge - find the element that preceeds
// l_begin
t_Iterator l_begin_bound = m_map.lower_bound( l_begin );
if ( l_begin_bound == m_map.end() )
{
// l_begin is after the last element
-- l_begin_bound;
if ( t_RangeTraits::Adjacent( l_begin_bound->first, l_begin ) )
{
// yes, they are mergable
t_RangeType l_temp = l_begin_bound->second;
m_map.erase( l_begin_bound );
m_map[ l_end ] = l_temp;
return;
}
// not mergable - add the segment at the end
m_map[ l_end ] = l_begin;
return;
}
// if the end of the segment being inserted is not beyond this one
if ( ( l_end < l_begin_bound->second ) && ! t_RangeTraits::Adjacent( l_end, l_begin_bound->second ) )
{
// NOT mergable with subsequent segments
if ( l_begin_bound == m_map.begin() )
{
// There is no previous segment
m_map[ l_end ] = l_begin;
return;
}
// The segment being inserted can't be merged at the end
// see if it can be merged with the previous one
t_Iterator l_previous = l_begin_bound;
-- l_previous;
AT_Assert( l_previous->first < l_begin );
if ( ! t_RangeTraits::Adjacent( l_previous->first, l_begin ) )
{
// not overlapping with previous and not mergable
m_map[ l_end ] = l_begin;
return;
}
else
{
// we are mergable with the previous element
// yes, they are mergable
t_RangeType l_temp = l_previous->second;
m_map.erase( l_previous );
m_map[ l_end ] = l_temp;
return;
}
}
if ( l_begin_bound == m_map.begin() )
{
if ( l_end < l_begin_bound->first )
{
if ( l_end < l_begin_bound->second )
{
if ( t_RangeTraits::Adjacent( l_end, l_begin_bound->second ) )
{
l_begin_bound->second = l_begin;
return;
}
else
{
m_map[ l_end ] = l_begin;
return;
}
}
else
{
if ( l_begin < l_begin_bound->second )
{
l_begin_bound->second = l_begin;
}
return;
}
}
else
{
t_RangeType l_new_begin = l_begin;
if ( l_begin_bound->second < l_begin )
{
l_new_begin = l_begin_bound->second;
}
// Check to see what segment is close to the end
t_Iterator l_end_bound = m_map.lower_bound( l_end );
if ( l_end_bound == m_map.end() )
{
// erase all the segments from l_previous to the end and
// replace with one
m_map.erase( l_begin_bound, l_end_bound );
m_map[ l_end ] = l_new_begin;
return;
}
if ( l_end < l_end_bound->second && ! t_RangeTraits::Adjacent( l_end, l_end_bound->second ) )
{
m_map.erase( l_begin_bound, l_end_bound );
m_map[ l_end ] = l_new_begin;
return;
}
// merge with the current end
m_map.erase( l_begin_bound, l_end_bound ); // erase segments in between
l_end_bound->second = l_new_begin;
return;
}
}
if ( l_begin_bound == m_map.begin() )
{
// no previous ranges
// see if we can merge with the current range
}
// find the previous iterator
t_Iterator l_previous = l_begin_bound;
-- l_previous;
t_RangeType l_new_begin = l_begin;
if ( t_RangeTraits::Adjacent( l_previous->first, l_begin ) )
{
l_new_begin = l_previous->second;
}
else
{
++ l_previous;
if ( l_previous->second < l_new_begin )
{
l_new_begin = l_previous->second;
}
}
t_RangeType l_new_end = l_end;
// Check to see what segment is close to the end
t_Iterator l_end_bound = m_map.lower_bound( l_end );
if ( l_end_bound == m_map.end() )
{
// erase all the segments from l_previous to the end and
// replace with one
m_map.erase( l_previous, l_end_bound );
m_map[ l_end ] = l_new_begin;
return;
}
if ( l_end < l_end_bound->second && ! t_RangeTraits::Adjacent( l_end, l_end_bound->second ) )
{
m_map.erase( l_previous, l_end_bound );
m_map[ l_end ] = l_new_begin;
return;
}
// merge with the current end
m_map.erase( l_previous, l_end_bound ); // erase segments in between
l_end_bound->second = l_new_begin;
return;
}
// ======== SubtractRange =========================================
/**
* SubtractRange removes the range. (opposite of Add)
*
*
* @param i_begin Beginning of range to subtract
* @param i_end End of range to subtract
* @return nothing
*/
void SubtractRange( const t_RangeType & i_begin, const t_RangeType & i_end )
{
const bool l_less_than( i_end < i_begin );
const t_RangeType & l_begin = ! l_less_than ? i_begin : i_end;
const t_RangeType & l_end = l_less_than ? i_begin : i_end;
// deal with an empty map here
if ( m_map.empty() )
{
// Nothing to remove
return;
}
// See if we find a segment
// l_begin
t_Iterator l_begin_bound = m_map.lower_bound( l_begin );
if ( l_begin_bound == m_map.end() )
{
// this does not cover any segments
return;
}
if ( l_begin_bound->second < l_begin )
{
// this segment is broken up
t_RangeType l_newend = l_begin;
-- l_newend;
m_map[ l_newend ] = l_begin_bound->second;
l_begin_bound->second = l_begin;
}
t_Iterator l_end_bound = m_map.lower_bound( l_end );
if ( l_end_bound == m_map.end() )
{
// erase all the segments from the beginning to end
m_map.erase( l_begin_bound, l_end_bound );
return;
}
if ( !( l_end < l_end_bound->first ) )
{
// the segment end must be equal the segment given
++ l_end_bound;
m_map.erase( l_begin_bound, l_end_bound );
return;
}
// need to break up the final segment
m_map.erase( l_begin_bound, l_end_bound );
if ( !( l_end < l_end_bound->second ) )
{
t_RangeType l_newbegin = l_end;
++ l_newbegin;
l_end_bound->second = l_newbegin;
}
return;
}
// ======== IsSet =================================================
/**
* Checks to see if the position is set
*
* @param i_pos
* @return True if the position is set
*/
bool IsSet( const t_RangeType & i_pos )
{
t_Iterator l_bound = m_map.lower_bound( i_pos );
if ( l_bound == m_map.end() )
{
// this does not cover any segments
return false;
}
return !( i_pos < l_bound->second );
}
t_Map m_map;
};
}; // namespace
#endif // x_at_rangemap_h_x
--------------050609020504000603020101--