Re: Why is java consumer/producer so much faster than C++
I made a few changes to your code:
1. I only signal if the capacity becomes non-zero or non-full.
2. If the queue has some items in it and the mutex is locked, the put thread yields to the take thread. If the queue has some free space in it and the mutex is locked, the take thread yields to the put thread.
3. Traffic in int instead of unique_ptr<int>.
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <cstdlib>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>
#include <utility>
/// Bounded, thread-safe FIFO queue for handing values between threads.
///
/// `put` blocks while the queue holds `cap` elements and `take` blocks while
/// it is empty. Any number of producer and consumer threads may use the same
/// queue concurrently.
///
/// @tparam T element type; must be movable.
template <class T>
class BlockingQueue {
public:
    /// @param cap maximum number of queued elements. Must be greater than
    ///            zero; with a capacity of zero every `put` blocks forever.
    explicit BlockingQueue(unsigned cap) : capacity_(cap) {}

    /// Appends `t` to the back of the queue, blocking while the queue is full.
    void put(T t)
    {
        std::unique_lock<std::mutex> lock(m_, std::try_to_lock);
        // Mutex busy and the queue already holds a fair backlog: give the
        // consumer some time slices before contending for the lock.
        acquire(lock, [this] {
            return size_hint_.load(std::memory_order_relaxed) > capacity_ / 4;
        });

        if (queue_.size() >= capacity_) {
            ++waiting_producers_;
            c_full_.wait(lock, [this] { return queue_.size() < capacity_; });
            --waiting_producers_;
        }

        queue_.push_back(std::move(t));
        size_hint_.store(queue_.size(), std::memory_order_relaxed);

        // Signal only when a consumer is actually blocked. Deciding this from
        // the waiter count (rather than from "size just became 1") stays
        // correct when several consumers are waiting at once.
        const bool wake_consumer = waiting_consumers_ > 0;
        lock.unlock();
        if (wake_consumer)
            c_empty_.notify_one();
    }

    /// Removes and returns the front element, blocking while the queue is
    /// empty.
    T take()
    {
        std::unique_lock<std::mutex> lock(m_, std::try_to_lock);
        // Mutex busy and the queue still has plenty of free space: give the
        // producer some time slices before contending for the lock.
        acquire(lock, [this] {
            return size_hint_.load(std::memory_order_relaxed) < 3 * capacity_ / 4;
        });

        if (queue_.empty()) {
            ++waiting_consumers_;
            c_empty_.wait(lock, [this] { return !queue_.empty(); });
            --waiting_consumers_;
        }

        T tmp = std::move(queue_.front());
        queue_.pop_front();
        size_hint_.store(queue_.size(), std::memory_order_relaxed);

        // Same reasoning as in put(): wake a producer only if one is blocked.
        const bool wake_producer = waiting_producers_ > 0;
        lock.unlock();
        if (wake_producer)
            c_full_.notify_one();
        return tmp;
    }

    /// @return true if the queue held no elements at the moment of the call.
    ///         The answer may be stale as soon as the call returns.
    bool empty() const
    {
        std::lock_guard<std::mutex> lock(m_);
        return queue_.empty();
    }

private:
    /// Upper bound on the number of back-off iterations before a thread gives
    /// up yielding and blocks on the mutex.
    static constexpr int kMaxYields = 1000;

    /// Ensures `lock` is owned on return.
    ///
    /// If the initial try-lock failed, the thread yields for as long as
    /// `other_side_should_run()` holds (bounded by kMaxYields) and then blocks
    /// on the mutex. The back-off does not re-poll the mutex; it only hands
    /// time slices to the other side before blocking.
    template <class Pred>
    static void acquire(std::unique_lock<std::mutex>& lock, Pred other_side_should_run)
    {
        int yields = 0;
        while (!lock.owns_lock()) {
            if (other_side_should_run() && ++yields < kMaxYields)
                std::this_thread::yield();
            else
                lock.lock();
        }
    }

    mutable std::mutex m_;                    // guards queue_ and the waiter counts
    std::condition_variable c_empty_, c_full_;
    std::deque<T> queue_;
    // Copy of queue_.size(), written under m_ and read without it by the
    // back-off heuristic. Reading queue_.size() itself without the lock would
    // be a data race.
    std::atomic<std::size_t> size_hint_{0};
    std::size_t waiting_consumers_ = 0;       // threads blocked on c_empty_
    std::size_t waiting_producers_ = 0;       // threads blocked on c_full_
    const std::size_t capacity_;              // size_t so 3 * capacity_ cannot wrap in unsigned
};
// Benchmark driver: one producer pushes `nitems` integers through a bounded
// queue and one consumer pops the same number, scattering them into a scratch
// array at pseudo-random indices.
int main()
{
    BlockingQueue<int> produced(100000);
    const int nitems = 100000000;
    std::srand(12345);  // fixed seed so every run uses the same index sequence

    // The lambdas are passed to std::thread directly; wrapping them in
    // std::function would only add type erasure.
    std::thread producer1([&]() {
        for (int i = nitems; i-- > 0;) {
            produced.put(i);
        }
    });

    std::thread consumer1([&]() {
        const int size = 10000;
        int arr[size] = {};  // scratch sink for the consumed values
        for (int i = nitems; i-- > 0;) {
            arr[std::rand() % size] = produced.take();
        }
    });

    producer1.join();
    consumer1.join();
}
On my system this sped the C++ solution up considerably (to about 13.7 seconds). I didn't time the Java solution.