ConcurrentQueue Testing

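The header below is the complete vendored `concurrentqueue.h`. Before the header itself, here is a minimal smoke test exercising the public API it declares (`enqueue` with a `ProducerToken`, `try_dequeue`, and the traits template parameter). This is a sketch of my own, not part of the library: the file name `concurrentqueue.h` and the `SmallBlockTraits` struct are assumptions for illustration, the latter following the trait-shadowing pattern described in the comment above `ConcurrentQueueDefaultTraits`.

    // Smoke test: two producers enqueue 1000 ints each through their own
    // tokens (the header recommends at most one token per thread), then a
    // single consumer drains the queue and verifies the count.
    #include "concurrentqueue.h"   // assumed name for the header below
    #include <cstdio>
    #include <thread>

    // Hypothetical traits override: shadow just BLOCK_SIZE, inherit the rest.
    struct SmallBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits
    {
        static const size_t BLOCK_SIZE = 64;   // must be a power of 2
    };

    int main()
    {
        moodycamel::ConcurrentQueue<int, SmallBlockTraits> q;

        std::thread producers[2];
        for (int t = 0; t != 2; ++t) {
            producers[t] = std::thread([&q, t] {
                moodycamel::ProducerToken tok(q);  // explicit producer token
                for (int i = 0; i != 1000; ++i) {
                    q.enqueue(tok, t * 1000 + i);
                }
            });
        }
        for (auto& p : producers) {
            p.join();
        }

        // With all producers joined, the queue has stabilized, so draining
        // it should yield exactly 2000 items.
        int item, count = 0;
        while (q.try_dequeue(item)) {
            ++count;
        }
        std::printf("dequeued %d items (expected 2000)\n", count);
        return count == 2000 ? 0 : 1;
    }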
// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue.// An overview, including benchmark results, is provided here://     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++// The full design is also described in excruciating detail at://    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue// Simplified BSD license:// Copyright (c) 2013-2016, Cameron Desrochers.// All rights reserved.//// Redistribution and use in source and binary forms, with or without modification,// are permitted provided that the following conditions are met://// - Redistributions of source code must retain the above copyright notice, this list of// conditions and the following disclaimer.// - Redistributions in binary form must reproduce the above copyright notice, this list of// conditions and the following disclaimer in the documentation and/or other materials// provided with the distribution.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.#pragma once#if defined(__GNUC__)// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings// upon assigning any computed values)#pragma GCC diagnostic push#pragma GCC diagnostic ignored "-Wconversion"#ifdef MCDBGQ_USE_RELACY#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"#endif#endif#if defined(__APPLE__)#include "TargetConditionals.h"#endif#ifdef MCDBGQ_USE_RELACY#include "relacy/relacy_std.hpp"#include "relacy_shims.h"// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations.// We'll override the default trait malloc ourselves without a macro.#undef new#undef delete#undef malloc#undef free#else#include <atomic>// Requires C++11. 
Sorry VS2010.#include <cassert>#endif#include <cstddef>              // for max_align_t#include <cstdint>#include <cstdlib>#include <type_traits>#include <algorithm>#include <utility>#include <limits>#include <climits>// for CHAR_BIT#include <array>#include <thread>// partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading// Platform-specific definitions of a numeric thread ID type and an invalid valuenamespace moodycamel { namespace details {template<typename thread_id_t> struct thread_id_converter {typedef thread_id_t thread_id_numeric_size_t;typedef thread_id_t thread_id_hash_t;static thread_id_hash_t prehash(thread_id_t const& x) { return x; }};} }#if defined(MCDBGQ_USE_RELACY)namespace moodycamel { namespace details {typedef std::uint32_t thread_id_t;static const thread_id_t invalid_thread_id  = 0xFFFFFFFFU;static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;static inline thread_id_t thread_id() { return rl::thread_index(); }} }#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)// No sense pulling in windows.h in a header, we'll manually declare the function// we use and rely on backwards-compatibility for this not to breakextern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);namespace moodycamel { namespace details {static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows");typedef std::uint32_t thread_id_t;static const thread_id_t invalid_thread_id  = 0;// See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspxstatic const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU;// Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }} }#elif defined(__APPLE__) && TARGET_OS_IPHONEnamespace moodycamel { namespace details {static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");typedef std::thread::id thread_id_t;static const thread_id_t invalid_thread_id;         // Default ctor creates invalid ID// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't// be.static inline thread_id_t thread_id() { return std::this_thread::get_id(); }template<std::size_t> struct thread_id_size { };template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; };template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; };template<> struct thread_id_converter<thread_id_t> {typedef thread_id_size<sizeof(thread_id_t)>::numeric_t thread_id_numeric_size_t;#ifndef __APPLE__typedef std::size_t thread_id_hash_t;#elsetypedef thread_id_numeric_size_t thread_id_hash_t;#endifstatic thread_id_hash_t prehash(thread_id_t const& x){#ifndef __APPLE__return std::hash<std::thread::id>()(x);#elsereturn *reinterpret_cast<thread_id_hash_t const*>(&x);#endif}};} }#else// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475// In order to get a numeric thread ID in a platform-independent way, we use a thread-local// static variable's address as a thread identifier :-)#if defined(__GNUC__) || defined(__INTEL_COMPILER)#define MOODYCAMEL_THREADLOCAL __thread#elif defined(_MSC_VER)#define MOODYCAMEL_THREADLOCAL __declspec(thread)#else// Assume C++11 compliant compiler#define 
MOODYCAMEL_THREADLOCAL thread_local#endifnamespace moodycamel { namespace details {typedef std::uintptr_t thread_id_t;static const thread_id_t invalid_thread_id  = 0;// Address can't be nullptrstatic const thread_id_t invalid_thread_id2 = 1;// Member accesses off a null pointer are also generally invalid. Plus it's not aligned.static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast<thread_id_t>(&x); }} }#endif// Exceptions#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))#define MOODYCAMEL_EXCEPTIONS_ENABLED#define MOODYCAMEL_TRY try#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)#define MOODYCAMEL_RETHROW throw#define MOODYCAMEL_THROW(expr) throw (expr)#else#define MOODYCAMEL_TRY if (true)#define MOODYCAMEL_CATCH(...) else if (false)#define MOODYCAMEL_RETHROW#define MOODYCAMEL_THROW(expr)#endif#endif#ifndef MOODYCAMEL_NOEXCEPT#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)#define MOODYCAMEL_NOEXCEPT#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-(// We have to assume *all* non-trivial constructors may throw on VS2012!#define MOODYCAMEL_NOEXCEPT _NOEXCEPT#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value)#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900#define MOODYCAMEL_NOEXCEPT _NOEXCEPT#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference<valueType>::value && std::is_move_constructible<type>::value ? std::is_trivially_move_constructible<type>::value || std::is_nothrow_move_constructible<type>::value : std::is_trivially_copy_constructible<type>::value || std::is_nothrow_copy_constructible<type>::value)#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference<valueType>::value && std::is_move_assignable<type>::value ? 
std::is_trivially_move_assignable<type>::value || std::is_nothrow_move_assignable<type>::value : std::is_trivially_copy_assignable<type>::value || std::is_nothrow_copy_assignable<type>::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))#else#define MOODYCAMEL_NOEXCEPT noexcept#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)#endif#endif#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED#ifdef MCDBGQ_USE_RELACY#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED#else//// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445//// g++ <=4.7 doesn't support thread_local either.//// Finally, iOS/ARM doesn't have support for it either.//#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE)//// Assume `thread_local` is fully supported in all other C++11 compilers/runtimes//#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED//#endif#endif#endif// VS2012 doesn't support deleted functions. // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called.#ifndef MOODYCAMEL_DELETE_FUNCTION#if defined(_MSC_VER) && _MSC_VER < 1800#define MOODYCAMEL_DELETE_FUNCTION#else#define MOODYCAMEL_DELETE_FUNCTION = delete#endif#endif// Compiler-specific likely/unlikely hintsnamespace moodycamel { namespace details {#if defined(__GNUC__)inline bool likely(bool x) { return __builtin_expect((x), true); }inline bool unlikely(bool x) { return __builtin_expect((x), false); }#elseinline bool likely(bool x) { return x; }inline bool unlikely(bool x) { return x; }#endif} }#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG#include "internal/concurrentqueue_internal_debug.h"#endifnamespace moodycamel {namespace details {template<typename T>struct const_numeric_max {static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");static const T value = std::numeric_limits<T>::is_signed? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1): static_cast<T>(-1);};#ifdef __GNUC__typedef ::max_align_t max_align_t;      // GCC forgot to add it to std:: for a while#elsetypedef std::max_align_t max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::#endif}// Default traits for the ConcurrentQueue. To change some of the// traits without re-implementing all of them, inherit from this// struct and shadow the declarations you wish to be different;// since the traits are used as a template type parameter, the// shadowed declarations will be used where defined, and the defaults// otherwise.struct ConcurrentQueueDefaultTraits{// General-purpose size type. std::size_t is strongly recommended.typedef std::size_t size_t;// The type used for the enqueue and dequeue indices. Must be at least as// large as size_t. 
Should be significantly larger than the number of elements// you expect to hold at once, especially if you have a high turnover rate;// for example, on 32-bit x86, if you expect to have over a hundred million// elements or pump several million elements through your queue in a very// short space of time, using a 32-bit type *may* trigger a race condition.// A 64-bit int type is recommended in that case, and in practice will// prevent a race condition no matter the usage of the queue. Note that// whether the queue is lock-free with a 64-int type depends on the whether// std::atomic<std::uint64_t> is lock-free, which is platform-specific.typedef std::size_t index_t;// Internally, all elements are enqueued and dequeued from multi-element// blocks; this is the smallest controllable unit. If you expect few elements// but many producers, a smaller block size should be favoured. For few producers// and/or many elements, a larger block size is preferred. A sane default// is provided. Must be a power of 2.static const size_t BLOCK_SIZE = 32;// For explicit producers (i.e. when using a producer token), the block is// checked for being empty by iterating through a list of flags, one per element.// For large block sizes, this is too inefficient, and switching to an atomic// counter-based approach is faster. The switch is made for block sizes strictly// larger than this threshold.static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;// How many full blocks can be expected for a single explicit producer? This should// reflect that number's maximum for optimal performance. Must be a power of 2.static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;// How many full blocks can be expected for a single implicit producer? This should// reflect that number's maximum for optimal performance. Must be a power of 2.static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;// The initial size of the hash table mapping thread IDs to implicit producers.// Note that the hash is resized every time it becomes half full.// Must be a power of two, and either 0 or at least 1. If 0, implicit production// (using the enqueue methods without an explicit producer token) is disabled.static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;// Controls the number of items that an explicit consumer (i.e. one with a token)// must consume before it causes all consumers to rotate and move on to the next// internal queue.static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;// The maximum number of elements (inclusive) that can be enqueued to a sub-queue.// Enqueue operations that would cause this limit to be surpassed will fail. 
Note// that this limit is enforced at the block level (for performance reasons), i.e.// it's rounded up to the nearest block size.static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;#ifndef MCDBGQ_USE_RELACY// Memory allocation can be customized if needed.// malloc should return nullptr on failure, and handle alignment like std::malloc.#if defined(malloc) || defined(free)// Gah, this is 2015, stop defining macros that break standard code already!// Work around malloc/free being special macros:static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }static inline void WORKAROUND_free(void* ptr) { return free(ptr); }static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }#elsestatic inline void* malloc(size_t size) { return std::malloc(size); }static inline void free(void* ptr) { return std::free(ptr); }#endif#else// Debug versions when running under the Relacy race detector (ignore// these in user code)static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }static inline void free(void* ptr) { return rl::rl_free(ptr, $); }#endif};// When producing or consuming many elements, the most efficient way is to://    1) Use one of the bulk-operation methods of the queue with a token//    2) Failing that, use the bulk-operation methods without a token//    3) Failing that, create a token and use that with the single-item methods//    4) Failing that, use the single-parameter methods of the queue// Having said that, don't create tokens willy-nilly -- ideally there should be// a maximum of one token per thread (of each kind).struct ProducerToken;struct ConsumerToken;template<typename T, typename Traits> class ConcurrentQueue;template<typename T, typename Traits> class BlockingConcurrentQueue;class ConcurrentQueueTests;namespace details{struct ConcurrentQueueProducerTypelessBase{ConcurrentQueueProducerTypelessBase* next;std::atomic<bool> inactive;ProducerToken* token;ConcurrentQueueProducerTypelessBase(): next(nullptr), inactive(false), token(nullptr){}};template<bool use32> struct _hash_32_or_64 {static inline std::uint32_t hash(std::uint32_t h){// MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp// Since the thread ID is already unique, all we really want to do is propagate that// uniqueness evenly across all the bits, so that we can use a subset of the bits while// reducing collisions significantlyh ^= h >> 16;h *= 0x85ebca6b;h ^= h >> 13;h *= 0xc2b2ae35;return h ^ (h >> 16);}};template<> struct _hash_32_or_64<1> {static inline std::uint64_t hash(std::uint64_t h){h ^= h >> 33;h *= 0xff51afd7ed558ccd;h ^= h >> 33;h *= 0xc4ceb9fe1a85ec53;return h ^ (h >> 33);}};template<std::size_t size> struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {  };static inline size_t hash_thread_id(thread_id_t id){static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values");return static_cast<size_t>(hash_32_or_64<sizeof(thread_id_converter<thread_id_t>::thread_id_hash_t)>::hash(thread_id_converter<thread_id_t>::prehash(id)));}template<typename T>static inline bool circular_less_than(T a, T b){#ifdef _MSC_VER#pragma warning(push)#pragma warning(disable: 4554)#endifstatic_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "circular_less_than is intended to be used only with unsigned integer types");return static_cast<T>(a - b) > 
static_cast<T>(static_cast<T>(1) << static_cast<T>(sizeof(T) * CHAR_BIT - 1));#ifdef _MSC_VER#pragma warning(pop)#endif}template<typename U>static inline char* align_for(char* ptr){const std::size_t alignment = std::alignment_of<U>::value;return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;}template<typename T>static inline T ceil_to_pow_2(T x){static_assert(std::is_integral<T>::value && !std::numeric_limits<T>::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types");// Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2--x;x |= x >> 1;x |= x >> 2;x |= x >> 4;for (std::size_t i = 1; i < sizeof(T); i <<= 1) {x |= x >> (i << 3);}++x;return x;}template<typename T>static inline void swap_relaxed(std::atomic<T>& left, std::atomic<T>& right){T temp = std::move(left.load(std::memory_order_relaxed));left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed);right.store(std::move(temp), std::memory_order_relaxed);}template<typename T>static inline T const& nomove(T const& x){return x;}template<bool Enable>struct nomove_if{template<typename T>static inline T const& eval(T const& x){return x;}};template<>struct nomove_if<false>{template<typename U>static inline auto eval(U&& x)-> decltype(std::forward<U>(x)){return std::forward<U>(x);}};template<typename It>static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it){return *it;}#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)template<typename T> struct is_trivially_destructible : std::is_trivially_destructible<T> { };#elsetemplate<typename T> struct is_trivially_destructible : std::has_trivial_destructor<T> { };#endif#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED#ifdef MCDBGQ_USE_RELACYtypedef RelacyThreadExitListener ThreadExitListener;typedef RelacyThreadExitNotifier ThreadExitNotifier;#elsestruct ThreadExitListener{typedef void (*callback_t)(void*);callback_t callback;void* userData;ThreadExitListener* next;// reserved for use by the ThreadExitNotifier};class ThreadExitNotifier{public:static void subscribe(ThreadExitListener* listener){auto& tlsInst = instance();listener->next = tlsInst.tail;tlsInst.tail = listener;}static void unsubscribe(ThreadExitListener* listener){auto& tlsInst = instance();ThreadExitListener** prev = &tlsInst.tail;for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {if (ptr == listener) {*prev = ptr->next;break;}prev = &ptr->next;}}private:ThreadExitNotifier() : tail(nullptr) { }ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION;~ThreadExitNotifier(){// This thread is about to exit, let everyone know!assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {ptr->callback(ptr->userData);}}// Thread-localstatic inline ThreadExitNotifier& instance(){static thread_local ThreadExitNotifier notifier;return notifier;}private:ThreadExitListener* tail;};#endif#endiftemplate<typename T> struct static_is_lock_free_num { enum { value = 0 }; };template<> struct static_is_lock_free_num<signed char> { enum { value = ATOMIC_CHAR_LOCK_FREE }; };template<> struct static_is_lock_free_num<short> { enum { value = ATOMIC_SHORT_LOCK_FREE }; };template<> struct static_is_lock_free_num<int> { enum { value = ATOMIC_INT_LOCK_FREE }; };template<> struct static_is_lock_free_num<long> { enum { value = ATOMIC_LONG_LOCK_FREE }; };template<> struct static_is_lock_free_num<long long> { enum { value = ATOMIC_LLONG_LOCK_FREE }; };template<typename T> struct static_is_lock_free : static_is_lock_free_num<typename std::make_signed<T>::type> {  };template<> struct static_is_lock_free<bool> { enum { value = ATOMIC_BOOL_LOCK_FREE }; };template<typename U> struct static_is_lock_free<U*> { enum { value = ATOMIC_POINTER_LOCK_FREE }; };}struct ProducerToken{template<typename T, typename Traits>explicit ProducerToken(ConcurrentQueue<T, Traits>& queue);template<typename T, typename Traits>explicit ProducerToken(BlockingConcurrentQueue<T, Traits>& queue);ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT: producer(other.producer){other.producer = nullptr;if (producer != nullptr) {producer->token = this;}}inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT{swap(other);return *this;}void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT{std::swap(producer, other.producer);if (producer != nullptr) {producer->token = this;}if (other.producer != nullptr) {other.producer->token = &other;}}// A token is always valid unless://     1) Memory allocation failed during construction//     2) It was moved via the move constructor//        (Note: assignment does a swap, leaving both potentially valid)//     3) The associated queue was destroyed// Note that if valid() returns true, that only indicates// that the token is valid for use with a specific queue,// but not which one; that's up to the user to track.inline bool valid() const { return producer != nullptr; }~ProducerToken(){if (producer != nullptr) {producer->token = nullptr;producer->inactive.store(true, std::memory_order_release);}}// Disable copying and assignmentProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION;private:template<typename T, typename Traits> friend class ConcurrentQueue;friend class ConcurrentQueueTests;protected:details::ConcurrentQueueProducerTypelessBase* producer;};struct ConsumerToken{template<typename T, typename Traits>explicit ConsumerToken(ConcurrentQueue<T, Traits>& q);template<typename T, typename Traits>explicit ConsumerToken(BlockingConcurrentQueue<T, Traits>& q);ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT: initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer){}inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT{swap(other);return *this;}void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT{std::swap(initialOffset, 
other.initialOffset);std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);std::swap(currentProducer, other.currentProducer);std::swap(desiredProducer, other.desiredProducer);}// Disable copying and assignmentConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION;private:template<typename T, typename Traits> friend class ConcurrentQueue;friend class ConcurrentQueueTests;private: // but shared with ConcurrentQueuestd::uint32_t initialOffset;std::uint32_t lastKnownGlobalOffset;std::uint32_t itemsConsumedFromCurrent;details::ConcurrentQueueProducerTypelessBase* currentProducer;details::ConcurrentQueueProducerTypelessBase* desiredProducer;};// Need to forward-declare this swap because it's in a namespace.// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespacestemplate<typename T, typename Traits>inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT;template<typename T, typename Traits = ConcurrentQueueDefaultTraits>class ConcurrentQueue{public:typedef ::moodycamel::ProducerToken producer_token_t;typedef ::moodycamel::ConsumerToken consumer_token_t;typedef typename Traits::index_t index_t;typedef typename Traits::size_t size_t;static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast<std::uint32_t>(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);#ifdef _MSC_VER#pragma warning(push)#pragma warning(disable: 4307)// + integral constant overflow (that's what the ternary expression is for!)#pragma warning(disable: 4309)// static_cast: Truncation of constant value#endifstatic const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max<size_t>::value - static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? 
details::const_numeric_max<size_t>::value : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE);#ifdef _MSC_VER#pragma warning(pop)#endifstatic_assert(!std::numeric_limits<size_t>::is_signed && std::is_integral<size_t>::value, "Traits::size_t must be an unsigned integral type");static_assert(!std::numeric_limits<index_t>::is_signed && std::is_integral<index_t>::value, "Traits::index_t must be an unsigned integral type");static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t");static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)");static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)");static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)");public:// Creates a queue with at least `capacity` element slots; note that the// actual number of elements that can be inserted without additional memory// allocation depends on the number of producers and the block size (e.g. if// the block size is equal to `capacity`, only a single block will be allocated// up-front, which means only a single producer will be able to enqueue elements// without an extra allocation -- blocks aren't shared between producers).// This method is not thread safe -- it is up to the user to ensure that the// queue is fully constructed before it starts being used by other threads (this// includes making the memory effects of construction visible, possibly with a// memory barrier).explicit ConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE): producerListTail(nullptr),producerCount(0),initialBlockPoolIndex(0),nextExplicitConsumerId(0),globalExplicitConsumerOffset(0){implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);populate_initial_implicit_producer_hash();populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 
0 : 1));#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG// Track all the producers using a fully-resolved typed list for// each kind; this makes it possible to debug them starting from// the root queue object (otherwise wacky casts are needed that// don't compile in the debugger's expression evaluator).explicitProducers.store(nullptr, std::memory_order_relaxed);implicitProducers.store(nullptr, std::memory_order_relaxed);#endif}// Computes the correct amount of pre-allocated blocks for you based// on the minimum number of elements you want available at any given// time, and the maximum concurrent number of each type of producer.ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers): producerListTail(nullptr),producerCount(0),initialBlockPoolIndex(0),nextExplicitConsumerId(0),globalExplicitConsumerOffset(0){implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);populate_initial_implicit_producer_hash();size_t blocks = ((((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers)) * BLOCK_SIZE;populate_initial_block_list(blocks);#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGexplicitProducers.store(nullptr, std::memory_order_relaxed);implicitProducers.store(nullptr, std::memory_order_relaxed);#endif}// Note: The queue should not be accessed concurrently while it's// being deleted. It's up to the user to synchronize this.// This method is not thread safe.~ConcurrentQueue(){// Destroy producersauto ptr = producerListTail.load(std::memory_order_relaxed);while (ptr != nullptr) {auto next = ptr->next_prod();if (ptr->token != nullptr) {ptr->token->producer = nullptr;}destroy(ptr);ptr = next;}// Destroy implicit producer hash tablesif (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {auto hash = implicitProducerHash.load(std::memory_order_relaxed);while (hash != nullptr) {auto prev = hash->prev;if (prev != nullptr) {// The last hash is part of this object and was not allocated dynamicallyfor (size_t i = 0; i != hash->capacity; ++i) {hash->entries[i].~ImplicitProducerKVP();}hash->~ImplicitProducerHash();(Traits::free)(hash);}hash = prev;}}// Destroy global free listauto block = freeList.head_unsafe();while (block != nullptr) {auto next = block->freeListNext.load(std::memory_order_relaxed);if (block->dynamicallyAllocated) {destroy(block);}block = next;}// Destroy initial free listdestroy_array(initialBlockPool, initialBlockPoolSize);}// Disable copying and copy assignmentConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;// Moving is supported, but note that it is *not* a thread-safe operation.// Nobody can use the queue while it's being moved, and the memory effects// of that move must be propagated to other threads before they can use it.// Note: When a queue is moved, its tokens are still valid but can only be// used with the destination queue (i.e. 
semantically they are moved along// with the queue itself).ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT: producerListTail(other.producerListTail.load(std::memory_order_relaxed)),producerCount(other.producerCount.load(std::memory_order_relaxed)),initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)),initialBlockPool(other.initialBlockPool),initialBlockPoolSize(other.initialBlockPoolSize),freeList(std::move(other.freeList)),nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)),globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)){// Move the other one into this, and leave the other one as an empty queueimplicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);populate_initial_implicit_producer_hash();swap_implicit_producer_hashes(other);other.producerListTail.store(nullptr, std::memory_order_relaxed);other.producerCount.store(0, std::memory_order_relaxed);other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGexplicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);other.explicitProducers.store(nullptr, std::memory_order_relaxed);implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed);other.implicitProducers.store(nullptr, std::memory_order_relaxed);#endifother.initialBlockPoolIndex.store(0, std::memory_order_relaxed);other.initialBlockPoolSize = 0;other.initialBlockPool = nullptr;reown_producers();}inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT{return swap_internal(other);}// Swaps this queue's state with the other's. Not thread-safe.// Swapping two queues does not invalidate their tokens, however// the tokens that were created for one queue must be used with// only the swapped queue (i.e. the tokens are tied to the// queue's movable state, not the object itself).inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT{swap_internal(other);}private:ConcurrentQueue& swap_internal(ConcurrentQueue& other){if (this == &other) {return *this;}details::swap_relaxed(producerListTail, other.producerListTail);details::swap_relaxed(producerCount, other.producerCount);details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex);std::swap(initialBlockPool, other.initialBlockPool);std::swap(initialBlockPoolSize, other.initialBlockPoolSize);freeList.swap(other.freeList);details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId);details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset);swap_implicit_producer_hashes(other);reown_producers();other.reown_producers();#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGdetails::swap_relaxed(explicitProducers, other.explicitProducers);details::swap_relaxed(implicitProducers, other.implicitProducers);#endifreturn *this;}public:// Enqueues a single item (by copying it).// Allocates memory if required. Only fails if memory allocation fails (or implicit// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Thread-safe.inline bool enqueue(T const& item){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue<CanAlloc>(item);}// Enqueues a single item (by moving it, if possible).// Allocates memory if required. 
Only fails if memory allocation fails (or implicit// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0,// or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Thread-safe.inline bool enqueue(T&& item){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue<CanAlloc>(std::move(item));}// Enqueues a single item (by copying it) using an explicit producer token.// Allocates memory if required. Only fails if memory allocation fails (or// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Thread-safe.inline bool enqueue(producer_token_t const& token, T const& item){return inner_enqueue<CanAlloc>(token, item);}// Enqueues a single item (by moving it, if possible) using an explicit producer token.// Allocates memory if required. Only fails if memory allocation fails (or// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Thread-safe.inline bool enqueue(producer_token_t const& token, T&& item){return inner_enqueue<CanAlloc>(token, std::move(item));}// Enqueues several items.// Allocates memory if required. Only fails if memory allocation fails (or// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Note: Use std::make_move_iterator if the elements should be moved instead of copied.// Thread-safe.template<typename It>bool enqueue_bulk(It itemFirst, size_t count){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue_bulk<CanAlloc>(itemFirst, count);}// Enqueues several items using an explicit producer token.// Allocates memory if required. Only fails if memory allocation fails// (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).// Note: Use std::make_move_iterator if the elements should be moved// instead of copied.// Thread-safe.template<typename It>bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count){return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);}// Enqueues a single item (by copying it).// Does not allocate memory. Fails if not enough room to enqueue (or implicit// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE// is 0).// Thread-safe.inline bool try_enqueue(T const& item){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue<CannotAlloc>(item);}// Enqueues a single item (by moving it, if possible).// Does not allocate memory (except for one-time implicit producer).// Fails if not enough room to enqueue (or implicit production is// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).// Thread-safe.inline bool try_enqueue(T&& item){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue<CannotAlloc>(std::move(item));}// Enqueues a single item (by copying it) using an explicit producer token.// Does not allocate memory. Fails if not enough room to enqueue.// Thread-safe.inline bool try_enqueue(producer_token_t const& token, T const& item){return inner_enqueue<CannotAlloc>(token, item);}// Enqueues a single item (by moving it, if possible) using an explicit producer token.// Does not allocate memory. 
Fails if not enough room to enqueue.// Thread-safe.inline bool try_enqueue(producer_token_t const& token, T&& item){return inner_enqueue<CannotAlloc>(token, std::move(item));}// Enqueues several items.// Does not allocate memory (except for one-time implicit producer).// Fails if not enough room to enqueue (or implicit production is// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).// Note: Use std::make_move_iterator if the elements should be moved// instead of copied.// Thread-safe.template<typename It>bool try_enqueue_bulk(It itemFirst, size_t count){if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);}// Enqueues several items using an explicit producer token.// Does not allocate memory. Fails if not enough room to enqueue.// Note: Use std::make_move_iterator if the elements should be moved// instead of copied.// Thread-safe.template<typename It>bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count){return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);}// Attempts to dequeue from the queue.// Returns false if all producer streams appeared empty at the time they// were checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. Thread-safe.template<typename U>bool try_dequeue(U& item){// Instead of simply trying each producer in turn (which could cause needless contention on the first// producer), we score them heuristically.size_t nonEmptyCount = 0;ProducerBase* best = nullptr;size_t bestSize = 0;for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {auto size = ptr->size_approx();if (size > 0) {if (size > bestSize) {bestSize = size;best = ptr;}++nonEmptyCount;}}// If there was at least one non-empty queue but it appears empty at the time// we try to dequeue from it, we need to make sure every queue's been triedif (nonEmptyCount > 0) {if (details::likely(best->dequeue(item))) {return true;}for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {if (ptr != best && ptr->dequeue(item)) {return true;}}}return false;}// Attempts to dequeue from the queue.// Returns false if all producer streams appeared empty at the time they// were checked (so, the queue is likely but not guaranteed to be empty).// This differs from the try_dequeue(item) method in that this one does// not attempt to reduce contention by interleaving the order that producer// streams are dequeued from. So, using this method can reduce overall throughput// under contention, but will give more predictable results in single-threaded// consumer scenarios. This is mostly only useful for internal unit tests.// Never allocates. Thread-safe.template<typename U>bool try_dequeue_non_interleaved(U& item){for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {if (ptr->dequeue(item)) {return true;}}return false;}// Attempts to dequeue from the queue using an explicit consumer token.// Returns false if all producer streams appeared empty at the time they// were checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. 
Thread-safe.template<typename U>bool try_dequeue(consumer_token_t& token, U& item){// The idea is roughly as follows:// Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less// If you see that the global offset has changed, you must reset your consumption counter and move to your designated place// If there's no items where you're supposed to be, keep moving until you find a producer with some items// If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in itif (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {if (!update_current_producer_after_rotation(token)) {return false;}}// If there was at least one non-empty queue but it appears empty at the time// we try to dequeue from it, we need to make sure every queue's been triedif (static_cast<ProducerBase*>(token.currentProducer)->dequeue(item)) {if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);}return true;}auto tail = producerListTail.load(std::memory_order_acquire);auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();if (ptr == nullptr) {ptr = tail;}while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {if (ptr->dequeue(item)) {token.currentProducer = ptr;token.itemsConsumedFromCurrent = 1;return true;}ptr = ptr->next_prod();if (ptr == nullptr) {ptr = tail;}}return false;}// Attempts to dequeue several elements from the queue.// Returns the number of items actually dequeued.// Returns 0 if all producer streams appeared empty at the time they// were checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. Thread-safe.template<typename It>size_t try_dequeue_bulk(It itemFirst, size_t max){size_t count = 0;for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {count += ptr->dequeue_bulk(itemFirst, max - count);if (count == max) {break;}}return count;}// Attempts to dequeue several elements from the queue using an explicit consumer token.// Returns the number of items actually dequeued.// Returns 0 if all producer streams appeared empty at the time they// were checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. 
Thread-safe.template<typename It>size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max){if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {if (!update_current_producer_after_rotation(token)) {return 0;}}size_t count = static_cast<ProducerBase*>(token.currentProducer)->dequeue_bulk(itemFirst, max);if (count == max) {if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);}return max;}token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);max -= count;auto tail = producerListTail.load(std::memory_order_acquire);auto ptr = static_cast<ProducerBase*>(token.currentProducer)->next_prod();if (ptr == nullptr) {ptr = tail;}while (ptr != static_cast<ProducerBase*>(token.currentProducer)) {auto dequeued = ptr->dequeue_bulk(itemFirst, max);count += dequeued;if (dequeued != 0) {token.currentProducer = ptr;token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);}if (dequeued == max) {break;}max -= dequeued;ptr = ptr->next_prod();if (ptr == nullptr) {ptr = tail;}}return count;}// Attempts to dequeue from a specific producer's inner queue.// If you happen to know which producer you want to dequeue from, this// is significantly faster than using the general-case try_dequeue methods.// Returns false if the producer's queue appeared empty at the time it// was checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. Thread-safe.template<typename U>inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item){return static_cast<ExplicitProducer*>(producer.producer)->dequeue(item);}// Attempts to dequeue several elements from a specific producer's inner queue.// Returns the number of items actually dequeued.// If you happen to know which producer you want to dequeue from, this// is significantly faster than using the general-case try_dequeue methods.// Returns 0 if the producer's queue appeared empty at the time it// was checked (so, the queue is likely but not guaranteed to be empty).// Never allocates. Thread-safe.template<typename It>inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max){return static_cast<ExplicitProducer*>(producer.producer)->dequeue_bulk(itemFirst, max);}// Returns an estimate of the total number of elements currently in the queue. This// estimate is only accurate if the queue has completely stabilized before it is called// (i.e. 
all enqueue and dequeue operations have completed and their memory effects are// visible on the calling thread, and no further operations start while this method is// being called).// Thread-safe.size_t size_approx() const{size_t size = 0;for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {size += ptr->size_approx();}return size;}// Returns true if the underlying atomic variables used by// the queue are lock-free (they should be on most platforms).// Thread-safe.static bool is_lock_free(){returndetails::static_is_lock_free<bool>::value == 2 &&details::static_is_lock_free<size_t>::value == 2 &&details::static_is_lock_free<std::uint32_t>::value == 2 &&details::static_is_lock_free<index_t>::value == 2 &&details::static_is_lock_free<void*>::value == 2 &&details::static_is_lock_free<typename details::thread_id_converter<details::thread_id_t>::thread_id_numeric_size_t>::value == 2;}private:friend struct ProducerToken;friend struct ConsumerToken;friend struct ExplicitProducer;friend class ConcurrentQueueTests;enum AllocationMode { CanAlloc, CannotAlloc };///////////////////////////////// Queue methods///////////////////////////////template<AllocationMode canAlloc, typename U>inline bool inner_enqueue(producer_token_t const& token, U&& element){return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));}template<AllocationMode canAlloc, typename U>inline bool inner_enqueue(U&& element){auto producer = get_or_add_implicit_producer();return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(std::forward<U>(element));}template<AllocationMode canAlloc, typename It>inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count){return static_cast<ExplicitProducer*>(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);}template<AllocationMode canAlloc, typename It>inline bool inner_enqueue_bulk(It itemFirst, size_t count){auto producer = get_or_add_implicit_producer();return producer == nullptr ? 
false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(itemFirst, count);}inline bool update_current_producer_after_rotation(consumer_token_t& token){// Ah, there's been a rotation, figure out where we should be!auto tail = producerListTail.load(std::memory_order_acquire);if (token.desiredProducer == nullptr && tail == nullptr) {return false;}auto prodCount = producerCount.load(std::memory_order_relaxed);auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed);if (details::unlikely(token.desiredProducer == nullptr)) {// Aha, first time we're dequeueing anything.// Figure out our local position// Note: offset is from start, not end, but we're traversing from end -- subtract from count firststd::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);token.desiredProducer = tail;for (std::uint32_t i = 0; i != offset; ++i) {token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();if (token.desiredProducer == nullptr) {token.desiredProducer = tail;}}}std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;if (delta >= prodCount) {delta = delta % prodCount;}for (std::uint32_t i = 0; i != delta; ++i) {token.desiredProducer = static_cast<ProducerBase*>(token.desiredProducer)->next_prod();if (token.desiredProducer == nullptr) {token.desiredProducer = tail;}}token.lastKnownGlobalOffset = globalOffset;token.currentProducer = token.desiredProducer;token.itemsConsumedFromCurrent = 0;return true;}///////////////////////////// Free list///////////////////////////template <typename N>struct FreeListNode{FreeListNode() : freeListRefs(0), freeListNext(nullptr) { }std::atomic<std::uint32_t> freeListRefs;std::atomic<N*> freeListNext;};// A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but// simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly// speedy under low contention.template<typename N>// N must inherit FreeListNode or have the same fields (and initialization of them)struct FreeList{FreeList() : freeListHead(nullptr) { }FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); }void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); }FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION;inline void add(N* node){#if MCDBGQ_NOLOCKFREE_FREELISTdebug::DebugLock lock(mutex);#endif// We know that the should-be-on-freelist bit is 0 at this point, so it's safe to// set it using a fetch_addif (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) {// Oh look! We were the last ones referencing this node, and we know// we want to add it to the free list, so let's do it! 
add_knowing_refcount_is_zero(node);}}inline N* try_get(){#if MCDBGQ_NOLOCKFREE_FREELISTdebug::DebugLock lock(mutex);#endifauto head = freeListHead.load(std::memory_order_acquire);while (head != nullptr) {auto prevHead = head;auto refs = head->freeListRefs.load(std::memory_order_relaxed);if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) {head = freeListHead.load(std::memory_order_acquire);continue;}// Good, reference count has been incremented (it wasn't at zero), which means we can read the// next and not worry about it changing between now and the time we do the CASauto next = head->freeListNext.load(std::memory_order_relaxed);if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) {// Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no// matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on).assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0);// Decrease refcount twice, once for our ref, and once for the list's refhead->freeListRefs.fetch_add(-2, std::memory_order_release);return head;}// OK, the head must have changed on us, but we still need to decrease the refcount we increased.// Note that we don't need to release any memory effects, but we do need to ensure that the reference// count decrement happens-after the CAS on the head.refs = prevHead->freeListRefs.fetch_add(-1, std::memory_order_acq_rel);if (refs == SHOULD_BE_ON_FREELIST + 1) {add_knowing_refcount_is_zero(prevHead);}}return nullptr;}// Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes)N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); }private:inline void add_knowing_refcount_is_zero(N* node){// Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run// only one copy of this method per node at a time, i.e. 
the single thread case), then we know// we can safely change the next pointer of the node; however, once the refcount is back above// zero, then other threads could increase it (happens under heavy contention, when the refcount// goes to zero in between a load and a refcount increment of a node in try_get, then back up to// something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS// to add the node to the actual list fails, decrease the refcount and leave the add operation to// the next thread who puts the refcount back at zero (which could be us, hence the loop).auto head = freeListHead.load(std::memory_order_relaxed);while (true) {node->freeListNext.store(head, std::memory_order_relaxed);node->freeListRefs.store(1, std::memory_order_release);if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) {// Hmm, the add failed, but we can only try again when the refcount goes back to zeroif (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) {continue;}}return;}}private:// Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention)std::atomic<N*> freeListHead;static const std::uint32_t REFS_MASK = 0x7FFFFFFF;static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;#if MCDBGQ_NOLOCKFREE_FREELISTdebug::DebugMutex mutex;#endif};///////////////////////////// Block///////////////////////////enum InnerQueueContext { implicit_context = 0, explicit_context = 1 };struct Block{Block(): next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), shouldBeOnFreeList(false), dynamicallyAllocated(true){#if MCDBGQ_TRACKMEMowner = nullptr;#endif}template<InnerQueueContext context>inline bool is_empty() const{if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {// Check flagsfor (size_t i = 0; i < BLOCK_SIZE; ++i) {if (!emptyFlags[i].load(std::memory_order_relaxed)) {return false;}}// Aha, empty; make sure we have all other memory effects that happened before the empty flags were setstd::atomic_thread_fence(std::memory_order_acquire);return true;}else {// Check counterif (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) {std::atomic_thread_fence(std::memory_order_acquire);return true;}assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE);return false;}}// Returns true if the block is now empty (does not apply in explicit context)template<InnerQueueContext context>inline bool set_empty(index_t i){if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {// Set flagassert(!emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].load(std::memory_order_relaxed));emptyFlags[BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1))].store(true, std::memory_order_release);return false;}else {// Increment counterauto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release);assert(prevVal < BLOCK_SIZE);return prevVal == BLOCK_SIZE - 1;}}// Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0).// Returns true if the block is now empty (does not apply in explicit context).template<InnerQueueContext context>inline bool set_many_empty(index_t i, size_t count){if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {// Set 
flagsstd::atomic_thread_fence(std::memory_order_release);i = BLOCK_SIZE - 1 - static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) - count + 1;for (size_t j = 0; j != count; ++j) {assert(!emptyFlags[i + j].load(std::memory_order_relaxed));emptyFlags[i + j].store(true, std::memory_order_relaxed);}return false;}else {// Increment counterauto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release);assert(prevVal + count <= BLOCK_SIZE);return prevVal + count == BLOCK_SIZE;}}template<InnerQueueContext context>inline void set_all_empty(){if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {// Set all flagsfor (size_t i = 0; i != BLOCK_SIZE; ++i) {emptyFlags[i].store(true, std::memory_order_relaxed);}}else {// Reset counterelementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);}}template<InnerQueueContext context>inline void reset_empty(){if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {// Reset flagsfor (size_t i = 0; i != BLOCK_SIZE; ++i) {emptyFlags[i].store(false, std::memory_order_relaxed);}}else {// Reset counterelementsCompletelyDequeued.store(0, std::memory_order_relaxed);}}inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast<T*>(static_cast<void*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast<T const*>(static_cast<void const*>(elements)) + static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1)); }private:// IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of// addresses returned by malloc, that alignment will be preserved. Apparently clang actually// generates code that uses this assumption for AVX instructions in some cases. Ideally, we// should also align Block to the alignment of T in case it's higher than malloc's 16-byte// alignment, but this is hard to do in a cross-platform way. Assert for this case:static_assert(std::alignment_of<T>::value <= std::alignment_of<details::max_align_t>::value, "The queue does not support super-aligned types at this time");// Additionally, we need the alignment of Block itself to be a multiple of max_align_t since// otherwise the appropriate padding will not be added at the end of Block in order to make// arrays of Blocks all be properly aligned (not just the first one). We use a union to force// this.union {char elements[sizeof(T) * BLOCK_SIZE];details::max_align_t dummy;};public:Block* next;std::atomic<size_t> elementsCompletelyDequeued;std::atomic<bool> emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
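// --------------------------------------------------------------------------
// Aside: the two emptiness-tracking strategies above trade memory for
// contention. Small blocks in the explicit context get one atomic flag per
// slot, so consumers never touch a shared counter; otherwise all consumers
// share one dequeue counter. A minimal standalone sketch of the counter
// variant follows (the names are illustrative, not the queue's own):
#if 0
#include <atomic>
#include <cassert>
#include <cstddef>

template<std::size_t SLOTS>
struct CounterBlockSketch {
    std::atomic<std::size_t> dequeued{0};

    // Called once per fully-consumed slot; returns true only for the caller
    // that empties the block (mirroring set_empty's return convention).
    bool mark_one_empty() {
        auto prev = dequeued.fetch_add(1, std::memory_order_release);
        assert(prev < SLOTS);
        return prev == SLOTS - 1;
    }

    bool is_empty() const {
        if (dequeued.load(std::memory_order_relaxed) == SLOTS) {
            // Acquire the memory effects of every slot's consumer:
            std::atomic_thread_fence(std::memory_order_acquire);
            return true;
        }
        return false;
    }
};
#endif
// --------------------------------------------------------------------------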
BLOCK_SIZE : 1];public:std::atomic<std::uint32_t> freeListRefs;std::atomic<Block*> freeListNext;std::atomic<bool> shouldBeOnFreeList;bool dynamicallyAllocated;// Perhaps a better name for this would be 'isNotPartOfInitialBlockPool'#if MCDBGQ_TRACKMEMvoid* owner;#endif};static_assert(std::alignment_of<Block>::value >= std::alignment_of<details::max_align_t>::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping");#if MCDBGQ_TRACKMEMpublic:struct MemStats;private:#endif///////////////////////////// Producer base///////////////////////////struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase{ProducerBase(ConcurrentQueue* parent, bool isExplicit) :tailIndex(0),headIndex(0),dequeueOptimisticCount(0),dequeueOvercommit(0),tailBlock(nullptr),isExplicit(isExplicit),parent(parent){}virtual ~ProducerBase() { };template<typename U>inline bool dequeue(U& element){if (isExplicit) {return static_cast<ExplicitProducer*>(this)->dequeue(element);}else {return static_cast<ImplicitProducer*>(this)->dequeue(element);}}template<typename It>inline size_t dequeue_bulk(It& itemFirst, size_t max){if (isExplicit) {return static_cast<ExplicitProducer*>(this)->dequeue_bulk(itemFirst, max);}else {return static_cast<ImplicitProducer*>(this)->dequeue_bulk(itemFirst, max);}}inline ProducerBase* next_prod() const { return static_cast<ProducerBase*>(next); }inline size_t size_approx() const{auto tail = tailIndex.load(std::memory_order_relaxed);auto head = headIndex.load(std::memory_order_relaxed);return details::circular_less_than(head, tail) ? static_cast<size_t>(tail - head) : 0;}inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); }protected:std::atomic<index_t> tailIndex;// Where to enqueue to nextstd::atomic<index_t> headIndex;// Where to dequeue from nextstd::atomic<index_t> dequeueOptimisticCount;std::atomic<index_t> dequeueOvercommit;Block* tailBlock;public:bool isExplicit;ConcurrentQueue* parent;protected:#if MCDBGQ_TRACKMEMfriend struct MemStats;#endif};///////////////////////////// Explicit queue///////////////////////////struct ExplicitProducer : public ProducerBase{explicit ExplicitProducer(ConcurrentQueue* parent) :ProducerBase(parent, true),blockIndex(nullptr),pr_blockIndexSlotsUsed(0),pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),pr_blockIndexFront(0),pr_blockIndexEntries(nullptr),pr_blockIndexRaw(nullptr){size_t poolBasedIndexSize = details::ceil_to_pow_2(parent->initialBlockPoolSize) >> 1;if (poolBasedIndexSize > pr_blockIndexSize) {pr_blockIndexSize = poolBasedIndexSize;}new_block_index(0);// This creates an index with double the number of current entries, i.e. 
EXPLICIT_INITIAL_INDEX_SIZE}~ExplicitProducer(){// Destruct any elements not yet dequeued.// Since we're in the destructor, we can assume all elements// are either completely dequeued or completely not (no halfways).if (this->tailBlock != nullptr) {// Note this means there must be a block index too// First find the block that's partially dequeued, if anyBlock* halfDequeuedBlock = nullptr;if ((this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {// The head's not on a block boundary, meaning a block somewhere is partially dequeued// (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary)size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1);while (details::circular_less_than<index_t>(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) {i = (i + 1) & (pr_blockIndexSize - 1);}assert(details::circular_less_than<index_t>(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed)));halfDequeuedBlock = pr_blockIndexEntries[i].block;}// Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration)auto block = this->tailBlock;do {block = block->next;if (block->ConcurrentQueue::Block::template is_empty<explicit_context>()) {continue;}size_t i = 0;// Offset into blockif (block == halfDequeuedBlock) {i = static_cast<size_t>(this->headIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));}// Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail indexauto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ? 
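// --------------------------------------------------------------------------
// Aside: the destructor above (and nearly every index comparison in this
// file) goes through details::circular_less_than, defined earlier in the
// header, which orders indices modulo 2^N so that wrap-around is harmless.
// A self-contained sketch of the idea (my own formulation, not necessarily
// the header's exact definition): a precedes b when the forward distance
// from a to b is non-zero and less than half the index space.
#if 0
#include <cstdint>
#include <cassert>

static bool circular_less_than_sketch(std::uint32_t a, std::uint32_t b)
{
    std::uint32_t forward = b - a;              // well-defined modulo 2^32
    return forward != 0 && forward < 0x80000000u;
}

int main()
{
    assert(circular_less_than_sketch(2, 5));
    assert(!circular_less_than_sketch(5, 2));
    // Still correct immediately after the index wraps past zero:
    assert(circular_less_than_sketch(0xFFFFFFF0u, 4u));
    return 0;
}
#endif
// --------------------------------------------------------------------------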
BLOCK_SIZE : static_cast<size_t>(this->tailIndex.load(std::memory_order_relaxed) & static_cast<index_t>(BLOCK_SIZE - 1));while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) {(*block)[i++]->~T();}} while (block != this->tailBlock);}// Destroy all blocks that we ownif (this->tailBlock != nullptr) {auto block = this->tailBlock;do {auto nextBlock = block->next;if (block->dynamicallyAllocated) {destroy(block);}else {this->parent->add_block_to_free_list(block);}block = nextBlock;} while (block != this->tailBlock);}// Destroy the block indicesauto header = static_cast<BlockIndexHeader*>(pr_blockIndexRaw);while (header != nullptr) {auto prev = static_cast<BlockIndexHeader*>(header->prev);header->~BlockIndexHeader();(Traits::free)(header);header = prev;}}template<AllocationMode allocMode, typename U>inline bool enqueue(U&& element){index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);index_t newTailIndex = 1 + currentTailIndex;if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {// We reached the end of a block, start a new oneauto startBlock = this->tailBlock;auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {// We can re-use the block ahead of us, it's empty!this->tailBlock = this->tailBlock->next;this->tailBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();// We'll put the block on the block index (guaranteed to be room since we're conceptually removing the// last block from it first -- except instead of removing then adding, we can just overwrite).// Note that there must be a valid block index here, since even if allocation failed in the ctor,// it would have been re-attempted when adding the first block to the queue; since there is such// a block, a block index must have been successfully allocated.}else {// Whatever head value we see here is >= the last value we saw here (relatively),// and <= its current value. Since we have the most recent tail, the head must be// <= to it.auto head = this->headIndex.load(std::memory_order_relaxed);assert(!details::circular_less_than<index_t>(currentTailIndex, head));if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE)|| (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {// We can't enqueue in another block because there's not enough leeway -- the// tail could surpass the head by the time the block fills up! (Or we'll exceed// the size limit, if the second part of the condition was true.)return false;}// We're going to need a new block; check that the block index has roomif (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) {// Hmm, the circular block index is already full -- we'll need// to allocate a new index. 
// Note pr_blockIndexRaw can only be nullptr if
                    // the initial allocation failed in the constructor.
                    if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) {
                        return false;
                    }
                }

                // Insert a new block in the circular linked list
                auto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();
                if (newBlock == nullptr) {
                    return false;
                }
#if MCDBGQ_TRACKMEM
                newBlock->owner = this;
#endif
                newBlock->ConcurrentQueue::Block::template reset_empty<explicit_context>();
                if (this->tailBlock == nullptr) {
                    newBlock->next = newBlock;
                }
                else {
                    newBlock->next = this->tailBlock->next;
                    this->tailBlock->next = newBlock;
                }
                this->tailBlock = newBlock;
                ++pr_blockIndexSlotsUsed;
            }

            if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
                // The constructor may throw. We want the element not to appear in the queue in
                // that case (without corrupting the queue):
                MOODYCAMEL_TRY {
                    new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));
                }
                MOODYCAMEL_CATCH (...) {
                    // Revert change to the current block, but leave the new block available
                    // for next time
                    pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
                    this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock;
                    MOODYCAMEL_RETHROW;
                }
            }
            else {
                (void)startBlock;
                (void)originalBlockIndexSlotsUsed;
            }

            // Add block to block index
            auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];
            entry.base = currentTailIndex;
            entry.block = this->tailBlock;
            blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release);
            pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);

            if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {
                this->tailIndex.store(newTailIndex, std::memory_order_release);
                return true;
            }
        }

        // Enqueue
        new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));

        this->tailIndex.store(newTailIndex, std::memory_order_release);
        return true;
    }

    template<typename U>
    bool dequeue(U& element)
    {
        auto tail = this->tailIndex.load(std::memory_order_relaxed);
        auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
        if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {
            // Might be something to dequeue, let's give it a try

            // Note that this if is purely for performance purposes in the common case when the queue is
            // empty and the values are eventually consistent -- we may enter here spuriously.

            // Note that whatever the values of overcommit and tail are, they are not going to change (unless we
            // change them) and must be the same value at this point (inside the if) as when the if condition was
            // evaluated.

            // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below.
            // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptimisticCount in
            // the fetch_add below will result in a value at least as recent as that (and therefore at least as large).
            // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all
            // read-modify-write operations are guaranteed to work on the latest value in the modification order), but
            // unfortunately that can't be shown to be correct using only the C++11 standard.
            // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
            std::atomic_thread_fence(std::memory_order_acquire);

            // Increment optimistic counter, then check if it went over the boundary
            auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);

            // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever
            // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now
            // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon
            // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount.
            assert(overcommit <= myDequeueCount);

            // Note that we reload tail here in case it changed; it will be the same value as before or greater, since
            // this load is sequenced after (happens after) the earlier load above. This is supported by read-read
            // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order
            tail = this->tailIndex.load(std::memory_order_acquire);
            if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {
                // Guaranteed to be at least one element to dequeue!

                // Get the index. Note that since there's guaranteed to be at least one element, this
                // will never exceed tail. We need to do an acquire-release fence here since it's possible
                // that whatever condition got us to this point was for an earlier enqueued element (that
                // we already see the memory effects for), but that by the time we increment somebody else
                // has incremented it, and we need to see the memory effects for *that* element, which in
                // such a case is necessarily visible on the thread that incremented it in the first
                // place with the more current condition (they must have acquired a tail that is at least
                // as recent).
                auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);

                // Determine which block the element is in
                auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
                auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);

                // We need to be careful here about subtracting and dividing because of index wrap-around.
                // When an index wraps, we need to preserve the sign of the offset when dividing it by the
                // block size (in order to get a correct signed block count offset in all cases):
                auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
                auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
                auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(blockBaseIndex - headBase) / BLOCK_SIZE);
                auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block;

                // Dequeue
                auto& el = *((*block)[index]);
                if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {
                    // Make sure the element is still fully dequeued and destroyed even if the assignment
                    // throws
                    struct Guard {
                        Block* block;
                        index_t index;

                        ~Guard()
                        {
                            (*block)[index]->~T();
                            block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
                        }
                    } guard = { block, index };

                    element = std::move(el);
                }
                else {
                    element = std::move(el);
                    el.~T();
                    block->ConcurrentQueue::Block::template set_empty<explicit_context>(index);
                }

                return true;
            }
            else {
                // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
                this->dequeueOvercommit.fetch_add(1, std::memory_order_release);    // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write
            }
        }

        return false;
    }

    template<AllocationMode allocMode, typename It>
    bool enqueue_bulk(It itemFirst, size_t count)
    {
        // First, we need to make sure we have enough
room to enqueue all of the elements;// this means pre-allocating blocks and putting them in the block index (but only if// all the allocations succeeded).index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);auto startBlock = this->tailBlock;auto originalBlockIndexFront = pr_blockIndexFront;auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;Block* firstAllocatedBlock = nullptr;// Figure out how many blocks we'll need to allocate, and do sosize_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);if (blockBaseDiff > 0) {// Allocate as many blocks as possible from aheadwhile (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty<explicit_context>()) {blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);currentTailIndex += static_cast<index_t>(BLOCK_SIZE);this->tailBlock = this->tailBlock->next;firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock;auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];entry.base = currentTailIndex;entry.block = this->tailBlock;pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);}// Now allocate as many blocks as necessary from the block poolwhile (blockBaseDiff > 0) {blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);currentTailIndex += static_cast<index_t>(BLOCK_SIZE);auto head = this->headIndex.load(std::memory_order_relaxed);assert(!details::circular_less_than<index_t>(currentTailIndex, head));bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) {// Failed to allocate, undo changes (but keep injected blocks)pr_blockIndexFront = originalBlockIndexFront;pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;return false;}// pr_blockIndexFront is updated inside new_block_index, so we need to// update our fallback value too (since we keep the new index even if we// later fail)originalBlockIndexFront = originalBlockIndexSlotsUsed;}// Insert a new block in the circular linked listauto newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>();if (newBlock == nullptr) {pr_blockIndexFront = originalBlockIndexFront;pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock;return false;}#if MCDBGQ_TRACKMEMnewBlock->owner = this;#endifnewBlock->ConcurrentQueue::Block::template set_all_empty<explicit_context>();if (this->tailBlock == nullptr) {newBlock->next = newBlock;}else {newBlock->next = this->tailBlock->next;this->tailBlock->next = newBlock;}this->tailBlock = newBlock;firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
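// --------------------------------------------------------------------------
// Aside: a worked instance of the blockBaseDiff computation above, with
// hypothetical numbers (BLOCK_SIZE = 32, startTailIndex = 50, count = 40).
// blockBaseDiff is the distance between the block base of the last new
// element and the block base of the last already-claimed slot, i.e. how
// many new block boundaries the bulk insertion crosses:
#if 0
#include <cassert>
#include <cstdint>

int main()
{
    const std::uint64_t BLOCK_SIZE = 32, startTailIndex = 50, count = 40;
    std::uint64_t lastBase  = (startTailIndex + count - 1) & ~(BLOCK_SIZE - 1);  // item 89 -> base 64
    std::uint64_t priorBase = (startTailIndex - 1) & ~(BLOCK_SIZE - 1);          // item 49 -> base 32
    assert(lastBase - priorBase == 32);  // exactly one new block must be found
    return 0;
}
#endif
// --------------------------------------------------------------------------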
this->tailBlock : firstAllocatedBlock;++pr_blockIndexSlotsUsed;auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront];entry.base = currentTailIndex;entry.block = this->tailBlock;pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);}// Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and// publish the new block index frontauto block = firstAllocatedBlock;while (true) {block->ConcurrentQueue::Block::template reset_empty<explicit_context>();if (block == this->tailBlock) {break;}block = block->next;}if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);}}// Enqueue, one block at a timeindex_t newTailIndex = startTailIndex + static_cast<index_t>(count);currentTailIndex = startTailIndex;auto endBlock = this->tailBlock;this->tailBlock = startBlock;assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {this->tailBlock = firstAllocatedBlock;}while (true) {auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {stopIndex = newTailIndex;}if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {while (currentTailIndex != stopIndex) {new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);}}else {MOODYCAMEL_TRY {while (currentTailIndex != stopIndex) {// Must use copy constructor even if move constructor is available// because we may have to revert if there's an exception.// Sorry about the horrible templated next line, but it was the only way// to disable moving *at compile time*, which is important because a type// may only define a (noexcept) move constructor, and so calls to the// cctor will not compile, even if they are in an if branch that will never// be executednew ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));++currentTailIndex;++itemFirst;}}MOODYCAMEL_CATCH (...) {// Oh dear, an exception's been thrown -- destroy the elements that// were enqueued so far and revert the entire bulk operation (we'll keep// any allocated blocks in our linked list for later, though).auto constructedStopIndex = currentTailIndex;auto lastBlockEnqueued = this->tailBlock;pr_blockIndexFront = originalBlockIndexFront;pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;this->tailBlock = startBlock == nullptr ? 
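// --------------------------------------------------------------------------
// Aside: from the outside, all of this bookkeeping is paid once per batch.
// A usage sketch of the public bulk API (as documented upstream); the
// payload values here are arbitrary:
#if 0
#include "concurrentqueue.h"
#include <cstddef>

int main()
{
    moodycamel::ConcurrentQueue<int> q;
    int items[5] = { 1, 2, 3, 4, 5 };
    q.enqueue_bulk(items, 5);   // one index/block reservation for all five

    int out[5];
    std::size_t n = q.try_dequeue_bulk(out, 5);  // dequeues up to 5, returns count
    (void)n;
    return 0;
}
#endif
// --------------------------------------------------------------------------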
firstAllocatedBlock : startBlock;if (!details::is_trivially_destructible<T>::value) {auto block = startBlock;if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {block = firstAllocatedBlock;}currentTailIndex = startTailIndex;while (true) {auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {stopIndex = constructedStopIndex;}while (currentTailIndex != stopIndex) {(*block)[currentTailIndex++]->~T();}if (block == lastBlockEnqueued) {break;}block = block->next;}}MOODYCAMEL_RETHROW;}}if (this->tailBlock == endBlock) {assert(currentTailIndex == newTailIndex);break;}this->tailBlock = this->tailBlock->next;}if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) {blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release);}this->tailIndex.store(newTailIndex, std::memory_order_release);return true;}template<typename It>size_t dequeue_bulk(It& itemFirst, size_t max){auto tail = this->tailIndex.load(std::memory_order_relaxed);auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));if (details::circular_less_than<size_t>(0, desiredCount)) {desiredCount = desiredCount < max ? desiredCount : max;std::atomic_thread_fence(std::memory_order_acquire);auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);assert(overcommit <= myDequeueCount);tail = this->tailIndex.load(std::memory_order_acquire);auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));if (details::circular_less_than<size_t>(0, actualCount)) {actualCount = desiredCount < actualCount ? desiredCount : actualCount;if (actualCount < desiredCount) {this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);}// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this// will never exceed tail.auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);// Determine which block the first element is inauto localBlockIndex = blockIndex.load(std::memory_order_acquire);auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire);auto headBase = localBlockIndex->entries[localBlockIndexHead].base;auto firstBlockBaseIndex = firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);auto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(firstBlockBaseIndex - headBase) / BLOCK_SIZE);auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1);// Iterate the blocks and dequeueauto index = firstIndex;do {auto firstIndexInBlock = index;auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? 
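// --------------------------------------------------------------------------
// Aside: the catch-block above implements a commit-or-destroy pattern --
// destroy every element constructed so far, restore the producer's state,
// then rethrow. The same pattern in miniature, detached from the queue:
#if 0
#include <new>
#include <cstddef>

template<typename T, typename It>
void construct_all_or_nothing(void* raw, It first, std::size_t n)
{
    T* slots = static_cast<T*>(raw);
    std::size_t built = 0;
    try {
        for (; built != n; ++built, ++first) {
            new (slots + built) T(*first);   // may throw partway through
        }
    }
    catch (...) {
        while (built != 0) {
            slots[--built].~T();             // unwind the completed ones
        }
        throw;                               // storage reusable, nothing leaked
    }
}
#endif
// --------------------------------------------------------------------------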
firstIndex + static_cast<index_t>(actualCount) : endIndex;
                auto block = localBlockIndex->entries[indexIndex].block;
                if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {
                    while (index != endIndex) {
                        auto& el = *((*block)[index]);
                        *itemFirst++ = std::move(el);
                        el.~T();
                        ++index;
                    }
                }
                else {
                    MOODYCAMEL_TRY {
                        while (index != endIndex) {
                            auto& el = *((*block)[index]);
                            *itemFirst = std::move(el);
                            ++itemFirst;
                            el.~T();
                            ++index;
                        }
                    }
                    MOODYCAMEL_CATCH (...) {
                        // It's too late to revert the dequeue, but we can make sure that all
                        // the dequeued objects are properly destroyed and the block index
                        // (and empty count) are properly updated before we propagate the exception
                        do {
                            block = localBlockIndex->entries[indexIndex].block;
                            while (index != endIndex) {
                                (*block)[index++]->~T();
                            }
                            block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
                            indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);

                            firstIndexInBlock = index;
                            endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);
                            endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;
                        } while (index != firstIndex + actualCount);

                        MOODYCAMEL_RETHROW;
                    }
                }
                block->ConcurrentQueue::Block::template set_many_empty<explicit_context>(firstIndexInBlock, static_cast<size_t>(endIndex - firstIndexInBlock));
                indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
            } while (index != firstIndex + actualCount);

            return actualCount;
        }
        else {
            // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent
            this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);
        }

        return 0;
    }

private:
    struct BlockIndexEntry
    {
        index_t base;
        Block* block;
    };

    struct BlockIndexHeader
    {
        size_t size;
        std::atomic<size_t> front;    // Current slot (not next, like pr_blockIndexFront)
        BlockIndexEntry* entries;
        void* prev;
    };

    bool new_block_index(size_t numberOfFilledSlotsToExpose)
    {
        auto prevBlockSizeMask = pr_blockIndexSize - 1;

        // Create the new block
        pr_blockIndexSize <<= 1;
        auto newRawPtr = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize));
        if (newRawPtr == nullptr) {
            pr_blockIndexSize >>= 1;    // Reset to allow graceful retry
            return false;
        }

        auto newBlockIndexEntries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(newRawPtr + sizeof(BlockIndexHeader)));

        // Copy in all the old indices, if any
        size_t j = 0;
        if (pr_blockIndexSlotsUsed != 0) {
            auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask;
            do {
                newBlockIndexEntries[j++] = pr_blockIndexEntries[i];
                i = (i + 1) & prevBlockSizeMask;
            } while (i != pr_blockIndexFront);
        }

        // Update everything
        auto header = new (newRawPtr) BlockIndexHeader;
        header->size = pr_blockIndexSize;
        header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed);
        header->entries = newBlockIndexEntries;
        header->prev = pr_blockIndexRaw;    // we link the new block to the old one so we can free it later

        pr_blockIndexFront = j;
        pr_blockIndexEntries = newBlockIndexEntries;
        pr_blockIndexRaw = newRawPtr;
        blockIndex.store(header, std::memory_order_release);

        return true;
    }

private:
    std::atomic<BlockIndexHeader*> blockIndex;

    // To be used by producer only -- consumer must use the ones referenced by blockIndex
    size_t pr_blockIndexSlotsUsed;
    size_t pr_blockIndexSize;
    size_t
pr_blockIndexFront;// Next slot (not current)BlockIndexEntry* pr_blockIndexEntries;void* pr_blockIndexRaw;#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGpublic:ExplicitProducer* nextExplicitProducer;private:#endif#if MCDBGQ_TRACKMEMfriend struct MemStats;#endif};//////////////////////////////////// Implicit queue//////////////////////////////////struct ImplicitProducer : public ProducerBase{ImplicitProducer(ConcurrentQueue* parent) :ProducerBase(parent, false),nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),blockIndex(nullptr){new_block_index();}~ImplicitProducer(){// Note that since we're in the destructor we can assume that all enqueue/dequeue operations// completed already; this means that all undequeued elements are placed contiguously across// contiguous blocks, and that only the first and last remaining blocks can be only partially// empty (all other remaining blocks must be completely full).#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED// Unregister ourselves for thread termination notificationif (!this->inactive.load(std::memory_order_relaxed)) {details::ThreadExitNotifier::unsubscribe(&threadExitListener);}#endif// Destroy all remaining elements!auto tail = this->tailIndex.load(std::memory_order_relaxed);auto index = this->headIndex.load(std::memory_order_relaxed);Block* block = nullptr;assert(index == tail || details::circular_less_than(index, tail));bool forceFreeLastBlock = index != tail;// If we enter the loop, then the last (tail) block will not be freedwhile (index != tail) {if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 || block == nullptr) {if (block != nullptr) {// Free the old blockthis->parent->add_block_to_free_list(block);}block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed);}((*block)[index])->~T();++index;}// Even if the queue is empty, there's still one block that's not on the free list// (unless the head index reached the end of it, in which case the tail will be poised// to create a new block).if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {this->parent->add_block_to_free_list(this->tailBlock);}// Destroy block indexauto localBlockIndex = blockIndex.load(std::memory_order_relaxed);if (localBlockIndex != nullptr) {for (size_t i = 0; i != localBlockIndex->capacity; ++i) {localBlockIndex->index[i]->~BlockIndexEntry();}do {auto prev = localBlockIndex->prev;localBlockIndex->~BlockIndexHeader();(Traits::free)(localBlockIndex);localBlockIndex = prev;} while (localBlockIndex != nullptr);}}template<AllocationMode allocMode, typename U>inline bool enqueue(U&& element){index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed);index_t newTailIndex = 1 + currentTailIndex;if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {// We reached the end of a block, start a new oneauto head = this->headIndex.load(std::memory_order_relaxed);assert(!details::circular_less_than<index_t>(currentTailIndex, head));if (!details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {return false;}#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endif// Find out where we'll be inserting this block in the block indexBlockIndexEntry* idxEntry;if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {return false;}// Get ahold of a new blockauto newBlock = 
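// --------------------------------------------------------------------------
// Aside: an ImplicitProducer is what a tokenless call gets. It is created on
// a thread's first enqueue and found again through the thread-id hash
// further down; explicit ProducerTokens bypass that lookup entirely. Usage
// sketch (per the upstream documentation):
#if 0
#include "concurrentqueue.h"
#include <thread>

int main()
{
    moodycamel::ConcurrentQueue<int> q;

    std::thread t([&] { q.enqueue(1); });  // implicit producer for t
    q.enqueue(2);                          // a second one for the main thread
    t.join();

    int v;
    while (q.try_dequeue(v)) { }           // drains both per-thread sub-queues
    return 0;
}
#endif
// --------------------------------------------------------------------------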
this->parent->ConcurrentQueue::template requisition_block<allocMode>();if (newBlock == nullptr) {rewind_block_index_tail();idxEntry->value.store(nullptr, std::memory_order_relaxed);return false;}#if MCDBGQ_TRACKMEMnewBlock->owner = this;#endifnewBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {// May throw, try to insert now before we publish the fact that we have this new blockMOODYCAMEL_TRY {new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));}MOODYCAMEL_CATCH (...) {rewind_block_index_tail();idxEntry->value.store(nullptr, std::memory_order_relaxed);this->parent->add_block_to_free_list(newBlock);MOODYCAMEL_RETHROW;}}// Insert the new block into the indexidxEntry->value.store(newBlock, std::memory_order_relaxed);this->tailBlock = newBlock;if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (nullptr) T(std::forward<U>(element)))) {this->tailIndex.store(newTailIndex, std::memory_order_release);return true;}}// Enqueuenew ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));this->tailIndex.store(newTailIndex, std::memory_order_release);return true;}template<typename U>bool dequeue(U& element){// See ExplicitProducer::dequeue for rationale and explanationindex_t tail = this->tailIndex.load(std::memory_order_relaxed);index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);if (details::circular_less_than<index_t>(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) {std::atomic_thread_fence(std::memory_order_acquire);index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed);assert(overcommit <= myDequeueCount);tail = this->tailIndex.load(std::memory_order_acquire);if (details::likely(details::circular_less_than<index_t>(myDequeueCount - overcommit, tail))) {index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);// Determine which block the element is inauto entry = get_block_index_entry_for_index(index);// Dequeueauto block = entry->value.load(std::memory_order_relaxed);auto& el = *((*block)[index]);if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) {#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX// Note: Acquiring the mutex with every dequeue instead of only when a block// is released is very sub-optimal, but it is, after all, purely debug code.debug::DebugLock lock(producer->mutex);#endifstruct Guard {Block* block;index_t index;BlockIndexEntry* entry;ConcurrentQueue* parent;~Guard(){(*block)[index]->~T();if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {entry->value.store(nullptr, std::memory_order_relaxed);parent->add_block_to_free_list(block);}}} guard = { block, index, entry, this->parent };element = std::move(el);}else {element = std::move(el);el.~T();if (block->ConcurrentQueue::Block::template set_empty<implicit_context>(index)) {{#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endif// Add the block back into the global free pool (and remove from block index)entry->value.store(nullptr, std::memory_order_relaxed);}this->parent->add_block_to_free_list(block);// releases the above store}}return true;}else {this->dequeueOvercommit.fetch_add(1, std::memory_order_release);}}return false;}template<AllocationMode allocMode, typename It>bool enqueue_bulk(It itemFirst, size_t count){// First, we need to make sure we have enough room to enqueue all of the elements;// this means pre-allocating blocks and putting 
them in the block index (but only if// all the allocations succeeded).// Note that the tailBlock we start off with may not be owned by us any more;// this happens if it was filled up exactly to the top (setting tailIndex to// the first index of the next block which is not yet allocated), then dequeued// completely (putting it on the free list) before we enqueue again.index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);auto startBlock = this->tailBlock;Block* firstAllocatedBlock = nullptr;auto endBlock = this->tailBlock;// Figure out how many blocks we'll need to allocate, and do sosize_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));index_t currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);if (blockBaseDiff > 0) {#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endifdo {blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);currentTailIndex += static_cast<index_t>(BLOCK_SIZE);// Find out where we'll be inserting this block in the block indexBlockIndexEntry* idxEntry;Block* newBlock;bool indexInserted = false;auto head = this->headIndex.load(std::memory_order_relaxed);assert(!details::circular_less_than<index_t>(currentTailIndex, head));bool full = !details::circular_less_than<index_t>(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));if (full || !(indexInserted = insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block<allocMode>()) == nullptr) {// Index allocation or block allocation failed; revert any other allocations// and index insertions done so far for this operationif (indexInserted) {rewind_block_index_tail();idxEntry->value.store(nullptr, std::memory_order_relaxed);}currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {currentTailIndex += static_cast<index_t>(BLOCK_SIZE);idxEntry = get_block_index_entry_for_index(currentTailIndex);idxEntry->value.store(nullptr, std::memory_order_relaxed);rewind_block_index_tail();}this->parent->add_blocks_to_free_list(firstAllocatedBlock);this->tailBlock = startBlock;return false;}#if MCDBGQ_TRACKMEMnewBlock->owner = this;#endifnewBlock->ConcurrentQueue::Block::template reset_empty<implicit_context>();newBlock->next = nullptr;// Insert the new block into the indexidxEntry->value.store(newBlock, std::memory_order_relaxed);// Store the chain of blocks so that we can undo if later allocations fail,// and so that we can find the blocks when we do the actual enqueueingif ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) {assert(this->tailBlock != nullptr);this->tailBlock->next = newBlock;}this->tailBlock = newBlock;endBlock = newBlock;firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock;} while (blockBaseDiff > 0);}// Enqueue, one block at a timeindex_t newTailIndex = startTailIndex + static_cast<index_t>(count);currentTailIndex = startTailIndex;this->tailBlock = startBlock;assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0);if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) {this->tailBlock = firstAllocatedBlock;}while (true) {auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {stopIndex = newTailIndex;}if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))) {while (currentTailIndex != stopIndex) {new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);}}else {MOODYCAMEL_TRY {while (currentTailIndex != stopIndex) {new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst));++currentTailIndex;++itemFirst;}}MOODYCAMEL_CATCH (...) {auto constructedStopIndex = currentTailIndex;auto lastBlockEnqueued = this->tailBlock;if (!details::is_trivially_destructible<T>::value) {auto block = startBlock;if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {block = firstAllocatedBlock;}currentTailIndex = startTailIndex;while (true) {auto stopIndex = (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);if (details::circular_less_than<index_t>(constructedStopIndex, stopIndex)) {stopIndex = constructedStopIndex;}while (currentTailIndex != stopIndex) {(*block)[currentTailIndex++]->~T();}if (block == lastBlockEnqueued) {break;}block = block->next;}}currentTailIndex = (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) {currentTailIndex += static_cast<index_t>(BLOCK_SIZE);auto idxEntry = get_block_index_entry_for_index(currentTailIndex);idxEntry->value.store(nullptr, std::memory_order_relaxed);rewind_block_index_tail();}this->parent->add_blocks_to_free_list(firstAllocatedBlock);this->tailBlock = startBlock;MOODYCAMEL_RETHROW;}}if (this->tailBlock == endBlock) {assert(currentTailIndex == newTailIndex);break;}this->tailBlock = this->tailBlock->next;}this->tailIndex.store(newTailIndex, std::memory_order_release);return true;}template<typename It>size_t dequeue_bulk(It& itemFirst, size_t max){auto tail = this->tailIndex.load(std::memory_order_relaxed);auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);auto desiredCount = static_cast<size_t>(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit));if (details::circular_less_than<size_t>(0, desiredCount)) {desiredCount = desiredCount < max ? desiredCount : max;std::atomic_thread_fence(std::memory_order_acquire);auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);assert(overcommit <= myDequeueCount);tail = this->tailIndex.load(std::memory_order_acquire);auto actualCount = static_cast<size_t>(tail - (myDequeueCount - overcommit));if (details::circular_less_than<size_t>(0, actualCount)) {actualCount = desiredCount < actualCount ? 
desiredCount : actualCount;if (actualCount < desiredCount) {this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release);}// Get the first index. Note that since there's guaranteed to be at least actualCount elements, this// will never exceed tail.auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);// Iterate the blocks and dequeueauto index = firstIndex;BlockIndexHeader* localBlockIndex;auto indexIndex = get_block_index_index_for_index(index, localBlockIndex);do {auto blockStartIndex = index;auto endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;auto entry = localBlockIndex->index[indexIndex];auto block = entry->value.load(std::memory_order_relaxed);if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) {while (index != endIndex) {auto& el = *((*block)[index]);*itemFirst++ = std::move(el);el.~T();++index;}}else {MOODYCAMEL_TRY {while (index != endIndex) {auto& el = *((*block)[index]);*itemFirst = std::move(el);++itemFirst;el.~T();++index;}}MOODYCAMEL_CATCH (...) {do {entry = localBlockIndex->index[indexIndex];block = entry->value.load(std::memory_order_relaxed);while (index != endIndex) {(*block)[index++]->~T();}if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endifentry->value.store(nullptr, std::memory_order_relaxed);this->parent->add_block_to_free_list(block);}indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);blockStartIndex = index;endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) + static_cast<index_t>(BLOCK_SIZE);endIndex = details::circular_less_than<index_t>(firstIndex + static_cast<index_t>(actualCount), endIndex) ? firstIndex + static_cast<index_t>(actualCount) : endIndex;} while (index != firstIndex + actualCount);MOODYCAMEL_RETHROW;}}if (block->ConcurrentQueue::Block::template set_many_empty<implicit_context>(blockStartIndex, static_cast<size_t>(endIndex - blockStartIndex))) {{#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endif// Note that the set_many_empty above did a release, meaning that anybody who acquires the block// we're about to free can use it safely since our writes (and reads!) 
will have happened-before then.entry->value.store(nullptr, std::memory_order_relaxed);}this->parent->add_block_to_free_list(block);// releases the above store}indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);} while (index != firstIndex + actualCount);return actualCount;}else {this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release);}}return 0;}private:// The block size must be > 1, so any number with the low bit set is an invalid block base indexstatic const index_t INVALID_BLOCK_BASE = 1;struct BlockIndexEntry{std::atomic<index_t> key;std::atomic<Block*> value;};struct BlockIndexHeader{size_t capacity;std::atomic<size_t> tail;BlockIndexEntry* entries;BlockIndexEntry** index;BlockIndexHeader* prev;};template<AllocationMode allocMode>inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex){auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);// We're the only writer thread, relaxed is OKauto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);idxEntry = localBlockIndex->index[newTail];if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||idxEntry->value.load(std::memory_order_relaxed) == nullptr) {idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);localBlockIndex->tail.store(newTail, std::memory_order_release);return true;}// No room in the old block index, try to allocate another one!if (allocMode == CannotAlloc || !new_block_index()) {return false;}localBlockIndex = blockIndex.load(std::memory_order_relaxed);newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1);idxEntry = localBlockIndex->index[newTail];assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE);idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);localBlockIndex->tail.store(newTail, std::memory_order_release);return true;}inline void rewind_block_index_tail(){auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed);}inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const{BlockIndexHeader* localBlockIndex;auto idx = get_block_index_index_for_index(index, localBlockIndex);return localBlockIndex->index[idx];}inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const{#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXdebug::DebugLock lock(mutex);#endifindex &= ~static_cast<index_t>(BLOCK_SIZE - 1);localBlockIndex = blockIndex.load(std::memory_order_acquire);auto tail = localBlockIndex->tail.load(std::memory_order_acquire);auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);assert(tailBase != INVALID_BLOCK_BASE);// Note: Must use division instead of shift because the index may wrap around, causing a negative// offset, whose negativity we want to preserveauto offset = static_cast<size_t>(static_cast<typename std::make_signed<index_t>::type>(index - tailBase) / BLOCK_SIZE);size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr);return idx;}bool new_block_index(){auto prev = blockIndex.load(std::memory_order_relaxed);size_t prevCapacity = prev == nullptr ? 
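// --------------------------------------------------------------------------
// Aside: why the offset math above divides a *signed* difference rather than
// shifting -- after the index wraps, the difference between two block bases
// can be negative, and the division must preserve that sign. A self-contained
// illustration with 32-bit indices and BLOCK_SIZE = 32:
#if 0
#include <cstdint>
#include <cassert>

int main()
{
    const std::int32_t BLOCK_SIZE = 32;
    std::uint32_t tailBase  = 0xFFFFFFE0u;  // last block base before the wrap
    std::uint32_t laterBase = 0x00000020u;  // two blocks later, post-wrap

    auto forward  = static_cast<std::int32_t>(laterBase - tailBase) / BLOCK_SIZE;
    auto backward = static_cast<std::int32_t>(tailBase - laterBase) / BLOCK_SIZE;
    assert(forward == 2 && backward == -2); // signs survive the wrap
    return 0;
}
#endif
// --------------------------------------------------------------------------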
0 : prev->capacity;auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;auto raw = static_cast<char*>((Traits::malloc)(sizeof(BlockIndexHeader) +std::alignment_of<BlockIndexEntry>::value - 1 + sizeof(BlockIndexEntry) * entryCount +std::alignment_of<BlockIndexEntry*>::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity));if (raw == nullptr) {return false;}auto header = new (raw) BlockIndexHeader;auto entries = reinterpret_cast<BlockIndexEntry*>(details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));auto index = reinterpret_cast<BlockIndexEntry**>(details::align_for<BlockIndexEntry*>(reinterpret_cast<char*>(entries) + sizeof(BlockIndexEntry) * entryCount));if (prev != nullptr) {auto prevTail = prev->tail.load(std::memory_order_relaxed);auto prevPos = prevTail;size_t i = 0;do {prevPos = (prevPos + 1) & (prev->capacity - 1);index[i++] = prev->index[prevPos];} while (prevPos != prevTail);assert(i == prevCapacity);}for (size_t i = 0; i != entryCount; ++i) {new (entries + i) BlockIndexEntry;entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);index[prevCapacity + i] = entries + i;}header->prev = prev;header->entries = entries;header->index = index;header->capacity = nextBlockIndexCapacity;header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed);blockIndex.store(header, std::memory_order_release);nextBlockIndexCapacity <<= 1;return true;}private:size_t nextBlockIndexCapacity;std::atomic<BlockIndexHeader*> blockIndex;#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTEDpublic:details::ThreadExitListener threadExitListener;private:#endif#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGpublic:ImplicitProducer* nextImplicitProducer;private:#endif#if MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEXmutable debug::DebugMutex mutex;#endif#if MCDBGQ_TRACKMEMfriend struct MemStats;#endif};//////////////////////////////////// Block pool manipulation//////////////////////////////////void populate_initial_block_list(size_t blockCount){initialBlockPoolSize = blockCount;if (initialBlockPoolSize == 0) {initialBlockPool = nullptr;return;}initialBlockPool = create_array<Block>(blockCount);if (initialBlockPool == nullptr) {initialBlockPoolSize = 0;}for (size_t i = 0; i < initialBlockPoolSize; ++i) {initialBlockPool[i].dynamicallyAllocated = false;}}inline Block* try_get_block_from_initial_pool(){if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) {return nullptr;}auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);return index < initialBlockPoolSize ? 
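// --------------------------------------------------------------------------
// Aside: try_get_block_from_initial_pool above is an atomic bump allocator.
// Racing threads each fetch_add the shared index; a ticket that lands past
// the end of the pool means "fall through to the free list or the heap".
// The same logic in miniature (illustrative names):
#if 0
#include <atomic>
#include <cstddef>

struct PoolSketch {
    int* pool;
    std::size_t size;
    std::atomic<std::size_t> next{0};

    int* try_get() {
        if (next.load(std::memory_order_relaxed) >= size) {
            return nullptr;                        // cheap bail-out once exhausted
        }
        auto i = next.fetch_add(1, std::memory_order_relaxed);
        return i < size ? pool + i : nullptr;      // fetch_add may overshoot; re-check
    }
};
#endif
// --------------------------------------------------------------------------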
(initialBlockPool + index) : nullptr;}inline void add_block_to_free_list(Block* block){#if MCDBGQ_TRACKMEMblock->owner = nullptr;#endiffreeList.add(block);}inline void add_blocks_to_free_list(Block* block){while (block != nullptr) {auto next = block->next;add_block_to_free_list(block);block = next;}}inline Block* try_get_block_from_free_list(){return freeList.try_get();}// Gets a free block from one of the memory pools, or allocates a new one (if applicable)template<AllocationMode canAlloc>Block* requisition_block(){auto block = try_get_block_from_initial_pool();if (block != nullptr) {return block;}block = try_get_block_from_free_list();if (block != nullptr) {return block;}if (canAlloc == CanAlloc) {return create<Block>();}return nullptr;}#if MCDBGQ_TRACKMEMpublic:struct MemStats {size_t allocatedBlocks;size_t usedBlocks;size_t freeBlocks;size_t ownedBlocksExplicit;size_t ownedBlocksImplicit;size_t implicitProducers;size_t explicitProducers;size_t elementsEnqueued;size_t blockClassBytes;size_t queueClassBytes;size_t implicitBlockIndexBytes;size_t explicitBlockIndexBytes;friend class ConcurrentQueue;private:static MemStats getFor(ConcurrentQueue* q){MemStats stats = { 0 };stats.elementsEnqueued = q->size_approx();auto block = q->freeList.head_unsafe();while (block != nullptr) {++stats.allocatedBlocks;++stats.freeBlocks;block = block->freeListNext.load(std::memory_order_relaxed);}for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {bool implicit = dynamic_cast<ImplicitProducer*>(ptr) != nullptr;stats.implicitProducers += implicit ? 1 : 0;stats.explicitProducers += implicit ? 0 : 1;if (implicit) {auto prod = static_cast<ImplicitProducer*>(ptr);stats.queueClassBytes += sizeof(ImplicitProducer);auto head = prod->headIndex.load(std::memory_order_relaxed);auto tail = prod->tailIndex.load(std::memory_order_relaxed);auto hash = prod->blockIndex.load(std::memory_order_relaxed);if (hash != nullptr) {for (size_t i = 0; i != hash->capacity; ++i) {if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) {++stats.allocatedBlocks;++stats.ownedBlocksImplicit;}}stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry);for (; hash != nullptr; hash = hash->prev) {stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*);}}for (; details::circular_less_than<index_t>(head, tail); head += BLOCK_SIZE) {//auto block = prod->get_block_index_entry_for_index(head);++stats.usedBlocks;}}else {auto prod = static_cast<ExplicitProducer*>(ptr);stats.queueClassBytes += sizeof(ExplicitProducer);auto tailBlock = prod->tailBlock;bool wasNonEmpty = false;if (tailBlock != nullptr) {auto block = tailBlock;do {++stats.allocatedBlocks;if (!block->ConcurrentQueue::Block::template is_empty<explicit_context>() || wasNonEmpty) {++stats.usedBlocks;wasNonEmpty = wasNonEmpty || block != tailBlock;}++stats.ownedBlocksExplicit;block = block->next;} while (block != tailBlock);}auto index = prod->blockIndex.load(std::memory_order_relaxed);while (index != nullptr) {stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry);index = static_cast<typename ExplicitProducer::BlockIndexHeader*>(index->prev);}}}auto freeOnInitialPool = 
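// --------------------------------------------------------------------------
// Aside: MemStats (and getMemStats() just below) exist only when
// MCDBGQ_TRACKMEM is defined, and are explicitly marked debug-only and not
// thread-safe. A usage sketch under that assumption:
#if 0
#define MCDBGQ_TRACKMEM 1
#include "concurrentqueue.h"
#include <cstdio>

int main()
{
    moodycamel::ConcurrentQueue<int> q;
    q.enqueue(42);

    auto stats = q.getMemStats();  // snapshot while no other thread touches q
    std::printf("allocated: %zu blocks, in use: %zu blocks\n",
                stats.allocatedBlocks, stats.usedBlocks);
    return 0;
}
#endif
// --------------------------------------------------------------------------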
q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed);stats.allocatedBlocks += freeOnInitialPool;stats.freeBlocks += freeOnInitialPool;stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;stats.queueClassBytes += sizeof(ConcurrentQueue);return stats;}};// For debugging only. Not thread-safe.MemStats getMemStats(){return MemStats::getFor(this);}private:friend struct MemStats;#endif//////////////////////////////////// Producer list manipulation//////////////////////////////////ProducerBase* recycle_or_create_producer(bool isExplicit){bool recycled;return recycle_or_create_producer(isExplicit, recycled);}ProducerBase* recycle_or_create_producer(bool isExplicit, bool& recycled){#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASHdebug::DebugLock lock(implicitProdMutex);#endif// Try to re-use one firstfor (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) {bool expected = true;if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) {// We caught one! It's been marked as activated, the caller can have itrecycled = true;return ptr;}}}recycled = false;return add_producer(isExplicit ? static_cast<ProducerBase*>(create<ExplicitProducer>(this)) : create<ImplicitProducer>(this));}ProducerBase* add_producer(ProducerBase* producer){// Handle failed memory allocationif (producer == nullptr) {return nullptr;}producerCount.fetch_add(1, std::memory_order_relaxed);// Add it to the lock-free listauto prevTail = producerListTail.load(std::memory_order_relaxed);do {producer->next = prevTail;} while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed));#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUGif (producer->isExplicit) {auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed);do {static_cast<ExplicitProducer*>(producer)->nextExplicitProducer = prevTailExplicit;} while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast<ExplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));}else {auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed);do {static_cast<ImplicitProducer*>(producer)->nextImplicitProducer = prevTailImplicit;} while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast<ImplicitProducer*>(producer), std::memory_order_release, std::memory_order_relaxed));}#endifreturn producer;}void reown_producers(){// After another instance is moved-into/swapped-with this one, all the// producers we stole still think their parents are the other queue.// So fix them up!for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {ptr->parent = this;}}//////////////////////////////////// Implicit producer hash//////////////////////////////////struct ImplicitProducerKVP{std::atomic<details::thread_id_t> key;ImplicitProducer* value;// No need for atomicity since it's only read by the thread that sets it in the first placeImplicitProducerKVP() : value(nullptr) { }ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT{key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);value = other.value;}inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) 
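// --------------------------------------------------------------------------
// Aside: add_producer above pushes onto the producer list with the classic
// lock-free prepend -- safe here because producers are only ever added to
// the list, never unlinked from it. The same loop in isolation:
#if 0
#include <atomic>

struct NodeSketch { NodeSketch* next = nullptr; };

std::atomic<NodeSketch*> headSketch{nullptr};

void push(NodeSketch* n)
{
    auto prev = headSketch.load(std::memory_order_relaxed);
    do {
        n->next = prev;   // prev is refreshed by each failed CAS
    } while (!headSketch.compare_exchange_weak(prev, n,
                 std::memory_order_release, std::memory_order_relaxed));
}
#endif
// --------------------------------------------------------------------------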
	void reown_producers()
	{
		// After another instance is moved-into/swapped-with this one, all the
		// producers we stole still think their parents are the other queue.
		// So fix them up!
		for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) {
			ptr->parent = this;
		}
	}


	//////////////////////////////////
	// Implicit producer hash
	//////////////////////////////////

	struct ImplicitProducerKVP
	{
		std::atomic<details::thread_id_t> key;
		ImplicitProducer* value;		// No need for atomicity since it's only read by the thread that sets it in the first place

		ImplicitProducerKVP() : value(nullptr) { }

		ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
		{
			key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
			value = other.value;
		}

		inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT
		{
			swap(other);
			return *this;
		}

		inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT
		{
			if (this != &other) {
				details::swap_relaxed(key, other.key);
				std::swap(value, other.value);
			}
		}
	};

	template<typename XT, typename XTraits>
	friend void moodycamel::swap(typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&, typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT;

	struct ImplicitProducerHash
	{
		size_t capacity;
		ImplicitProducerKVP* entries;
		ImplicitProducerHash* prev;
	};

	inline void populate_initial_implicit_producer_hash()
	{
		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;

		implicitProducerHashCount.store(0, std::memory_order_relaxed);
		auto hash = &initialImplicitProducerHash;
		hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
		hash->entries = &initialImplicitProducerHashEntries[0];
		for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) {
			initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
		}
		hash->prev = nullptr;
		implicitProducerHash.store(hash, std::memory_order_relaxed);
	}

	void swap_implicit_producer_hashes(ConcurrentQueue& other)
	{
		if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return;

		// Swap (assumes our implicit producer hash is initialized)
		initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries);
		initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0];
		other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0];

		details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount);

		details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
		if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) {
			implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed);
		}
		else {
			ImplicitProducerHash* hash;
			for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) {
				continue;
			}
			hash->prev = &initialImplicitProducerHash;
		}
		if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) {
			other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed);
		}
		else {
			ImplicitProducerHash* hash;
			for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
				continue;
			}
			hash->prev = &other.initialImplicitProducerHash;
		}
	}

	// Only fails (returns nullptr) if memory allocation fails
	ImplicitProducer* get_or_add_implicit_producer()
	{
		// Note that since the data is essentially thread-local (key is thread ID),
		// there's a reduced need for fences (memory ordering is already consistent
		// for any individual thread), except for the current table itself.

		// Start by looking for the thread ID in the current and all previous hash tables.
		// If it's not found, it must not be in there yet, since this same thread would
		// have added it previously to one of the tables that we traversed.

		// Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table

#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
		debug::DebugLock lock(implicitProdMutex);
#endif

		auto id = details::thread_id();
		auto hashedId = details::hash_thread_id(id);

		auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
		for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
			// Look for the id in this hash
			auto index = hashedId;
			while (true) {		// Not an infinite loop because at least one slot is free in the hash table
				index &= hash->capacity - 1;

				auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
				if (probedKey == id) {
					// Found it! If we had to search several hashes deep, though, we should lazily add it
					// to the current main hash table to avoid the extended search next time.
					// Note there's guaranteed to be room in the current hash table since every subsequent
					// table implicitly reserves space for all previous tables (there's only one
					// implicitProducerHashCount).
					auto value = hash->entries[index].value;
					if (hash != mainHash) {
						index = hashedId;
						while (true) {
							index &= mainHash->capacity - 1;
							probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);
							auto empty = details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
							auto reusable = details::invalid_thread_id2;
							if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed)) ||
								(probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire))) {
#else
							if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed))) {
#endif
								mainHash->entries[index].value = value;
								break;
							}
							++index;
						}
					}

					return value;
				}
				if (probedKey == details::invalid_thread_id) {
					break;		// Not in this hash table
				}
				++index;
			}
		}

		// Insert!
		auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
		while (true) {
			if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) {
				// We've acquired the resize lock, try to allocate a bigger hash table.
				// Note the acquire fence synchronizes with the release fence at the end of this block, and hence when
				// we reload implicitProducerHash it must be the most recent version (it only gets changed within this
				// locked block).
				mainHash = implicitProducerHash.load(std::memory_order_acquire);
				if (newCount >= (mainHash->capacity >> 1)) {
					auto newCapacity = mainHash->capacity << 1;
					while (newCount >= (newCapacity >> 1)) {
						newCapacity <<= 1;
					}
					auto raw = static_cast<char*>((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of<ImplicitProducerKVP>::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity));
					if (raw == nullptr) {
						// Allocation failed
						implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
						implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
						return nullptr;
					}

					auto newHash = new (raw) ImplicitProducerHash;
					newHash->capacity = newCapacity;
					newHash->entries = reinterpret_cast<ImplicitProducerKVP*>(details::align_for<ImplicitProducerKVP>(raw + sizeof(ImplicitProducerHash)));
					for (size_t i = 0; i != newCapacity; ++i) {
						new (newHash->entries + i) ImplicitProducerKVP;
						newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed);
					}
					newHash->prev = mainHash;
					implicitProducerHash.store(newHash, std::memory_order_release);
					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
					mainHash = newHash;
				}
				else {
					implicitProducerHashResizeInProgress.clear(std::memory_order_release);
				}
			}

			// If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table
			// to finish being allocated by another thread (and if we just finished allocating above, the condition will
			// always be true)
			if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
				bool recycled;
				auto producer = static_cast<ImplicitProducer*>(recycle_or_create_producer(false, recycled));
				if (producer == nullptr) {
					implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
					return nullptr;
				}
				if (recycled) {
					implicitProducerHashCount.fetch_add(-1, std::memory_order_relaxed);
				}

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
				producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback;
				producer->threadExitListener.userData = producer;
				details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
#endif

				auto index = hashedId;
				while (true) {
					index &= mainHash->capacity - 1;
					auto probedKey = mainHash->entries[index].key.load(std::memory_order_relaxed);

					auto empty = details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
					auto reusable = details::invalid_thread_id2;
					if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed)) ||
						(probedKey == reusable && mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_acquire))) {
#else
					if ((probedKey == empty    && mainHash->entries[index].key.compare_exchange_strong(empty,    id, std::memory_order_relaxed))) {
#endif
						mainHash->entries[index].value = producer;
						break;
					}
					++index;
				}
				return producer;
			}

			// Hmm, the old hash is quite full and somebody else is busy allocating a new one.
			// We need to wait for the allocating thread to finish (if it succeeds, we add, if not,
			// we try to allocate ourselves).
			mainHash = implicitProducerHash.load(std::memory_order_acquire);
		}
	}

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
	void implicit_producer_thread_exited(ImplicitProducer* producer)
	{
		// Remove from thread exit listeners
		details::ThreadExitNotifier::unsubscribe(&producer->threadExitListener);

		// Remove from hash
#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
		debug::DebugLock lock(implicitProdMutex);
#endif
		auto hash = implicitProducerHash.load(std::memory_order_acquire);
		assert(hash != nullptr);		// The thread exit listener is only registered if we were added to a hash in the first place
		auto id = details::thread_id();
		auto hashedId = details::hash_thread_id(id);
		details::thread_id_t probedKey;

		// We need to traverse all the hashes just in case other threads aren't on the current one yet and are
		// trying to add an entry thinking there's a free slot (because they reused a producer)
		for (; hash != nullptr; hash = hash->prev) {
			auto index = hashedId;
			do {
				index &= hash->capacity - 1;
				probedKey = hash->entries[index].key.load(std::memory_order_relaxed);
				if (probedKey == id) {
					hash->entries[index].key.store(details::invalid_thread_id2, std::memory_order_release);
					break;
				}
				++index;
			} while (probedKey != details::invalid_thread_id);		// Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place
		}

		// Mark the queue as being recyclable
		producer->inactive.store(true, std::memory_order_release);
	}

	static void implicit_producer_thread_exited_callback(void* userData)
	{
		auto producer = static_cast<ImplicitProducer*>(userData);
		auto queue = producer->parent;
		queue->implicit_producer_thread_exited(producer);
	}
#endif

	//////////////////////////////////
	// Utility functions
	//////////////////////////////////

	template<typename U>
	static inline U* create_array(size_t count)
	{
		assert(count > 0);
		auto p = static_cast<U*>((Traits::malloc)(sizeof(U) * count));
		if (p == nullptr) {
			return nullptr;
		}

		for (size_t i = 0; i != count; ++i) {
			new (p + i) U();
		}
		return p;
	}

	template<typename U>
	static inline void destroy_array(U* p, size_t count)
	{
		if (p != nullptr) {
			assert(count > 0);
			for (size_t i = count; i != 0; ) {
				(p + --i)->~U();
			}
			(Traits::free)(p);
		}
	}
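	// These helpers (create_array/destroy_array above, create/destroy below)
	// pair (Traits::malloc) with placement new and an explicit destructor call
	// with (Traits::free), so supplying a custom Traits type redirects the
	// memory the queue uses.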
	template<typename U>
	static inline U* create()
	{
		auto p = (Traits::malloc)(sizeof(U));
		return p != nullptr ? new (p) U : nullptr;
	}

	template<typename U, typename A1>
	static inline U* create(A1&& a1)
	{
		auto p = (Traits::malloc)(sizeof(U));
		return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr;
	}

	template<typename U>
	static inline void destroy(U* p)
	{
		if (p != nullptr) {
			p->~U();
		}
		(Traits::free)(p);
	}

private:
	std::atomic<ProducerBase*> producerListTail;
	std::atomic<std::uint32_t> producerCount;

	std::atomic<size_t> initialBlockPoolIndex;
	Block* initialBlockPool;
	size_t initialBlockPoolSize;

#if !MCDBGQ_USEDEBUGFREELIST
	FreeList<Block> freeList;
#else
	debug::DebugFreeList<Block> freeList;
#endif

	std::atomic<ImplicitProducerHash*> implicitProducerHash;
	std::atomic<size_t> implicitProducerHashCount;		// Number of slots logically used
	ImplicitProducerHash initialImplicitProducerHash;
	std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
	std::atomic_flag implicitProducerHashResizeInProgress;

	std::atomic<std::uint32_t> nextExplicitConsumerId;
	std::atomic<std::uint32_t> globalExplicitConsumerOffset;

#if MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
	debug::DebugMutex implicitProdMutex;
#endif

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
	std::atomic<ExplicitProducer*> explicitProducers;
	std::atomic<ImplicitProducer*> implicitProducers;
#endif
};


template<typename T, typename Traits>
ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue)
	: producer(queue.recycle_or_create_producer(true))
{
	if (producer != nullptr) {
		producer->token = this;
	}
}

template<typename T, typename Traits>
ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue)
	: producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true))
{
	if (producer != nullptr) {
		producer->token = this;
	}
}

template<typename T, typename Traits>
ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue)
	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
{
	initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
	lastKnownGlobalOffset = -1;
}

template<typename T, typename Traits>
ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue)
	: itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr)
{
	initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
	lastKnownGlobalOffset = -1;
}

template<typename T, typename Traits>
inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
{
	a.swap(b);
}

inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT
{
	a.swap(b);
}

inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT
{
	a.swap(b);
}

template<typename T, typename Traits>
inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT
{
	a.swap(b);
}

}

#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
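get_or_add_implicit_producer() above credits Preshing's write-up on "the world's simplest lock-free hash table". Stripped of the queue's resizing, producer recycling, and thread-exit bookkeeping, the borrowed probing scheme reduces to a CAS race over a power-of-two table. The standalone sketch below illustrates just that core under simplifying assumptions (fixed capacity, insert-only, all names hypothetical); it is not the library's actual implementation:

#include <atomic>
#include <cstddef>
#include <cstdint>

static const std::size_t CAPACITY = 16;        // must be a power of two
static const std::uint32_t EMPTY_KEY = 0;      // reserved sentinel, like invalid_thread_id

struct Slot {
	std::atomic<std::uint32_t> key;
	std::uint32_t value;                       // only read by the thread that wrote it
};
static Slot table[CAPACITY];                   // static storage is zero-initialized, so every key starts as EMPTY_KEY

bool insert(std::uint32_t key, std::uint32_t value)
{
	std::size_t index = key;                   // the key doubles as its own hash here
	for (std::size_t probes = 0; probes != CAPACITY; ++probes, ++index) {
		index &= CAPACITY - 1;                 // cheap wrap-around thanks to the power-of-two capacity
		std::uint32_t probed = table[index].key.load(std::memory_order_relaxed);
		if (probed == key) {
			return true;                       // already inserted
		}
		if (probed == EMPTY_KEY) {
			std::uint32_t expected = EMPTY_KEY;
			// Race to claim the empty slot; a failed CAS means another thread won it.
			if (table[index].key.compare_exchange_strong(expected, key, std::memory_order_relaxed)) {
				table[index].value = value;
				return true;
			}
			if (expected == key) {
				return true;                   // the winner was inserting this same key
			}
		}
		// Slot taken by a different key: linear-probe the next one.
	}
	return false;                              // table full; the real code resizes long before this point
}

Writing value non-atomically after the key CAS mirrors the comment on ImplicitProducerKVP: the value is only ever read by the thread that set it, so no further synchronization is needed for it.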
/*************************************************************************
    > File Name: main.cpp
    > Author: wangzhicheng
    > Mail: 2363702560@qq.com
    > Created Time: Sat 31 Dec 2016 09:44:59 AM AWST
 ************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <iostream>
#include <thread>
#include <chrono>
#include "concurrentqueue.h"
using namespace std;
using namespace moodycamel;

moodycamel::ConcurrentQueue<int> Q(5);	// 5 is only an initial capacity hint; enqueue() grows the queue as needed

void func0() {
	chrono::milliseconds dura(1000);
	while (1) {
		for (int i = 0; i < 10; i++) {
			// enqueue() only fails if memory allocation fails, so this rarely prints
			if (!Q.enqueue(i)) {
				cout << "i = " << i << " enqueue failed...!" << endl;
			}
		}
		//this_thread::sleep_for(dura);
	}
}

void func1() {
	chrono::milliseconds dura(1000);
	while (1) {
		int num;
		if (!Q.try_dequeue(num)) {
			//cerr << "dequeue error...!" << endl;
			this_thread::sleep_for(dura);
			continue;
		}
		else cout << "num = " << num << endl;
	}
}

int main() {
	thread th1(func0);
	thread th2(func1);
	th1.join();
	th2.join();
	return 0;
}

CC=g++
all:
	$(CC) -std=c++11 -g -o ConCurrentQueueTest main.cpp -pthread
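The test driver above uses only the tokenless enqueue/try_dequeue overloads, which go through the implicit-producer hash described earlier on every call. Since this part of the header defines ProducerToken and ConsumerToken, a variant of the same producer/consumer pair using tokens, which amortizes those lookups, might look like the following sketch (tq, the loop bound of 100, and the function names are illustrative, not from the original post):

#include <iostream>
#include <thread>
#include "concurrentqueue.h"

moodycamel::ConcurrentQueue<int> tq;

void producer() {
	moodycamel::ProducerToken ptok(tq);    // binds these enqueues to one explicit sub-queue
	for (int i = 0; i < 100; ++i) {
		tq.enqueue(ptok, i);               // token overload skips the thread-ID hash lookup
	}
}

void consumer() {
	moodycamel::ConsumerToken ctok(tq);    // remembers which producer it drained last
	int item;
	int received = 0;
	while (received < 100) {               // spins; a real consumer would back off or block
		if (tq.try_dequeue(ctok, item)) {
			std::cout << "got " << item << '\n';
			++received;
		}
	}
}

int main() {
	std::thread p(producer);
	std::thread c(consumer);
	p.join();
	c.join();
	return 0;
}

A ProducerToken gives the calling code its own explicit sub-queue rather than routing through the thread-ID lookup, which is why the token overloads are generally faster in tight loops like this one.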
