33 #ifndef GKO_CORE_EXECUTOR_HPP_ 34 #define GKO_CORE_EXECUTOR_HPP_ 41 #include <type_traits> 44 #include <ginkgo/core/base/types.hpp> 45 #include <ginkgo/core/log/logger.hpp> 46 #include <ginkgo/core/synthesizer/containers.hpp> 51 struct cusparseContext;
57 #define GKO_FORWARD_DECLARE(_type, ...) class _type 59 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_FORWARD_DECLARE);
61 #undef GKO_FORWARD_DECLARE 64 class ReferenceExecutor;
175 #define GKO_DECLARE_RUN_OVERLOAD(_type, ...) \ 176 virtual void run(std::shared_ptr<const _type>) const 178 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_RUN_OVERLOAD);
180 #undef GKO_DECLARE_RUN_OVERLOAD 183 virtual void run(std::shared_ptr<const ReferenceExecutor> executor)
const;
190 virtual const char *get_name()
const noexcept;
193 #define GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(_type, _namespace, _kernel) \ 195 void run(std::shared_ptr<const ::gko::_type> exec) const override \ 197 this->call(counts{}, exec); \ 201 template <int... Ns> \ 202 void call(::gko::syn::value_list<int, Ns...>, \ 203 std::shared_ptr<const ::gko::_type> exec) const \ 205 ::gko::kernels::_namespace::_kernel( \ 206 exec, std::forward<Args>(std::get<Ns>(data))...); \ 208 static_assert(true, \ 209 "This assert is used to counter the false positive extra " \ 210 "semi-colon warnings") 212 #define GKO_DETAIL_DEFINE_RUN_OVERLOAD(_type, _namespace, _kernel, ...) \ 214 void run(std::shared_ptr<const ::gko::_type> exec) const override \ 216 this->call(counts{}, exec); \ 220 template <int... Ns> \ 221 void call(::gko::syn::value_list<int, Ns...>, \ 222 std::shared_ptr<const ::gko::_type> exec) const \ 224 ::gko::kernels::_namespace::_kernel( \ 225 exec, std::forward<Args>(std::get<Ns>(data))...); \ 227 static_assert(true, \ 228 "This assert is used to counter the false positive extra " \ 229 "semi-colon warnings") 288 #define GKO_REGISTER_OPERATION(_name, _kernel) \ 289 template <typename... Args> \ 290 class _name##_operation : public Operation { \ 292 ::gko::syn::as_list<::gko::syn::range<0, sizeof...(Args)>>; \ 295 _name##_operation(Args &&... args) : data(std::forward<Args>(args)...) \ 298 const char *get_name() const noexcept override \ 300 static auto name = [this] { \ 301 std::ostringstream oss; \ 302 oss << #_kernel << '#' << sizeof...(Args); \ 305 return name.c_str(); \ 308 GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(OmpExecutor, omp, _kernel); \ 309 GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(CudaExecutor, cuda, _kernel); \ 310 GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(ReferenceExecutor, reference, \ 314 mutable std::tuple<Args &&...> data; \ 317 template <typename... Args> \ 318 static _name##_operation<Args...> make_##_name(Args &&... 
args) \ 320 return _name##_operation<Args...>(std::forward<Args>(args)...); \ 322 static_assert(true, \ 323 "This assert is used to counter the false positive extra " \ 324 "semi-colon warnings") 411 template <
typename T>
412 friend class detail::ExecutorBase;
428 virtual void run(
const Operation &op)
const = 0;
440 template <
typename ClosureOmp,
typename ClosureCuda>
441 void run(
const ClosureOmp &op_omp,
const ClosureCuda &op_cuda)
const 443 LambdaOperation<ClosureOmp, ClosureCuda> op(op_omp, op_cuda);
458 template <
typename T>
461 this->
template log<log::Logger::allocation_started>(
462 this, num_elems *
sizeof(T));
463 T *allocated =
static_cast<T *
>(this->raw_alloc(num_elems *
sizeof(T)));
464 this->
template log<log::Logger::allocation_completed>(
465 this, num_elems *
sizeof(T), reinterpret_cast<uintptr>(allocated));
476 void free(
void *ptr)
const noexcept
478 this->
template log<log::Logger::free_started>(
479 this,
reinterpret_cast<uintptr
>(ptr));
481 this->
template log<log::Logger::free_completed>(
482 this,
reinterpret_cast<uintptr
>(ptr));
497 template <
typename T>
499 const T *src_ptr, T *dest_ptr)
const 501 this->
template log<log::Logger::copy_started>(
502 src_exec,
this,
reinterpret_cast<uintptr
>(src_ptr),
503 reinterpret_cast<uintptr>(dest_ptr), num_elems *
sizeof(T));
504 this->raw_copy_from(src_exec, num_elems *
sizeof(T), src_ptr, dest_ptr);
505 this->
template log<log::Logger::copy_completed>(
506 src_exec,
this,
reinterpret_cast<uintptr
>(src_ptr),
507 reinterpret_cast<uintptr>(dest_ptr), num_elems *
sizeof(T));
514 virtual std::shared_ptr<Executor> get_master() noexcept = 0;
519 virtual std::shared_ptr<const Executor> get_master()
const noexcept = 0;
524 virtual void synchronize()
const = 0;
536 virtual void *raw_alloc(
size_type size)
const = 0;
545 virtual void raw_free(
void *ptr)
const noexcept = 0;
558 const void *src_ptr,
void *dest_ptr)
const = 0;
569 #define GKO_ENABLE_RAW_COPY_TO(_exec_type, ...) \ 570 virtual void raw_copy_to(const _exec_type *dest_exec, size_type n_bytes, \ 571 const void *src_ptr, void *dest_ptr) const = 0 573 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_RAW_COPY_TO);
575 #undef GKO_ENABLE_RAW_COPY_TO 588 template <
typename ClosureOmp,
typename ClosureCuda>
589 class LambdaOperation :
public Operation {
598 LambdaOperation(
const ClosureOmp &op_omp,
const ClosureCuda &op_cuda)
599 : op_omp_(op_omp), op_cuda_(op_cuda)
602 void run(std::shared_ptr<const OmpExecutor>)
const override 607 void run(std::shared_ptr<const CudaExecutor>)
const override 614 ClosureCuda op_cuda_;
627 template <
typename T>
654 std::shared_ptr<const Executor> exec_;
658 template <
typename T>
667 void operator()(pointer ptr)
const 675 std::shared_ptr<const Executor> exec_;
682 template <
typename ConcreteExecutor>
683 class ExecutorBase :
public Executor {
685 void run(
const Operation &op)
const override 687 this->
template log<log::Logger::operation_launched>(
this, &op);
688 op.run(
self()->shared_from_this());
689 this->
template log<log::Logger::operation_completed>(
this, &op);
694 const void *src_ptr,
void *dest_ptr)
const override 696 src_exec->raw_copy_to(
self(), n_bytes, src_ptr, dest_ptr);
700 ConcreteExecutor *
self() noexcept
702 return static_cast<ConcreteExecutor *
>(
this);
705 const ConcreteExecutor *
self()
const noexcept
707 return static_cast<const ConcreteExecutor *
>(
this);
715 #define GKO_OVERRIDE_RAW_COPY_TO(_executor_type, ...) \ 716 void raw_copy_to(const _executor_type *dest_exec, size_type n_bytes, \ 717 const void *src_ptr, void *dest_ptr) const override 728 public std::enable_shared_from_this<OmpExecutor> {
729 friend class detail::ExecutorBase<OmpExecutor>;
735 static std::shared_ptr<OmpExecutor>
create()
737 return std::shared_ptr<OmpExecutor>(
new OmpExecutor());
740 std::shared_ptr<Executor> get_master() noexcept
override;
742 std::shared_ptr<const Executor> get_master()
const noexcept
override;
744 void synchronize()
const override;
749 void *raw_alloc(
size_type size)
const override;
751 void raw_free(
void *ptr)
const noexcept
override;
753 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO);
773 static std::shared_ptr<ReferenceExecutor> create()
780 this->
template log<log::Logger::operation_launched>(
this, &op);
781 op.run(std::static_pointer_cast<const ReferenceExecutor>(
782 this->shared_from_this()));
783 this->
template log<log::Logger::operation_completed>(
this, &op);
792 namespace reference {
805 public std::enable_shared_from_this<CudaExecutor> {
806 friend class ExecutorBase<CudaExecutor>;
816 static std::shared_ptr<CudaExecutor> create(
817 int device_id, std::shared_ptr<Executor> master);
819 ~
CudaExecutor() { decrease_num_execs(this->device_id_); }
821 std::shared_ptr<Executor> get_master() noexcept
override;
823 std::shared_ptr<const Executor> get_master()
const noexcept
override;
825 void synchronize()
const override;
827 void run(
const Operation &op)
const override;
837 static int get_num_devices();
854 constexpr
uint32 warp_size = 32;
855 auto warps_per_sm = num_cores_per_sm_ / warp_size;
856 return num_multiprocessor_ * warps_per_sm;
883 return cusparse_handle_.get();
887 void set_gpu_property();
891 CudaExecutor(
int device_id, std::shared_ptr<Executor> master)
892 : device_id_(device_id),
894 num_cores_per_sm_(0),
895 num_multiprocessor_(0),
899 assert(device_id < max_devices);
900 this->set_gpu_property();
901 this->init_handles();
902 increase_num_execs(device_id);
905 void *raw_alloc(
size_type size)
const override;
907 void raw_free(
void *ptr)
const noexcept
override;
909 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO);
911 static void increase_num_execs(
int device_id)
913 std::lock_guard<std::mutex> guard(mutex[device_id]);
914 num_execs[device_id]++;
917 static void decrease_num_execs(
int device_id)
919 std::lock_guard<std::mutex> guard(mutex[device_id]);
920 num_execs[device_id]--;
923 static int get_num_execs(
int device_id)
925 std::lock_guard<std::mutex> guard(mutex[device_id]);
926 return num_execs[device_id];
931 std::shared_ptr<Executor> master_;
932 int num_cores_per_sm_;
933 int num_multiprocessor_;
937 template <
typename T>
938 using handle_manager = std::unique_ptr<T, std::function<void(T *)>>;
939 handle_manager<cublasContext> cublas_handle_;
940 handle_manager<cusparseContext> cusparse_handle_;
942 static constexpr
int max_devices = 64;
943 static int num_execs[max_devices];
944 static std::mutex mutex[max_devices];
955 #undef GKO_OVERRIDE_RAW_COPY_TO 961 #endif // GKO_CORE_EXECUTOR_HPP_ int get_major_version() const noexcept
Get the major version of compute capability.
Definition: executor.hpp:862
int get_device_id() const noexcept
Get the CUDA device id of the device associated to this executor.
Definition: executor.hpp:832
executor_deleter(std::shared_ptr< const Executor > exec)
Creates a new deleter.
Definition: executor.hpp:637
Definition: executor.hpp:659
std::uint32_t uint32
32-bit unsigned integral type.
Definition: types.hpp:134
void run(const Operation &op) const override
Runs the specified Operation using this Executor.
Definition: executor.hpp:778
void copy_from(const Executor *src_exec, size_type num_elems, const T *src_ptr, T *dest_ptr) const
Copies data from another Executor.
Definition: executor.hpp:498
T * alloc(size_type num_elems) const
Allocates memory in this Executor.
Definition: executor.hpp:459
This is a deleter that uses an executor's free method to deallocate the data.
Definition: executor.hpp:628
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:94
int get_num_cores_per_sm() const noexcept
Get the number of cores per SM of this executor.
Definition: executor.hpp:842
int get_num_multiprocessor() const noexcept
Get the number of multiprocessor of this executor.
Definition: executor.hpp:847
The Ginkgo namespace.
Definition: abstract_factory.hpp:45
This is a specialization of the OmpExecutor, which runs the reference implementations of the kernels ...
Definition: executor.hpp:771
void run(const ClosureOmp &op_omp, const ClosureCuda &op_cuda) const
Runs one of the passed in functors, depending on the Executor type.
Definition: executor.hpp:441
static std::shared_ptr< OmpExecutor > create()
Creates a new OmpExecutor.
Definition: executor.hpp:735
EnableLogging is a mixin which should be inherited by any class which wants to enable logging...
Definition: logger.hpp:521
cublasContext * get_cublas_handle() const
Get the cublas handle for this executor.
Definition: executor.hpp:874
This is the Executor subclass which represents the CUDA device.
Definition: executor.hpp:804
void free(void *ptr) const noexcept
Frees memory previously allocated with Executor::alloc().
Definition: executor.hpp:476
Operations can be used to define functionalities whose implementations differ among devices...
Definition: executor.hpp:173
This is the Executor subclass which represents the OpenMP device (typically CPU). ...
Definition: executor.hpp:727
cusparseContext * get_cusparse_handle() const
Get the cusparse handle for this executor.
Definition: executor.hpp:881
void operator()(pointer ptr) const
Deletes the object.
Definition: executor.hpp:646
int get_num_warps() const noexcept
Get the number of warps of this executor.
Definition: executor.hpp:852
int get_minor_version() const noexcept
Get the minor version of compute capability.
Definition: executor.hpp:867
The first step in using the Ginkgo library consists of creating an executor.
Definition: executor.hpp:410