Ginkgo  Generated from pipelines/1589998975 branch based on develop. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
The papi-logging program

The PAPI logging example.

This example depends on simple-solver-logging.

Table of contents
  1. Introduction
  2. The commented program
  3. Results
  4. The plain program

Introduction

About the example

The commented program

}
/**
 * Converts the address stored in a pointer into its textual form.
 *
 * The pointer is reinterpreted as a gko::uintptr and that integer is streamed
 * into a string with the stream's default formatting.
 *
 * @tparam T  pointee type
 * @param ptr  pointer whose address is formatted
 * @return the streamed integer value of the address
 */
template <typename T>
std::string to_string(T* ptr)
{
    const auto address = reinterpret_cast<gko::uintptr>(ptr);
    std::ostringstream text;
    text << address;
    return text.str();
}
} // namespace
int init_papi_counters(std::string solver_name, std::string A_name)
{

Initialize PAPI, add events and start it up

int eventset = PAPI_NULL;
int ret_val = PAPI_library_init(PAPI_VER_CURRENT);
if (ret_val != PAPI_VER_CURRENT) {
std::cerr << "Error at PAPI_library_init()" << std::endl;
std::exit(-1);
}
ret_val = PAPI_create_eventset(&eventset);
if (PAPI_OK != ret_val) {
std::cerr << "Error at PAPI_create_eventset()" << std::endl;
std::exit(-1);
}
std::string simple_apply_string("sde:::ginkgo0::linop_apply_completed::");
std::string advanced_apply_string(
"sde:::ginkgo0::linop_advanced_apply_completed::");
papi_add_event(simple_apply_string + solver_name, eventset);
papi_add_event(simple_apply_string + A_name, eventset);
papi_add_event(advanced_apply_string + A_name, eventset);
ret_val = PAPI_start(eventset);
if (PAPI_OK != ret_val) {
std::cerr << "Error at PAPI_start()" << std::endl;
std::exit(-1);
}
return eventset;
}
/**
 * Stops the given PAPI event set, shuts PAPI down and prints the three
 * counters that were collected.
 *
 * If PAPI_stop() fails, a message is printed to std::cerr and the process
 * terminates with exit code -1.
 *
 * @param eventset  handle of the running PAPI event set
 */
void print_papi_counters(int eventset)
{
    // Stop PAPI and read the linop_apply_completed event for all of them
    long long int values[3];
    int ret_val = PAPI_stop(eventset, values);
    if (PAPI_OK != ret_val) {
        std::cerr << "Error at PAPI_stop()" << std::endl;
        std::exit(-1);
    }
    PAPI_shutdown();

    // Print all values returned from PAPI
    std::cout << "PAPI SDE counters:" << std::endl;
    std::cout << "solver did " << values[0] << " applies." << std::endl;
    std::cout << "A did " << values[1] << " simple applies." << std::endl;
    std::cout << "A did " << values[2] << " advanced applies." << std::endl;
}
int main(int argc, char* argv[])
{

Some shortcuts

using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;

Print version information

std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}

Figure out where to run the code

const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)(); // throws if not valid

Read data

auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
auto b = gko::read<vec>(std::ifstream("data/b.mtx"), exec);
auto x = gko::read<vec>(std::ifstream("data/x0.mtx"), exec);

Generate solver

const RealValueType reduction_factor{1e-7};
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(20u),
.with_reduction_factor(reduction_factor))
.on(exec);
auto solver = solver_gen->generate(A);

In this example, we split as much as possible the Ginkgo solver/logger and the PAPI interface. Note that the PAPI ginkgo namespaces are of the form sde:::ginkgo<x> where <x> starts from 0 and is incremented with every new PAPI logger.

int eventset =
init_papi_counters(to_string(solver.get()), to_string(A.get()));

Create a PAPI logger and add it to relevant LinOps

gko::log::Logger::linop_apply_completed_mask |
gko::log::Logger::linop_advanced_apply_completed_mask);
solver->add_logger(logger);
A->add_logger(logger);

Solve system

solver->apply(b, x);

Stop PAPI event gathering and print the counters

print_papi_counters(eventset);

Print solution

std::cout << "Solution (x): \n";
write(std::cout, x);

Calculate residual

auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto res = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(res);
std::cout << "Residual norm sqrt(r^T r): \n";
write(std::cout, res);
}

Results

The following is the expected result:

PAPI SDE counters:
solver did 1 applies.
A did 20 simple applies.
A did 1 advanced applies.
Solution (x):
%%MatrixMarket matrix array real general
19 1
0.252218
0.108645
0.0662811
0.0630433
0.0384088
0.0396536
0.0402648
0.0338935
0.0193098
0.0234653
0.0211499
0.0196413
0.0199151
0.0181674
0.0162722
0.0150714
0.0107016
0.0121141
0.0123025
Residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
8.87107e-16

Comments about programming and debugging

The plain program

#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <thread>
#include <papi.h>
#include <ginkgo/ginkgo.hpp>
namespace {
/**
 * Translates a PAPI event name into its event code and adds that event to the
 * given event set.
 *
 * If either PAPI call does not return PAPI_OK, a message naming the failing
 * call is printed to std::cerr and the process terminates with exit code -1.
 *
 * @param event_name  name of the PAPI event to register
 * @param eventset  handle of the PAPI event set that receives the event
 */
void papi_add_event(const std::string& event_name, int& eventset)
{
    int code;
    int ret_val = PAPI_event_name_to_code(event_name.c_str(), &code);
    if (PAPI_OK != ret_val) {
        std::cerr << "Error at PAPI_event_name_to_code()" << std::endl;
        std::exit(-1);
    }
    ret_val = PAPI_add_event(eventset, code);
    if (PAPI_OK != ret_val) {
        // Report the call that actually failed so the two error paths can be
        // told apart.
        std::cerr << "Error at PAPI_add_event()" << std::endl;
        std::exit(-1);
    }
}
/**
 * Converts the address stored in a pointer into its textual form.
 *
 * The pointer is reinterpreted as a gko::uintptr and that integer is streamed
 * into a string with the stream's default formatting.
 *
 * @tparam T  pointee type
 * @param ptr  pointer whose address is formatted
 * @return the streamed integer value of the address
 */
template <typename T>
std::string to_string(T* ptr)
{
    const auto address = reinterpret_cast<gko::uintptr>(ptr);
    std::ostringstream text;
    text << address;
    return text.str();
}
} // namespace
int init_papi_counters(std::string solver_name, std::string A_name)
{
int eventset = PAPI_NULL;
int ret_val = PAPI_library_init(PAPI_VER_CURRENT);
if (ret_val != PAPI_VER_CURRENT) {
std::cerr << "Error at PAPI_library_init()" << std::endl;
std::exit(-1);
}
ret_val = PAPI_create_eventset(&eventset);
if (PAPI_OK != ret_val) {
std::cerr << "Error at PAPI_create_eventset()" << std::endl;
std::exit(-1);
}
std::string simple_apply_string("sde:::ginkgo0::linop_apply_completed::");
std::string advanced_apply_string(
"sde:::ginkgo0::linop_advanced_apply_completed::");
papi_add_event(simple_apply_string + solver_name, eventset);
papi_add_event(simple_apply_string + A_name, eventset);
papi_add_event(advanced_apply_string + A_name, eventset);
ret_val = PAPI_start(eventset);
if (PAPI_OK != ret_val) {
std::cerr << "Error at PAPI_start()" << std::endl;
std::exit(-1);
}
return eventset;
}
/**
 * Stops the PAPI event set, shuts the library down and reports the three
 * collected counters on std::cout.
 *
 * If PAPI_stop() fails, a message is printed to std::cerr and the process
 * terminates with exit code -1.
 *
 * @param eventset  handle of the running PAPI event set
 */
void print_papi_counters(int eventset)
{
    long long int counters[3];
    if (PAPI_stop(eventset, counters) != PAPI_OK) {
        std::cerr << "Error at PAPI_stop()" << std::endl;
        std::exit(-1);
    }
    PAPI_shutdown();

    // Name the slots after the labels they are printed with.
    const long long int solver_applies = counters[0];
    const long long int simple_applies = counters[1];
    const long long int advanced_applies = counters[2];

    std::cout << "PAPI SDE counters:" << std::endl;
    std::cout << "solver did " << solver_applies << " applies." << std::endl;
    std::cout << "A did " << simple_applies << " simple applies." << std::endl;
    std::cout << "A did " << advanced_applies << " advanced applies."
              << std::endl;
}
int main(int argc, char* argv[])
{
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
auto b = gko::read<vec>(std::ifstream("data/b.mtx"), exec);
auto x = gko::read<vec>(std::ifstream("data/x0.mtx"), exec);
const RealValueType reduction_factor{1e-7};
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(20u),
.with_reduction_factor(reduction_factor))
.on(exec);
auto solver = solver_gen->generate(A);
int eventset =
init_papi_counters(to_string(solver.get()), to_string(A.get()));
gko::log::Logger::linop_apply_completed_mask |
gko::log::Logger::linop_advanced_apply_completed_mask);
solver->add_logger(logger);
A->add_logger(logger);
solver->apply(b, x);
print_papi_counters(eventset);
std::cout << "Solution (x): \n";
write(std::cout, x);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto res = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(res);
std::cout << "Residual norm sqrt(r^T r): \n";
write(std::cout, res);
}
gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:28
gko::log::profile_event_category::solver
Solver events.
gko::layout_type::array
The matrix should be written as dense matrix in column-major order.
gko::matrix::Dense
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: dense_cache.hpp:19
gko::log::Papi::create
static std::shared_ptr< Papi > create(std::shared_ptr< const gko::Executor >, const Logger::mask_type &enabled_events=Logger::all_events_mask)
Creates a Papi Logger.
Definition: papi.hpp:181
gko::HipExecutor::create
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
gko::version_info::get
static const version_info & get()
Returns an instance of version_info.
Definition: version.hpp:139
gko::stop::ResidualNorm
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition: residual_norm.hpp:113
gko::solver::Cg
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition: cg.hpp:48
gko::write
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType >>>::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition: mtx_io.hpp:295
gko::share
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition: utils_helper.hpp:224
gko::CudaExecutor::create
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
gko::OmpExecutor::create
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition: executor.hpp:1396
gko::remove_complex
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:260
gko::DpcppExecutor::create
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
gko::real
constexpr auto real(const T &x)
Returns the real part of the object.
Definition: math.hpp:869
gko::one
constexpr T one()
Returns the multiplicative identity for T.
Definition: math.hpp:630