Ginkgo  Generated from pipelines/1068515030 branch based on master. Ginkgo version 1.7.0
A numerical linear algebra library targeting many-core architectures
The performance-debugging program

The simple solver with performance debugging example..

This example depends on simple-solver-logging, minimal-cuda-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

About the example

This example runs a solver on a test problem and shows how to use loggers to debug performance and convergence rate.

The commented program

#include <ginkgo/ginkgo.hpp>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
template <typename ValueType>
template <typename ValueType>
namespace utils {

creates a zero vector

template <typename ValueType>
std::unique_ptr<vec<ValueType>> create_vector(
std::shared_ptr<const gko::Executor> exec, gko::size_type size,
ValueType value)
{
auto res = vec<ValueType>::create(exec);
res->read(gko::matrix_data<ValueType>(gko::dim<2>{size, 1}, value));
return res;
}

utilities for computing norms and residuals

template <typename ValueType>
ValueType get_first_element(const vec<ValueType>* norm)
{
return norm->get_executor()->copy_val_to_host(norm->get_const_values());
}
template <typename ValueType>
gko::remove_complex<ValueType> compute_norm(const vec<ValueType>* b)
{
auto exec = b->get_executor();
auto b_norm = gko::initialize<real_vec<ValueType>>({0.0}, exec);
b->compute_norm2(b_norm);
return get_first_element(b_norm.get());
}
template <typename ValueType>
gko::remove_complex<ValueType> compute_residual_norm(
const gko::LinOp* system_matrix, const vec<ValueType>* b,
const vec<ValueType>* x)
{
auto exec = system_matrix->get_executor();
auto one = gko::initialize<vec<ValueType>>({1.0}, exec);
auto neg_one = gko::initialize<vec<ValueType>>({-1.0}, exec);
auto res = gko::clone(b);
system_matrix->apply(one, x, neg_one, res);
return compute_norm(res.get());
}
} // namespace utils
namespace loggers {

A logger that accumulates the time of all operations. For each operation type (allocations, free, copy, internal operations i.e. kernels), the timing is taken before and after. This can create significant overhead since to ensure proper timings, calls to synchronize are required.

struct OperationLogger : gko::log::Logger {
void on_allocation_started(const gko::Executor* exec,
const gko::size_type&) const override
{
this->start_operation(exec, "allocate");
}
void on_allocation_completed(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->end_operation(exec, "allocate");
}
void on_free_started(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->start_operation(exec, "free");
}
void on_free_completed(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->end_operation(exec, "free");
}
void on_copy_started(const gko::Executor* from, const gko::Executor* to,
const gko::uintptr&, const gko::uintptr&,
const gko::size_type&) const override
{
from->synchronize();
this->start_operation(to, "copy");
}
void on_copy_completed(const gko::Executor* from, const gko::Executor* to,
const gko::uintptr&, const gko::uintptr&,
const gko::size_type&) const override
{
from->synchronize();
this->end_operation(to, "copy");
}
void on_operation_launched(const gko::Executor* exec,
const gko::Operation* op) const override
{
this->start_operation(exec, op->get_name());
}
void on_operation_completed(const gko::Executor* exec,
const gko::Operation* op) const override
{
this->end_operation(exec, op->get_name());
}
void write_data(std::ostream& ostream)
{
for (const auto& entry : total) {
ostream << "\t" << entry.first.c_str() << ": "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(
entry.second)
.count()
<< std::endl;
}
}
private:

Helper which synchronizes and starts the time before every operation.

void start_operation(const gko::Executor* exec,
const std::string& name) const
{
nested.emplace_back(0);
exec->synchronize();
start[name] = std::chrono::steady_clock::now();
}

Helper to compute the end time and store the operation's time at its end. Also time nested operations.

void end_operation(const gko::Executor* exec, const std::string& name) const
{
exec->synchronize();
const auto end = std::chrono::steady_clock::now();
const auto diff = end - start[name];

make sure timings for nested operations are not counted twice

total[name] += diff - nested.back();
nested.pop_back();
if (nested.size() > 0) {
nested.back() += diff;
}
}
mutable std::map<std::string, std::chrono::steady_clock::time_point> start;
mutable std::map<std::string, std::chrono::steady_clock::duration> total;

the position i of this vector holds the total time spend on child operations on nesting level i

mutable std::vector<std::chrono::steady_clock::duration> nested;
};

This logger tracks the persistently allocated data

struct StorageLogger : gko::log::Logger {

Store amount of bytes allocated on every allocation

void on_allocation_completed(const gko::Executor*,
const gko::size_type& num_bytes,
const gko::uintptr& location) const override
{
storage[location] = num_bytes;
}

Reset the amount of bytes on every free

void on_free_completed(const gko::Executor*,
const gko::uintptr& location) const override
{
storage[location] = 0;
}

Write the data after summing the total from all allocations

void write_data(std::ostream& ostream)
{
gko::size_type total{};
for (const auto& e : storage) {
total += e.second;
}
ostream << "Storage: " << total << std::endl;
}
private:
mutable std::unordered_map<gko::uintptr, gko::size_type> storage;
};

Logs true and recurrent residuals of the solver

template <typename ValueType>
struct ResidualLogger : gko::log::Logger {

Depending on the available information, store the norm or compute it from the residual. If the true residual norm could not be computed, store the value -1.0.

void on_iteration_complete(const gko::LinOp*, const gko::size_type&,
const gko::LinOp* residual,
const gko::LinOp* solution,
const gko::LinOp* residual_norm) const override
{
if (residual_norm) {
rec_res_norms.push_back(utils::get_first_element(
gko::as<real_vec<ValueType>>(residual_norm)));
} else {
rec_res_norms.push_back(
utils::compute_norm(gko::as<vec<ValueType>>(residual)));
}
if (solution) {
true_res_norms.push_back(utils::compute_residual_norm(
matrix, b, gko::as<vec<ValueType>>(solution)));
} else {
true_res_norms.push_back(-1.0);
}
}
ResidualLogger(const gko::LinOp* matrix, const vec<ValueType>* b)
: gko::log::Logger(gko::log::Logger::iteration_complete_mask),
matrix{matrix},
b{b}
{}
void write_data(std::ostream& ostream)
{
ostream << "Recurrent Residual Norms: " << std::endl;
ostream << "[" << std::endl;
for (const auto& entry : rec_res_norms) {
ostream << "\t" << entry << std::endl;
}
ostream << "];" << std::endl;
ostream << "True Residual Norms: " << std::endl;
ostream << "[" << std::endl;
for (const auto& entry : true_res_norms) {
ostream << "\t" << entry << std::endl;
}
ostream << "];" << std::endl;
}
private:
const gko::LinOp* matrix;
const vec<ValueType>* b;
mutable std::vector<gko::remove_complex<ValueType>> rec_res_norms;
mutable std::vector<gko::remove_complex<ValueType>> true_res_norms;
};
} // namespace loggers
namespace {

Print usage help

void print_usage(const char* filename)
{
std::cerr << "Usage: " << filename << " [executor] [matrix file]"
<< std::endl;
std::cerr << "matrix file should be a file in matrix market format. "
"The file data/A.mtx is provided as an example."
<< std::endl;
std::exit(-1);
}
template <typename ValueType>
void print_vector(const gko::matrix::Dense<ValueType>* vec)
{
auto elements_to_print = std::min(gko::size_type(10), vec->get_size()[0]);
std::cout << "[" << std::endl;
for (int i = 0; i < elements_to_print; ++i) {
std::cout << "\t" << vec->at(i) << std::endl;
}
std::cout << "];" << std::endl;
}
} // namespace
int main(int argc, char* argv[])
{

Parametrize the benchmark here Pick a value type

using ValueType = double;
using IndexType = int;

Pick a matrix format

Pick a solver

Pick a preconditioner type

Pick a residual norm reduction value

const gko::remove_complex<ValueType> reduction_factor = 1e-12;

Pick an output file name

const auto of_name = "log.txt";

Simple shortcut

Print version information

std::cout << gko::version_info::get() << std::endl;

Figure out where to run the code

if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}

Figure out where to run the code

const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)(); // throws if not valid

Read the input matrix file directory

std::string input_mtx = "data/A.mtx";
if (argc == 3) {
input_mtx = std::string(argv[2]);
}

Read data: A is read from disk Create a StorageLogger to track the size of A

auto storage_logger = std::make_shared<loggers::StorageLogger>();

Add the logger to the executor

exec->add_logger(storage_logger);

Read the matrix A from file

auto A = gko::share(gko::read<mtx>(std::ifstream(input_mtx), exec));

Remove the storage logger

exec->remove_logger(storage_logger);

Pick a maximum iteration count

const auto max_iters = A->get_size()[0];

Generate b and x vectors

auto b = utils::create_vector<ValueType>(exec, A->get_size()[0], 1.0);
auto x = utils::create_vector<ValueType>(exec, A->get_size()[0], 0.0);

Declare the solver factory. The preconditioner's arguments should be adapted if needed.

auto solver_factory =
solver::build()
.with_criteria(
.with_reduction_factor(reduction_factor),
gko::stop::Iteration::build().with_max_iters(max_iters))
.with_preconditioner(preconditioner::create(exec))
.on(exec);

Declare the output file for all our loggers

std::ofstream output_file(of_name);

Do a warmup run

{

Clone x to not overwrite the original one

auto x_clone = gko::clone(x);

Generate and call apply on a solver

solver_factory->generate(A)->apply(b, x_clone);
exec->synchronize();
}

Do a timed run

{

Clone x to not overwrite the original one

auto x_clone = gko::clone(x);

Synchronize ensures no operation are ongoing

exec->synchronize();

Time before generate

auto g_tic = std::chrono::steady_clock::now();

Generate a solver

auto generated_solver = solver_factory->generate(A);
exec->synchronize();

Time after generate

auto g_tac = std::chrono::steady_clock::now();

Compute the generation time

auto generate_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(g_tac - g_tic);

Write the generate time to the output file

output_file << "Generate time (ns): " << generate_time.count()
<< std::endl;

Similarly time the apply

exec->synchronize();
auto a_tic = std::chrono::steady_clock::now();
generated_solver->apply(b, x_clone);
exec->synchronize();
auto a_tac = std::chrono::steady_clock::now();
auto apply_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(a_tac - a_tic);
output_file << "Apply time (ns): " << apply_time.count() << std::endl;

Compute the residual norm

auto residual =
utils::compute_residual_norm(A.get(), b.get(), x_clone.get());
output_file << "Residual_norm: " << residual << std::endl;
}

Log the internal operations using the OperationLogger without timing

{

Create an OperationLogger to analyze the generate step

auto gen_logger = std::make_shared<loggers::OperationLogger>();

Add the generate logger to the executor

exec->add_logger(gen_logger);

Generate a solver

auto generated_solver = solver_factory->generate(A);

Remove the generate logger from the executor

exec->remove_logger(gen_logger);

Write the data to the output file

output_file << "Generate operations times (ns):" << std::endl;
gen_logger->write_data(output_file);

Create an OperationLogger to analyze the apply step

auto apply_logger = std::make_shared<loggers::OperationLogger>();
exec->add_logger(apply_logger);

Create a ResidualLogger to log the recurent residual

auto res_logger = std::make_shared<loggers::ResidualLogger<ValueType>>(
A.get(), b.get());
generated_solver->add_logger(res_logger);

Solve the system

generated_solver->apply(b, x);
exec->remove_logger(apply_logger);

Write the data to the output file

output_file << "Apply operations times (ns):" << std::endl;
apply_logger->write_data(output_file);
res_logger->write_data(output_file);
}

Print solution

std::cout << "Solution, first ten entries: \n";
print_vector(x.get());

Print output file location

std::cout << "The performance and residual data can be found in " << of_name
<< std::endl;
}

Results

This is the expected standard output:

Solution, first ten entries:
[
0.252218
0.108645
0.0662811
0.0630433
0.0384088
0.0396536
0.0402648
0.0338935
0.0193098
0.0234653
];
The performance and residual data can be found in log.txt

Here is a sample output in the file log.txt:

Generate time (ns): 861
Apply time (ns): 108144
Residual_norm: 2.10788e-15
Generate operations times (ns):
Apply operations times (ns):
allocate: 14991
cg::initialize#8: 872
cg::step_1#5: 7683
cg::step_2#7: 7756
copy: 7751
csr::advanced_spmv#5: 21819
csr::spmv#3: 20429
dense::compute_dot#3: 18043
dense::compute_norm2#2: 16726
free: 8857
residual_norm::residual_norm#9: 3614
Recurrent Residual Norms:
[
4.3589
2.30455
1.46771
0.984875
0.741833
0.513623
0.384165
0.316439
0.227709
0.170312
0.0973722
0.0616831
0.0454123
0.031953
0.0161606
0.00657015
0.00264367
0.000858809
0.000286461
1.64195e-15
];
True Residual Norms:
[
4.3589
2.30455
1.46771
0.984875
0.741833
0.513623
0.384165
0.316439
0.227709
0.170312
0.0973722
0.0616831
0.0454123
0.031953
0.0161606
0.00657015
0.00264367
0.000858809
0.000286461
2.10788e-15
];

Comments about programming and debugging

The plain program

/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2023, the Ginkgo authors
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/
#include <ginkgo/ginkgo.hpp>
#include <algorithm>
#include <array>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <ostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
template <typename ValueType>
template <typename ValueType>
namespace utils {
template <typename ValueType>
std::unique_ptr<vec<ValueType>> create_vector(
std::shared_ptr<const gko::Executor> exec, gko::size_type size,
ValueType value)
{
auto res = vec<ValueType>::create(exec);
res->read(gko::matrix_data<ValueType>(gko::dim<2>{size, 1}, value));
return res;
}
template <typename ValueType>
ValueType get_first_element(const vec<ValueType>* norm)
{
return norm->get_executor()->copy_val_to_host(norm->get_const_values());
}
template <typename ValueType>
gko::remove_complex<ValueType> compute_norm(const vec<ValueType>* b)
{
auto exec = b->get_executor();
auto b_norm = gko::initialize<real_vec<ValueType>>({0.0}, exec);
b->compute_norm2(b_norm);
return get_first_element(b_norm.get());
}
template <typename ValueType>
gko::remove_complex<ValueType> compute_residual_norm(
const gko::LinOp* system_matrix, const vec<ValueType>* b,
const vec<ValueType>* x)
{
auto exec = system_matrix->get_executor();
auto one = gko::initialize<vec<ValueType>>({1.0}, exec);
auto neg_one = gko::initialize<vec<ValueType>>({-1.0}, exec);
auto res = gko::clone(b);
system_matrix->apply(one, x, neg_one, res);
return compute_norm(res.get());
}
} // namespace utils
namespace loggers {
struct OperationLogger : gko::log::Logger {
void on_allocation_started(const gko::Executor* exec,
const gko::size_type&) const override
{
this->start_operation(exec, "allocate");
}
void on_allocation_completed(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->end_operation(exec, "allocate");
}
void on_free_started(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->start_operation(exec, "free");
}
void on_free_completed(const gko::Executor* exec,
const gko::uintptr&) const override
{
this->end_operation(exec, "free");
}
void on_copy_started(const gko::Executor* from, const gko::Executor* to,
const gko::uintptr&, const gko::uintptr&,
const gko::size_type&) const override
{
from->synchronize();
this->start_operation(to, "copy");
}
void on_copy_completed(const gko::Executor* from, const gko::Executor* to,
const gko::uintptr&, const gko::uintptr&,
const gko::size_type&) const override
{
from->synchronize();
this->end_operation(to, "copy");
}
void on_operation_launched(const gko::Executor* exec,
const gko::Operation* op) const override
{
this->start_operation(exec, op->get_name());
}
void on_operation_completed(const gko::Executor* exec,
const gko::Operation* op) const override
{
this->end_operation(exec, op->get_name());
}
void write_data(std::ostream& ostream)
{
for (const auto& entry : total) {
ostream << "\t" << entry.first.c_str() << ": "
<< std::chrono::duration_cast<std::chrono::nanoseconds>(
entry.second)
.count()
<< std::endl;
}
}
private:
void start_operation(const gko::Executor* exec,
const std::string& name) const
{
nested.emplace_back(0);
exec->synchronize();
start[name] = std::chrono::steady_clock::now();
}
void end_operation(const gko::Executor* exec, const std::string& name) const
{
exec->synchronize();
const auto end = std::chrono::steady_clock::now();
const auto diff = end - start[name];
total[name] += diff - nested.back();
nested.pop_back();
if (nested.size() > 0) {
nested.back() += diff;
}
}
mutable std::map<std::string, std::chrono::steady_clock::time_point> start;
mutable std::map<std::string, std::chrono::steady_clock::duration> total;
mutable std::vector<std::chrono::steady_clock::duration> nested;
};
struct StorageLogger : gko::log::Logger {
void on_allocation_completed(const gko::Executor*,
const gko::size_type& num_bytes,
const gko::uintptr& location) const override
{
storage[location] = num_bytes;
}
void on_free_completed(const gko::Executor*,
const gko::uintptr& location) const override
{
storage[location] = 0;
}
void write_data(std::ostream& ostream)
{
gko::size_type total{};
for (const auto& e : storage) {
total += e.second;
}
ostream << "Storage: " << total << std::endl;
}
private:
mutable std::unordered_map<gko::uintptr, gko::size_type> storage;
};
template <typename ValueType>
struct ResidualLogger : gko::log::Logger {
void on_iteration_complete(const gko::LinOp*, const gko::size_type&,
const gko::LinOp* residual,
const gko::LinOp* solution,
const gko::LinOp* residual_norm) const override
{
if (residual_norm) {
rec_res_norms.push_back(utils::get_first_element(
gko::as<real_vec<ValueType>>(residual_norm)));
} else {
rec_res_norms.push_back(
utils::compute_norm(gko::as<vec<ValueType>>(residual)));
}
if (solution) {
true_res_norms.push_back(utils::compute_residual_norm(
matrix, b, gko::as<vec<ValueType>>(solution)));
} else {
true_res_norms.push_back(-1.0);
}
}
ResidualLogger(const gko::LinOp* matrix, const vec<ValueType>* b)
: gko::log::Logger(gko::log::Logger::iteration_complete_mask),
matrix{matrix},
b{b}
{}
void write_data(std::ostream& ostream)
{
ostream << "Recurrent Residual Norms: " << std::endl;
ostream << "[" << std::endl;
for (const auto& entry : rec_res_norms) {
ostream << "\t" << entry << std::endl;
}
ostream << "];" << std::endl;
ostream << "True Residual Norms: " << std::endl;
ostream << "[" << std::endl;
for (const auto& entry : true_res_norms) {
ostream << "\t" << entry << std::endl;
}
ostream << "];" << std::endl;
}
private:
const gko::LinOp* matrix;
const vec<ValueType>* b;
mutable std::vector<gko::remove_complex<ValueType>> rec_res_norms;
mutable std::vector<gko::remove_complex<ValueType>> true_res_norms;
};
} // namespace loggers
namespace {
void print_usage(const char* filename)
{
std::cerr << "Usage: " << filename << " [executor] [matrix file]"
<< std::endl;
std::cerr << "matrix file should be a file in matrix market format. "
"The file data/A.mtx is provided as an example."
<< std::endl;
std::exit(-1);
}
template <typename ValueType>
void print_vector(const gko::matrix::Dense<ValueType>* vec)
{
auto elements_to_print = std::min(gko::size_type(10), vec->get_size()[0]);
std::cout << "[" << std::endl;
for (int i = 0; i < elements_to_print; ++i) {
std::cout << "\t" << vec->at(i) << std::endl;
}
std::cout << "];" << std::endl;
}
} // namespace
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
const gko::remove_complex<ValueType> reduction_factor = 1e-12;
const auto of_name = "log.txt";
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
std::string input_mtx = "data/A.mtx";
if (argc == 3) {
input_mtx = std::string(argv[2]);
}
auto storage_logger = std::make_shared<loggers::StorageLogger>();
exec->add_logger(storage_logger);
auto A = gko::share(gko::read<mtx>(std::ifstream(input_mtx), exec));
exec->remove_logger(storage_logger);
const auto max_iters = A->get_size()[0];
auto b = utils::create_vector<ValueType>(exec, A->get_size()[0], 1.0);
auto x = utils::create_vector<ValueType>(exec, A->get_size()[0], 0.0);
auto solver_factory =
solver::build()
.with_criteria(
.with_reduction_factor(reduction_factor),
gko::stop::Iteration::build().with_max_iters(max_iters))
.with_preconditioner(preconditioner::create(exec))
.on(exec);
std::ofstream output_file(of_name);
{
auto x_clone = gko::clone(x);
solver_factory->generate(A)->apply(b, x_clone);
exec->synchronize();
}
{
auto x_clone = gko::clone(x);
exec->synchronize();
auto g_tic = std::chrono::steady_clock::now();
auto generated_solver = solver_factory->generate(A);
exec->synchronize();
auto g_tac = std::chrono::steady_clock::now();
auto generate_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(g_tac - g_tic);
output_file << "Generate time (ns): " << generate_time.count()
<< std::endl;
exec->synchronize();
auto a_tic = std::chrono::steady_clock::now();
generated_solver->apply(b, x_clone);
exec->synchronize();
auto a_tac = std::chrono::steady_clock::now();
auto apply_time =
std::chrono::duration_cast<std::chrono::nanoseconds>(a_tac - a_tic);
output_file << "Apply time (ns): " << apply_time.count() << std::endl;
auto residual =
utils::compute_residual_norm(A.get(), b.get(), x_clone.get());
output_file << "Residual_norm: " << residual << std::endl;
}
{
auto gen_logger = std::make_shared<loggers::OperationLogger>();
exec->add_logger(gen_logger);
auto generated_solver = solver_factory->generate(A);
exec->remove_logger(gen_logger);
output_file << "Generate operations times (ns):" << std::endl;
gen_logger->write_data(output_file);
auto apply_logger = std::make_shared<loggers::OperationLogger>();
exec->add_logger(apply_logger);
auto res_logger = std::make_shared<loggers::ResidualLogger<ValueType>>(
A.get(), b.get());
generated_solver->add_logger(res_logger);
generated_solver->apply(b, x);
exec->remove_logger(apply_logger);
output_file << "Apply operations times (ns):" << std::endl;
apply_logger->write_data(output_file);
res_logger->write_data(output_file);
}
std::cout << "Solution, first ten entries: \n";
print_vector(x.get());
std::cout << "The performance and residual data can be found in " << of_name
<< std::endl;
}
gko::Executor::synchronize
virtual void synchronize() const =0
Synchronize the operations launched on the executor with its master.
gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:54
gko::log::profile_event_category::solver
Solver events.
gko::LinOp
Definition: lin_op.hpp:146
gko::matrix::Dense
Dense is a matrix format which explicitly stores all values of the matrix.
Definition: dense_cache.hpp:48
gko::Executor::remove_logger
void remove_logger(const log::Logger *logger) override
Definition: executor.hpp:849
gko::uintptr
std::uintptr_t uintptr
Unsigned integer type capable of holding a pointer to void.
Definition: types.hpp:172
gko::matrix::IdentityFactory
This factory is a utility which can be used to generate Identity operators.
Definition: identity.hpp:131
gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:120
gko::initialize
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition: dense.hpp:1573
gko::matrix::Dense::at
value_type & at(size_type row, size_type col) noexcept
Returns a single element of the matrix.
Definition: dense.hpp:897
gko::clone
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition: utils_helper.hpp:203
gko::HipExecutor::create
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
gko
The Ginkgo namespace.
Definition: abstract_factory.hpp:48
gko::version_info::get
static const version_info & get()
Returns an instance of version_info.
Definition: version.hpp:168
gko::Executor::add_logger
void add_logger(std::shared_ptr< const log::Logger > logger) override
Definition: executor.hpp:836
gko::LinOp::apply
LinOp * apply(ptr_param< const LinOp > b, ptr_param< LinOp > x)
Applies a linear operator to a vector (or a sequence of vectors).
Definition: lin_op.hpp:158
gko::stop::ResidualNorm
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition: residual_norm.hpp:138
gko::dim< 2 >
gko::matrix_data
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:155
gko::as
std::decay_t< T > * as(U *obj)
Performs polymorphic type conversion.
Definition: utils_helper.hpp:337
gko::solver::Cg
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition: cg.hpp:74
gko::Operation::get_name
virtual const char * get_name() const noexcept
Returns the operation's name.
gko::log::Logger
Definition: logger.hpp:104
gko::share
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition: utils_helper.hpp:254
gko::CudaExecutor::create
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
gko::OmpExecutor::create
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition: executor.hpp:1373
gko::Executor
The first step in using the Ginkgo library consists of creating an executor.
Definition: executor.hpp:644
gko::PolymorphicObject::get_executor
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition: polymorphic_object.hpp:263
gko::LinOp::get_size
const dim< 2 > & get_size() const noexcept
Returns the size of the operator.
Definition: lin_op.hpp:239
gko::remove_complex
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition: math.hpp:354
gko::DpcppExecutor::create
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
gko::Operation
Operations can be used to define functionalities whose implementations differ among devices.
Definition: executor.hpp:287
gko::one
constexpr T one()
Returns the multiplicative identity for T.
Definition: math.hpp:803