Ginkgo  Generated from pipelines/1330831941 branch based on master. Ginkgo version 1.8.0
A numerical linear algebra library targeting many-core architectures
The par-ilu-convergence program

The ParILU convergence example..

This example depends on simple-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

About the example

This example can be used to inspect the convergence behavior of parallel incomplete factorizations. *

The commented program

template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;

print usage message

if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}

generate executor based on first argument

auto exec = try_generate([&] { return executors.at(argv[1])(); });

set matrix and preconditioner name with default values

std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);

load matrix file into Csr format

auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}

Results

This is the expected output:

Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<max-iterations>] [<num-repetitions>] [fill-in-limit]

When specifying an executor:

Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;

Comments about programming and debugging

The plain program

#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <ginkgo/ginkgo.hpp>
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
executors{
{"reference", [] { return gko::ReferenceExecutor::create(); }},
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp", [] {
}}};
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
decltype(fun()) result;
try {
result = fun();
} catch (const gko::Error& err) {
std::cerr << "Error: " << err.what() << '\n';
std::exit(-1);
}
return result;
}
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
residual->write(residual_data);
mtx->write(mtx_data);
residual_data.sort_row_major();
mtx_data.sort_row_major();
auto it = mtx_data.nonzeros.begin();
double residual_norm{};
for (auto entry : residual_data.nonzeros) {
auto ref_row = it->row;
auto ref_col = it->column;
if (entry.row == ref_row && entry.column == ref_col) {
residual_norm += gko::squared_norm(entry.value);
++it;
}
}
return std::sqrt(residual_norm);
}
int main(int argc, char* argv[])
{
using ValueType = double;
using IndexType = int;
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
std::cerr << "Usage: executable"
<< " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
"[<parilu|parilut|paric|parict] [<max-iterations>] "
"[<num-repetitions>] [<fill-in-limit>]\n";
return -1;
}
auto exec = try_generate([&] { return executors.at(argv[1])(); });
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
auto mtx = gko::share(try_generate([&] {
std::ifstream mtx_stream{matrix};
if (!mtx_stream) {
throw GKO_STREAM_ERROR("Unable to open matrix file");
}
std::cerr << "Reading " << matrix << std::endl;
return gko::read<gko::matrix::Csr<ValueType, IndexType>>(mtx_stream,
exec);
}));
auto factory_generator =
[&](gko::size_type iteration) -> std::shared_ptr<gko::LinOpFactory> {
if (precond == "parilu") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "paric") {
.with_iterations(iteration)
.on(exec);
} else if (precond == "parilut") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else if (precond == "parict") {
.with_fill_in_limit(limit)
.with_iterations(iteration)
.on(exec);
} else {
GKO_NOT_IMPLEMENTED;
}
};
auto one = gko::initialize<gko::matrix::Dense<ValueType>>({1.0}, exec);
auto minus_one =
gko::initialize<gko::matrix::Dense<ValueType>>({-1.0}, exec);
for (int it = 1; it <= max_iterations; ++it) {
auto factory = factory_generator(it);
std::cout << it << ';';
std::vector<long> times;
std::vector<double> residuals;
for (int rep = 0; rep < num_repetitions; ++rep) {
auto tic = std::chrono::high_resolution_clock::now();
auto result =
gko::as<gko::Composition<ValueType>>(factory->generate(mtx));
exec->synchronize();
auto toc = std::chrono::high_resolution_clock::now();
auto residual = gko::clone(exec, mtx);
result->get_operators()[0]->apply(one, result->get_operators()[1],
minus_one, residual);
times.push_back(
std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
.count());
residuals.push_back(
compute_ilu_residual_norm(residual.get(), mtx.get()));
}
for (auto el : times) {
std::cout << el << ';';
}
for (auto el : residuals) {
std::cout << el << ';';
}
std::cout << '\n';
}
}
gko::matrix_data::nonzeros
std::vector< nonzero_type > nonzeros
A vector of tuples storing the non-zeros of the matrix.
Definition: matrix_data.hpp:454
gko::Error::what
virtual const char * what() const noexcept override
Returns a human-readable string with a more detailed description of the error.
Definition: exception.hpp:75
gko::matrix::Csr
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition: matrix.hpp:27
gko::max
constexpr T max(const T &x, const T &y)
Returns the larger of the arguments.
Definition: math.hpp:845
gko::log::profile_event_category::factory
LinOpFactory events.
gko::factorization::ParIct
ParICT is an incomplete threshold-based Cholesky factorization which is computed in parallel.
Definition: par_ict.hpp:70
gko::size_type
std::size_t size_type
Integral type used for allocation quantities.
Definition: types.hpp:108
gko::squared_norm
constexpr auto squared_norm(const T &x) -> decltype(real(conj(x) *x))
Returns the squared norm of the object.
Definition: math.hpp:1057
gko::clone
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition: utils_helper.hpp:175
gko::HipExecutor::create
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
gko::factorization::ParIlut
ParILUT is an incomplete threshold-based LU factorization which is computed in parallel.
Definition: par_ilut.hpp:73
gko::matrix_data
This structure is used as an intermediate data type to store a sparse matrix.
Definition: matrix_data.hpp:127
gko::share
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition: utils_helper.hpp:226
gko::factorization::ParIlu
ParILU is an incomplete LU factorization which is computed in parallel.
Definition: par_ilu.hpp:71
gko::CudaExecutor::create
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
gko::OmpExecutor::create
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition: executor.hpp:1345
gko::factorization::ParIc
ParIC is an incomplete Cholesky factorization which is computed in parallel.
Definition: par_ic.hpp:70
gko::matrix_data::sort_row_major
void sort_row_major()
Sorts the nonzero vector so the values follow row-major order.
Definition: matrix_data.hpp:459
gko::matrix::Csr::write
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
gko::DpcppExecutor::create
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
gko::Error
The Error class is used to report exceptional behaviour in library functions.
Definition: exception.hpp:58
gko::one
constexpr T one()
Returns the multiplicative identity for T.
Definition: math.hpp:775