DOLFINx
DOLFINx C++ interface
MatrixCSR.h
1// Copyright (C) 2021-2022 Garth N. Wells and Chris N. Richardson
2//
3// This file is part of DOLFINx (https://www.fenicsproject.org)
4//
5// SPDX-License-Identifier: LGPL-3.0-or-later
6
7#pragma once
8
9#include "SparsityPattern.h"
10#include <dolfinx/common/MPI.h>
11#include <dolfinx/graph/AdjacencyList.h>
12#include <mpi.h>
13#include <numeric>
14#include <vector>
15#include <xtensor/xtensor.hpp>
16#include <xtl/xspan.hpp>
17
18namespace dolfinx::la
19{
20
21namespace impl
22{
35template <typename U, typename V, typename W, typename X>
36void set_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
37 const X& xrows, const X& xcols,
38 [[maybe_unused]] typename X::value_type local_size)
39{
40 assert(x.size() == xrows.size() * xcols.size());
41 for (std::size_t r = 0; r < xrows.size(); ++r)
42 {
43 // Row index and current data row
44 auto row = xrows[r];
45 using T = typename W::value_type;
46 const T* xr = x.data() + r * xcols.size();
47
48#ifndef NDEBUG
49 if (row >= local_size)
50 throw std::runtime_error("Local row out of range");
51#endif
52
53 // Columns indices for row
54 auto cit0 = std::next(cols.begin(), row_ptr[row]);
55 auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
56 for (std::size_t c = 0; c < xcols.size(); ++c)
57 {
58 // Find position of column index
59 auto it = std::lower_bound(cit0, cit1, xcols[c]);
60 assert(it != cit1);
61 std::size_t d = std::distance(cols.begin(), it);
62 assert(d < data.size());
63 data[d] = xr[c];
64 }
65 }
66}
67
77template <typename U, typename V, typename W, typename X>
78void add_csr(U&& data, const V& cols, const V& row_ptr, const W& x,
79 const X& xrows, const X& xcols)
80{
81 assert(x.size() == xrows.size() * xcols.size());
82 for (std::size_t r = 0; r < xrows.size(); ++r)
83 {
84 // Row index and current data row
85 auto row = xrows[r];
86 using T = typename W::value_type;
87 const T* xr = x.data() + r * xcols.size();
88
89#ifndef NDEBUG
90 if (row >= (int)row_ptr.size())
91 throw std::runtime_error("Local row out of range");
92#endif
93
94 // Columns indices for row
95 auto cit0 = std::next(cols.begin(), row_ptr[row]);
96 auto cit1 = std::next(cols.begin(), row_ptr[row + 1]);
97 for (std::size_t c = 0; c < xcols.size(); ++c)
98 {
99 // Find position of column index
100 auto it = std::lower_bound(cit0, cit1, xcols[c]);
101 assert(it != cit1);
102 std::size_t d = std::distance(cols.begin(), it);
103 assert(d < data.size());
104 data[d] += xr[c];
105 }
106 }
107}
108} // namespace impl
109
124template <typename T, class Allocator = std::allocator<T>>
126{
127public:
129 using value_type = T;
130
132 using allocator_type = Allocator;
133
140 {
141 return [&](const xtl::span<const std::int32_t>& rows,
142 const xtl::span<const std::int32_t>& cols,
143 const xtl::span<const T>& data) -> int
144 {
145 this->set(data, rows, cols);
146 return 0;
147 };
148 }
149
155 {
156 return [&](const xtl::span<const std::int32_t>& rows,
157 const xtl::span<const std::int32_t>& cols,
158 const xtl::span<const T>& data) -> int
159 {
160 this->add(data, rows, cols);
161 return 0;
162 };
163 }
164
169 MatrixCSR(const SparsityPattern& p, const Allocator& alloc = Allocator())
170 : _index_maps({p.index_map(0),
171 std::make_shared<common::IndexMap>(p.column_index_map())}),
172 _bs({p.block_size(0), p.block_size(1)}),
173 _data(p.num_nonzeros(), 0, alloc),
174 _cols(p.graph().array().begin(), p.graph().array().end()),
175 _row_ptr(p.graph().offsets().begin(), p.graph().offsets().end()),
176 _comm(p.index_map(0)->comm(common::IndexMap::Direction::reverse)),
177 _ghost_row_to_neighbor_rank(_index_maps[0]->ghost_owners())
178 {
179 // TODO: handle block sizes
180 if (_bs[0] > 1 or _bs[1] > 1)
181 throw std::runtime_error("Block size not yet supported");
182
183 // Compute off-diagonal offset for each row
184 xtl::span<const std::int32_t> num_diag_nnz = p.off_diagonal_offset();
185 _off_diagonal_offset.reserve(num_diag_nnz.size());
186 std::transform(num_diag_nnz.begin(), num_diag_nnz.end(), _row_ptr.begin(),
187 std::back_inserter(_off_diagonal_offset),
188 std::plus<std::int32_t>());
189
190 // Precompute some data for ghost updates via MPI
191 const std::array local_size
192 = {_index_maps[0]->size_local(), _index_maps[1]->size_local()};
193 const std::array local_range
194 = {_index_maps[0]->local_range(), _index_maps[1]->local_range()};
195
196 const std::vector<std::int64_t>& ghosts0 = _index_maps[0]->ghosts();
197 const std::vector<std::int64_t>& ghosts1 = _index_maps[1]->ghosts();
198
199 int outdegree(-1);
200 {
201 int indegree(-1), weighted(-1);
202 MPI_Dist_graph_neighbors_count(_comm.comm(), &indegree, &outdegree,
203 &weighted);
204 }
205
206 // Compute size of data to send to each neighbor
207 std::vector<std::int32_t> data_per_proc(outdegree, 0);
208 for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
209 {
210 assert(_ghost_row_to_neighbor_rank[i] < data_per_proc.size());
211 data_per_proc[_ghost_row_to_neighbor_rank[i]]
212 += _row_ptr[local_size[0] + i + 1] - _row_ptr[local_size[0] + i];
213 }
214
215 // Compute send displacements for values and indices (x2)
216 _val_send_disp.resize(outdegree + 1, 0);
217 std::partial_sum(data_per_proc.begin(), data_per_proc.end(),
218 std::next(_val_send_disp.begin()));
219
220 std::vector<int> index_send_disp(outdegree + 1);
221 std::transform(_val_send_disp.begin(), _val_send_disp.end(),
222 index_send_disp.begin(), [](int d) { return d * 2; });
223
224 // For each ghost row, pack and send indices to neighborhood
225 std::vector<int> insert_pos(index_send_disp);
226 std::vector<std::int64_t> ghost_index_data(index_send_disp.back());
227 for (std::size_t i = 0; i < _ghost_row_to_neighbor_rank.size(); ++i)
228 {
229 const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
230 int row_id = local_size[0] + i;
231 for (int j = _row_ptr[row_id]; j < _row_ptr[row_id + 1]; ++j)
232 {
233 // Get position in send buffer to place data for this neighbour
234 const std::int32_t idx_pos = insert_pos[neighbor_rank];
235
236 // Pack send data (row, col) as global indices
237 ghost_index_data[idx_pos] = ghosts0[i];
238 if (const std::int32_t col_local = _cols[j]; col_local < local_size[1])
239 ghost_index_data[idx_pos + 1] = col_local + local_range[1][0];
240 else
241 ghost_index_data[idx_pos + 1] = ghosts1[col_local - local_size[1]];
242
243 insert_pos[neighbor_rank] += 2;
244 }
245 }
246
247 // Create and communicate adjacency list to neighborhood
248 const graph::AdjacencyList<std::int64_t> ghost_index_data_out(
249 std::move(ghost_index_data), std::move(index_send_disp));
250 const graph::AdjacencyList<std::int64_t> ghost_index_data_in
251 = dolfinx::MPI::neighbor_all_to_all(_comm.comm(), ghost_index_data_out);
252
253 // Store received offsets for future use, when transferring data values.
254 _val_recv_disp.resize(ghost_index_data_in.offsets().size());
255 std::transform(ghost_index_data_in.offsets().begin(),
256 ghost_index_data_in.offsets().end(), _val_recv_disp.begin(),
257 [](int d) { return d / 2; });
258
259 // Global to local map for ghost columns
260 std::map<std::int64_t, std::int32_t> global_to_local;
261 std::int32_t local_i = local_size[1];
262 for (std::int64_t global_i : ghosts1)
263 global_to_local.insert({global_i, local_i++});
264
265 // Compute location in which data for each index should be stored
266 // when received
267 const std::vector<std::int64_t>& ghost_index_array
268 = ghost_index_data_in.array();
269 for (std::size_t i = 0; i < ghost_index_array.size(); i += 2)
270 {
271 // Row must be on this process
272 const std::int32_t local_row = ghost_index_array[i] - local_range[0][0];
273 assert(local_row >= 0 and local_row < local_size[0]);
274
275 // Column may be owned or unowned
276 std::int32_t local_col = ghost_index_array[i + 1] - local_range[1][0];
277 if (local_col < 0 or local_col >= local_size[1])
278 {
279 const auto it = global_to_local.find(ghost_index_array[i + 1]);
280 assert(it != global_to_local.end());
281 local_col = it->second;
282 }
283 auto cit0 = std::next(_cols.begin(), _row_ptr[local_row]);
284 auto cit1 = std::next(_cols.begin(), _row_ptr[local_row + 1]);
285
286 // Find position of column index and insert data
287 auto cit = std::lower_bound(cit0, cit1, local_col);
288 assert(cit != cit1);
289 assert(*cit == local_col);
290 std::size_t d = std::distance(_cols.begin(), cit);
291 _unpack_pos.push_back(d);
292 }
293 }
294
297 MatrixCSR(MatrixCSR&& A) = default;
298
302 void set(T x) { std::fill(_data.begin(), _data.end(), x); }
303
316 void set(const xtl::span<const T>& x,
317 const xtl::span<const std::int32_t>& rows,
318 const xtl::span<const std::int32_t>& cols)
319 {
320 impl::set_csr(_data, _cols, _row_ptr, x, rows, cols,
321 _index_maps[0]->size_local());
322 }
323
336 void add(const xtl::span<const T>& x,
337 const xtl::span<const std::int32_t>& rows,
338 const xtl::span<const std::int32_t>& cols)
339 {
340 impl::add_csr(_data, _cols, _row_ptr, x, rows, cols);
341 }
342
344 std::int32_t num_owned_rows() const { return _index_maps[0]->size_local(); }
345
347 std::int32_t num_all_rows() const { return _row_ptr.size() - 1; }
348
355 xt::xtensor<T, 2> to_dense() const
356 {
357 const std::int32_t nrows = num_all_rows();
358 const std::int32_t ncols
359 = _index_maps[1]->size_local() + _index_maps[1]->num_ghosts();
360 xt::xtensor<T, 2> A = xt::zeros<T>({nrows, ncols});
361 for (std::size_t r = 0; r < nrows; ++r)
362 for (int j = _row_ptr[r]; j < _row_ptr[r + 1]; ++j)
363 A(r, _cols[j]) = _data[j];
364 return A;
365 }
366
370 void finalize()
371 {
373 finalize_end();
374 }
375
384 {
385 const std::int32_t local_size0 = _index_maps[0]->size_local();
386 const std::int32_t num_ghosts0 = _index_maps[0]->num_ghosts();
387
388 // For each ghost row, pack and send values to send to neighborhood
389 std::vector<int> insert_pos(_val_send_disp);
390 std::vector<T> ghost_value_data(_val_send_disp.back());
391 for (int i = 0; i < num_ghosts0; ++i)
392 {
393 const int neighbor_rank = _ghost_row_to_neighbor_rank[i];
394
395 // Get position in send buffer to place data to send to this neighbour
396 const std::int32_t val_pos = insert_pos[neighbor_rank];
397 std::copy(std::next(_data.data(), _row_ptr[local_size0 + i]),
398 std::next(_data.data(), _row_ptr[local_size0 + i + 1]),
399 std::next(ghost_value_data.begin(), val_pos));
400 insert_pos[neighbor_rank]
401 += _row_ptr[local_size0 + i + 1] - _row_ptr[local_size0 + i];
402 }
403
404 _ghost_value_data_in.resize(_val_recv_disp.back());
405
406 // Compute data sizes for send and receive from displacements
407 std::vector<int> val_send_count(_val_send_disp.size() - 1);
408 std::adjacent_difference(std::next(_val_send_disp.begin()),
409 _val_send_disp.end(), val_send_count.begin());
410
411 std::vector<int> val_recv_count(_val_recv_disp.size() - 1);
412 std::adjacent_difference(std::next(_val_recv_disp.begin()),
413 _val_recv_disp.end(), val_recv_count.begin());
414
415 int status = MPI_Ineighbor_alltoallv(
416 ghost_value_data.data(), val_send_count.data(), _val_send_disp.data(),
417 dolfinx::MPI::mpi_type<T>(), _ghost_value_data_in.data(),
418 val_recv_count.data(), _val_recv_disp.data(),
419 dolfinx::MPI::mpi_type<T>(), _comm.comm(), &_request);
420 assert(status == MPI_SUCCESS);
421 }
422
429 {
430 int status = MPI_Wait(&_request, MPI_STATUS_IGNORE);
431 assert(status == MPI_SUCCESS);
432
433 // Add to local rows
434 assert(_ghost_value_data_in.size() == _unpack_pos.size());
435 for (std::size_t i = 0; i < _ghost_value_data_in.size(); ++i)
436 _data[_unpack_pos[i]] += _ghost_value_data_in[i];
437
438 // Set ghost row data to zero
439 const std::int32_t local_size0 = _index_maps[0]->size_local();
440 std::fill(std::next(_data.begin(), _row_ptr[local_size0]), _data.end(), 0);
441 }
442
444 double norm_squared() const
445 {
446 const std::size_t num_owned_rows = _index_maps[0]->size_local();
447 assert(num_owned_rows < _row_ptr.size());
448
449 const double norm_sq_local = std::accumulate(
450 _data.cbegin(), std::next(_data.cbegin(), _row_ptr[num_owned_rows]),
451 double(0), [](double norm, T y) { return norm + std::norm(y); });
452 double norm_sq;
453 MPI_Allreduce(&norm_sq_local, &norm_sq, 1, MPI_DOUBLE, MPI_SUM,
454 _comm.comm());
455 return norm_sq;
456 }
457
464 const std::array<std::shared_ptr<const common::IndexMap>, 2>&
466 {
467 return _index_maps;
468 }
469
472 std::vector<T>& values() { return _data; }
473
476 const std::vector<T>& values() const { return _data; }
477
480 const std::vector<std::int32_t>& row_ptr() const { return _row_ptr; }
481
484 const std::vector<std::int32_t>& cols() const { return _cols; }
485
493 const std::vector<std::int32_t>& off_diag_offset() const
494 {
495 return _off_diagonal_offset;
496 }
497
498private:
499 // Maps describing the data layout for rows and columns
500 std::array<std::shared_ptr<const common::IndexMap>, 2> _index_maps;
501
502 // Block sizes
503 std::array<int, 2> _bs;
504
505 // Matrix data
506 std::vector<T, Allocator> _data;
507 std::vector<std::int32_t> _cols, _row_ptr;
508
509 // Start of off-diagonal (unowned columns) on each row
510 std::vector<std::int32_t> _off_diagonal_offset;
511
512 // Neighborhood communicator (ghost->owner communicator for rows)
513 dolfinx::MPI::Comm _comm;
514
515 // -- Precomputed data for finalize/update
516
517 // Request in non-blocking communication
518 MPI_Request _request;
519
520 // Position in _data to add received data
521 std::vector<int> _unpack_pos;
522
523 // Displacements for alltoall for each neighbor when sending and
524 // receiving
525 std::vector<int> _val_send_disp, _val_recv_disp;
526
527 // Ownership of each row, by neighbor
528 std::vector<int> _ghost_row_to_neighbor_rank;
529
530 // Temporary store for finalize data during non-blocking communication
531 std::vector<T> _ghost_value_data_in;
532};
533
534} // namespace dolfinx::la
A duplicate MPI communicator and manage lifetime of the communicator.
Definition: MPI.h:40
Distributed sparse matrix.
Definition: MatrixCSR.h:126
auto mat_set_values()
Insertion functor for setting values in matrix. It is typically used in finite element assembly funct...
Definition: MatrixCSR.h:139
Allocator allocator_type
The allocator type.
Definition: MatrixCSR.h:132
xt::xtensor< T, 2 > to_dense() const
Copy to a dense matrix.
Definition: MatrixCSR.h:355
void add(const xtl::span< const T > &x, const xtl::span< const std::int32_t > &rows, const xtl::span< const std::int32_t > &cols)
Accumulate values in the matrix.
Definition: MatrixCSR.h:336
void finalize()
Transfer ghost row data to the owning ranks accumulating received values on the owned rows,...
Definition: MatrixCSR.h:370
const std::vector< std::int32_t > & cols() const
Get local column indices.
Definition: MatrixCSR.h:484
std::int32_t num_owned_rows() const
Number of local rows excluding ghost rows.
Definition: MatrixCSR.h:344
MatrixCSR(const SparsityPattern &p, const Allocator &alloc=Allocator())
Create a distributed matrix.
Definition: MatrixCSR.h:169
const std::vector< T > & values() const
Get local values (const version)
Definition: MatrixCSR.h:476
auto mat_add_values()
Insertion functor for accumulating values in matrix. It is typically used in finite element assembly ...
Definition: MatrixCSR.h:154
void set(T x)
Set all non-zero local entries to a value including entries in ghost rows.
Definition: MatrixCSR.h:302
MatrixCSR(MatrixCSR &&A)=default
Move constructor.
void finalize_end()
End transfer of ghost row data to owning ranks.
Definition: MatrixCSR.h:428
void set(const xtl::span< const T > &x, const xtl::span< const std::int32_t > &rows, const xtl::span< const std::int32_t > &cols)
Set values in the matrix.
Definition: MatrixCSR.h:316
const std::vector< std::int32_t > & off_diag_offset() const
Get the start of off-diagonal (unowned columns) on each row, allowing the matrix to be split (virtual...
Definition: MatrixCSR.h:493
std::int32_t num_all_rows() const
Number of local rows including ghost rows.
Definition: MatrixCSR.h:347
const std::array< std::shared_ptr< const common::IndexMap >, 2 > & index_maps() const
Index maps for the row and column space. The row IndexMap contains ghost entries for rows which may b...
Definition: MatrixCSR.h:465
double norm_squared() const
Compute the Frobenius norm squared.
Definition: MatrixCSR.h:444
const std::vector< std::int32_t > & row_ptr() const
Get local row pointers.
Definition: MatrixCSR.h:480
std::vector< T > & values()
Get local data values.
Definition: MatrixCSR.h:472
T value_type
The value type.
Definition: MatrixCSR.h:129
void finalize_begin()
Begin transfer of ghost row data to owning ranks, where it will be accumulated into existing owned ro...
Definition: MatrixCSR.h:383
This class provides a sparsity pattern data structure that can be used to initialize sparse matrices....
Definition: SparsityPattern.h:34
std::shared_ptr< const common::IndexMap > index_map(int dim) const
Index map for given dimension dimension. Returns the index map for rows and columns that will be set ...
Definition: SparsityPattern.cpp:208
int block_size(int dim) const
Return index map block size for dimension dim.
Definition: SparsityPattern.cpp:245
const graph::AdjacencyList< std::int32_t > & graph() const
Sparsity pattern graph after assembly. Uses local indices for the columns.
Definition: SparsityPattern.cpp:424
std::int64_t num_nonzeros() const
Number of nonzeros on this rank after assembly, including ghost rows.
Definition: SparsityPattern.cpp:403
common::IndexMap column_index_map() const
Builds the index map for columns after assembly of the sparsity pattern.
Definition: SparsityPattern.cpp:228
xtl::span< const int > off_diagonal_offset() const
Row-wise start of off-diagonal (unowned columns) on each row.
Definition: SparsityPattern.cpp:431
constexpr std::array< std::int64_t, 2 > local_range(int rank, std::int64_t N, int size)
Return local range for the calling process, partitioning the global [0, N - 1] range across all ranks...
Definition: MPI.h:82
graph::AdjacencyList< T > neighbor_all_to_all(MPI_Comm comm, const graph::AdjacencyList< T > &send_data)
Send in_values[n0] to neighbor process n0 and receive values from neighbor process n1 in out_values[n...
Definition: MPI.h:682
Linear algebra interface.
Definition: sparsitybuild.h:13