31#ifndef NDA_HAVE_DEVICE
// dot(x, y): interface to the BLAS dot routine.
//
// NOTE: this is a listing excerpt. A number at the start of a line is that line's number in the
// original header, and the original lines between the numbered ones are not part of this excerpt.
//
// Template constraint: X and Y must each be either a scalar (arithmetic or complex type) or a memory
// vector (a rank-1 memory array). The constraint on Y tests MemoryVector<Y>; it previously tested
// MemoryVector<X>, which left Y's own type unchecked and rejected a (scalar, vector) argument pair.
59 template <
typename X,
typename Y>
60 requires((Scalar<X> or MemoryVector<X>) and (Scalar<Y> or MemoryVector<Y>))
61 auto dot(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have identical shapes.
71 EXPECTS(x.shape() == y.shape());
// Both backends receive the element count plus, for each operand, its data pointer and its first
// stride. device::dot is only compiled when NDA_HAVE_DEVICE is defined; the lines that separate it
// from the f77::dot call below are not shown in this excerpt.
74#if defined(NDA_HAVE_DEVICE)
75 return device::dot(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
81 return f77::dot(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
// dotc(x, y): interface to the BLAS dotc routine.
//
// NOTE: this is a listing excerpt. A number at the start of a line is that line's number in the
// original header, and the original lines between the numbered ones (including any constraint on
// X and Y) are not part of this excerpt.
101 template <
typename X,
typename Y>
103 auto dotc(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have identical shapes.
113 EXPECTS(x.shape() == y.shape());
// Both backends receive the element count plus, for each operand, its data pointer and its first
// stride. device::dotc is only compiled when NDA_HAVE_DEVICE is defined; the lines that separate it
// from the f77::dotc call below are not shown in this excerpt.
118#if defined(NDA_HAVE_DEVICE)
119 return device::dotc(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
125 return f77::dotc(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
// _dot_impl<star>(x, y): hand-written accumulation loop over two equally shaped operands. It is
// instantiated further down as detail::_dot_impl<false> and detail::_dot_impl<true>.
//
// NOTE: this is a listing excerpt. A number at the start of a line is that line's number in the
// original header, and the original lines between the numbered ones are not part of this excerpt.
// In particular the body of the _conj helper and the conditions that select one of the three loop
// variants below are not shown.
// NOTE(review): presumably _conj conjugates its argument when `star` is true and is the identity
// otherwise — confirm against the full header.
133 template <
bool star,
typename X,
typename Y>
134 auto _dot_impl(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have identical shapes.
135 EXPECTS(x.shape() == y.shape());
// Number of elements to accumulate: the extent of the first dimension.
136 long N = x.shape()[0];
138 auto _conj = [](
auto z) __attribute__((always_inline)) {
// Variant 1: direct access through the raw data pointers. The accumulator is seeded with element 0
// and the loop adds elements 1..N-1. The index is `long`, matching N and the two loops below, so the
// loop condition no longer compares a signed with an unsigned value.
147 auto *__restrict px = x.data();
148 auto *__restrict py = y.data();
149 auto res = _conj(px[0]) * py[0];
150 for (
long i = 1; i < N; ++i) { res += _conj(px[i]) * py[i]; }
// Variant 2: same accumulation, addressing elements through _linear_index_t.
153 auto res = _conj(x(_linear_index_t{0})) * y(_linear_index_t{0});
154 for (
long i = 1; i < N; ++i) { res += _conj(x(_linear_index_t{i})) * y(_linear_index_t{i}); }
// Variant 3: same accumulation, using the operands' plain call operator.
158 auto res = _conj(x(0)) * y(0);
159 for (
long i = 1; i < N; ++i) { res += _conj(x(i)) * y(i); }
// NOTE: listing excerpt — the function signature between the template header and the return
// statement is not shown. NOTE(review): presumably this is dot_generic, the generic implementation
// of dot for types not supported by BLAS/LAPACK — confirm against the full header.
175 template <
typename X,
typename Y>
// Forwards to the shared loop implementation with star = false.
180 return detail::_dot_impl<false>(x, y);
// NOTE: listing excerpt — the function signature between the template header and the return
// statement is not shown. NOTE(review): presumably this is dotc_generic, the generic implementation
// of dotc for types not supported by BLAS/LAPACK — confirm against the full header.
193 template <
typename X,
typename Y>
// Forwards to the shared loop implementation with star = true.
198 return detail::_dot_impl<true>(x, y);
Provides definitions and type traits involving the different memory address spaces supported by nda.
Provides a C++ interface for various BLAS routines.
Check if a given type is a memory vector, i.e. an nda::MemoryArrayOfRank<1>.
Check if a given type is either an arithmetic or complex type.
Provides concepts for the nda library.
Provides GPU and non-GPU specific functionality.
decltype(auto) conj(A &&a)
Function conj for nda::ArrayOrScalar types (lazy and coefficient-wise for nda::Array types with a complex value type).
constexpr bool have_same_value_type_v
Constexpr variable that is true if all types in As have the same value type as A0.
std::decay_t< decltype(get_first_element(std::declval< A const >()))> get_value_t
Get the value type of an array/view or a scalar type.
constexpr bool is_regular_or_view_v
Constexpr variable that is true if type A is either a regular array or a view.
constexpr bool has_layout_smallest_stride_is_one
Constexpr variable that is true if type A has the smallest_stride_is_one nda::layout_prop_e guarantee.
auto dot_generic(X const &x, Y const &y)
Generic implementation of nda::blas::dot for types not supported by BLAS/LAPACK.
auto dotc(X const &x, Y const &y)
Interface to the BLAS dotc routine.
auto dotc_generic(X const &x, Y const &y)
Generic implementation of nda::blas::dotc for types not supported by BLAS/LAPACK.
auto dot(X const &x, Y const &y)
Interface to the BLAS dot routine.
static constexpr bool have_compatible_addr_space
Constexpr variable that is true if all given types have compatible address spaces.
static constexpr bool have_device_compatible_addr_space
Constexpr variable that is true if all given types have an address space compatible with Device.
void compile_error_no_gpu()
Trigger a compilation error in case GPU specific functionality is used without configuring the project with GPU support.
constexpr bool is_complex_v
Constexpr variable that is true if type T is a std::complex type.
constexpr bool is_blas_lapack_v
Alias for nda::is_double_or_complex_v.
Macros used in the nda library.
Provides some custom implementations of standard mathematical functions used for lazy, coefficient-wise array operations.
Provides type traits for the nda library.