20#ifndef NDA_HAVE_DEVICE
/**
 * Interface to the BLAS dot routine.
 *
 * NOTE(review): this is an excerpt from an extracted listing. The leading numbers appear to be
 * the original line numbers, and the lines between the numbered ones (including the end of the
 * function) are not shown here, so the full branch structure cannot be confirmed from this view.
 *
 * Each operand is constrained independently: it has to be either a Scalar or a MemoryVector.
 * The second half of the constraint tests `Y`; testing `MemoryVector<X>` there would leave `Y`
 * unchecked whenever `X` is a vector.
 *
 * @tparam X Type of the first operand (Scalar or MemoryVector).
 * @tparam Y Type of the second operand (Scalar or MemoryVector).
 * @param x First operand.
 * @param y Second operand.
 * @return On the visible paths, whatever device::dot or f77::dot returns.
 */
48 template <
typename X,
typename Y>
49 requires((Scalar<X> or MemoryVector<X>) and (Scalar<Y> or MemoryVector<Y>))
50 auto dot(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have the same shape.
60 EXPECTS(x.shape() == y.shape());
// Only compiled when NDA_HAVE_DEVICE is defined: forward the element count, the data pointers and
// the first stride of each operand to the device backend.
63#if defined(NDA_HAVE_DEVICE)
64 return device::dot(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
// Same argument list, forwarded to the f77 backend.
// NOTE(review): the lines that select between the two calls are not shown in this listing — confirm.
70 return f77::dot(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
/**
 * Interface to the BLAS dotc routine.
 *
 * NOTE(review): this is an excerpt from an extracted listing. The leading numbers appear to be
 * the original line numbers; the line numbered 91 (presumably a requires clause — confirm) and
 * the lines between the numbered ones, including the end of the function, are not shown here.
 *
 * @param x First operand.
 * @param y Second operand.
 * @return On the visible paths, whatever device::dotc or f77::dotc returns.
 */
90 template <
typename X,
typename Y>
92 auto dotc(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have the same shape.
102 EXPECTS(x.shape() == y.shape());
// Only compiled when NDA_HAVE_DEVICE is defined: forward the element count, the data pointers and
// the first stride of each operand to the device backend.
107#if defined(NDA_HAVE_DEVICE)
108 return device::dotc(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
// Same argument list, forwarded to the f77 backend.
// NOTE(review): the lines that select between the two calls are not shown in this listing — confirm.
114 return f77::dotc(x.size(), x.data(), x.indexmap().strides()[0], y.data(), y.indexmap().strides()[0]);
/**
 * Hand-written accumulation loop shared by the generic dot products.
 *
 * NOTE(review): this is an excerpt from an extracted listing. The leading numbers appear to be
 * the original line numbers; the body of the `_conj` lambda, the conditions that choose between
 * the three loop variants below, and the return statements are not shown here.
 *
 * All three variants start the accumulator from element 0 and then add the products of the
 * remaining elements, each with `_conj` applied to the element taken from `x`.
 * NOTE(review): every visible variant reads element 0 before its loop, so unless a guard exists
 * in the lines not shown, an empty input would be accessed at index 0 — confirm.
 *
 * @tparam star Compile-time flag; presumably enables conjugation inside `_conj` — confirm.
 * @param x First operand (the one passed through `_conj`).
 * @param y Second operand.
 */
122 template <
bool star,
typename X,
typename Y>
123 auto _dot_impl(X
const &x, Y
const &y) {
// Runtime precondition: both operands must have the same shape.
124 EXPECTS(x.shape() == y.shape());
// Number of elements, taken from the first extent.
125 long N = x.shape()[0];
// Element-wise helper applied to every element of x; its body is not shown in this listing.
127 auto _conj = [](
auto z) __attribute__((always_inline)) {
// Variant 1: iterate over the raw data pointers.
136 auto *__restrict px = x.data();
137 auto *__restrict py = y.data();
138 auto res = _conj(px[0]) * py[0];
// The index is a signed long, matching N and the two loops below (avoids a signed/unsigned comparison).
139 for (
long i = 1; i < N; ++i) { res += _conj(px[i]) * py[i]; }
// Variant 2: element access through _linear_index_t.
142 auto res = _conj(x(_linear_index_t{0})) * y(_linear_index_t{0});
143 for (
long i = 1; i < N; ++i) { res += _conj(x(_linear_index_t{i})) * y(_linear_index_t{i}); }
// Variant 3: element access through the plain call operator.
147 auto res = _conj(x(0)) * y(0);
148 for (
long i = 1; i < N; ++i) { res += _conj(x(i)) * y(i); }
// NOTE(review): excerpt from an extracted listing; the leading numbers appear to be the original
// line numbers and the signature line of this function is not shown. Presumably this is the
// generic implementation of nda::blas::dot for types not supported by BLAS/LAPACK — confirm.
164 template <
typename X,
typename Y>
// Forwards both operands to detail::_dot_impl with its `star` template argument set to false.
169 return detail::_dot_impl<false>(x, y);
// NOTE(review): excerpt from an extracted listing; the leading numbers appear to be the original
// line numbers and the signature line of this function is not shown. Presumably this is the
// generic implementation of nda::blas::dotc for types not supported by BLAS/LAPACK — confirm.
182 template <
typename X,
typename Y>
// Forwards both operands to detail::_dot_impl with its `star` template argument set to true.
187 return detail::_dot_impl<true>(x, y);
Provides definitions and type traits involving the different memory address spaces supported by nda.
Provides a C++ interface for various BLAS routines.
Check if a given type is a memory vector, i.e. an nda::MemoryArrayOfRank<1>.
Check if a given type is either an arithmetic or complex type.
Provides concepts for the nda library.
Provides GPU and non-GPU specific functionality.
decltype(auto) conj(A &&a)
Function conj for nda::ArrayOrScalar types (lazy and coefficient-wise for nda::Array types with a complex value type).
constexpr bool have_same_value_type_v
Constexpr variable that is true if all types in As have the same value type as A0.
std::decay_t< decltype(get_first_element(std::declval< A const >()))> get_value_t
Get the value type of an array/view or a scalar type.
constexpr bool is_regular_or_view_v
Constexpr variable that is true if type A is either a regular array or a view.
constexpr bool has_layout_smallest_stride_is_one
Constexpr variable that is true if type A has the smallest_stride_is_one nda::layout_prop_e guarantee.
auto dot_generic(X const &x, Y const &y)
Generic implementation of nda::blas::dot for types not supported by BLAS/LAPACK.
auto dotc(X const &x, Y const &y)
Interface to the BLAS dotc routine.
auto dotc_generic(X const &x, Y const &y)
Generic implementation of nda::blas::dotc for types not supported by BLAS/LAPACK.
auto dot(X const &x, Y const &y)
Interface to the BLAS dot routine.
static constexpr bool have_compatible_addr_space
Constexpr variable that is true if all given types have compatible address spaces.
static constexpr bool have_device_compatible_addr_space
Constexpr variable that is true if all given types have an address space compatible with Device.
void compile_error_no_gpu()
Trigger a compilation error in case GPU specific functionality is used without configuring the project with GPU support.
constexpr bool is_complex_v
Constexpr variable that is true if type T is a std::complex type.
constexpr bool is_blas_lapack_v
Alias for nda::is_double_or_complex_v.
Macros used in the nda library.
Provides some custom implementations of standard mathematical functions used for lazy, coefficient-wise array operations.
Provides type traits for the nda library.