Celeritas
0.5.0-86+4a8eea4
|
#include <cmath>
#include <type_traits>
#include "corecel/Config.hh"
#include "corecel/Assert.hh"
#include "corecel/Macros.hh"
#include "NumericLimits.hh"
#include "detail/AlgorithmsImpl.hh"
#include "detail/Sincospi.hh"
Classes | |
struct | celeritas::Less< T > |
Evaluator for the first argument being less than the second. More... | |
struct | celeritas::Less< void > |
Specialization of less with template deduction. More... | |
struct | celeritas::LocalWorkCalculator< T > |
Calculate local work for a given worker ID. More... | |
Functions | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T && | celeritas::forward (typename std::remove_reference< T >::type &v) noexcept |
Implement perfect forwarding with device-friendly functions. | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T && | celeritas::forward (typename std::remove_reference< T >::type &&v) noexcept |
template<class T > | |
CELER_CONSTEXPR_FUNCTION auto | celeritas::move (T &&v) noexcept -> typename std::remove_reference< T >::type && |
Cast a value as an rvalue reference to allow move construction. | |
template<class T > | |
CELER_FORCEINLINE_FUNCTION void | celeritas::trivial_swap (T &a, T &b) noexcept |
Support swapping of trivial types. | |
template<class T , class U = T> | |
CELER_FORCEINLINE_FUNCTION T | celeritas::exchange (T &dst, U &&src) |
Exchange values on host or device. | |
template<class InputIt , class Predicate > | |
CELER_FUNCTION bool | celeritas::all_of (InputIt iter, InputIt last, Predicate p) |
Whether the predicate is true for all items. | |
template<class InputIt , class Predicate > | |
CELER_FUNCTION bool | celeritas::any_of (InputIt iter, InputIt last, Predicate p) |
Whether the predicate is true for any item. | |
template<class InputIt , class Predicate > | |
CELER_FUNCTION bool | celeritas::all_adjacent (InputIt iter, InputIt last, Predicate p) |
Whether the predicate is true for pairs of consecutive items. | |
template<class T > | |
CELER_FUNCTION T const & | celeritas::clamp (T const &v, T const &lo, T const &hi) |
Clamp the value between lo and hi values. More... | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::clamp_to_nonneg (T v) noexcept |
Return the value, or zero if it is negative. More... | |
template<class ForwardIt , class T , class Compare > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp) |
Find the insertion point for a value in a sorted list using a binary search. | |
template<class ForwardIt , class T > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value) |
Find the insertion point for a value in a sorted list using a binary search. | |
template<class ForwardIt , class T , class Compare > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value, Compare comp) |
Find the insertion point for a value in a sorted list using a linear search. | |
template<class ForwardIt , class T > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value) |
Find the insertion point for a value in a sorted list using a linear search. | |
template<class ForwardIt , class T , class Compare > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp) |
Find the first element which is greater than the given value. | |
template<class ForwardIt , class T > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value) |
Find the first element which is greater than the given value. | |
template<class ForwardIt , class T , class Compare > | |
CELER_FUNCTION ForwardIt | celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value, Compare comp) |
Find the given element in a sorted range. | |
template<class ForwardIt , class T > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value) |
Find the given element in a sorted range. | |
template<class ForwardIt , class Predicate > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::partition (ForwardIt first, ForwardIt last, Predicate pred) |
Partition elements in the given range, "true" before "false". More... | |
template<class RandomAccessIt , class Compare > | |
CELER_FORCEINLINE_FUNCTION void | celeritas::sort (RandomAccessIt first, RandomAccessIt last, Compare comp) |
Sort an array on a single thread. More... | |
template<class RandomAccessIt > | |
CELER_FORCEINLINE_FUNCTION void | celeritas::sort (RandomAccessIt first, RandomAccessIt last) |
Sort an array on a single thread. | |
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T const & | celeritas::max (T const &a, T const &b) noexcept |
Return the higher of two values. More... | |
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T | celeritas::max (T a, T b) noexcept |
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T const & | celeritas::min (T const &a, T const &b) noexcept |
Return the lower of two values. More... | |
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T | celeritas::min (T a, T b) noexcept |
template<class ForwardIt , class Compare > | |
CELER_FUNCTION ForwardIt | celeritas::min_element (ForwardIt iter, ForwardIt last, Compare comp) |
Return an iterator to the lowest value in the range as defined by Compare. | |
template<class ForwardIt > | |
CELER_FORCEINLINE_FUNCTION ForwardIt | celeritas::min_element (ForwardIt first, ForwardIt last) |
Return an iterator to the lowest value in the range. | |
template<unsigned int N, class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::ipow (T v) noexcept |
Return an integer power of the input value. More... | |
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true> | |
CELER_FUNCTION T | celeritas::fastpow (T a, T b) |
Raise a number to a power with simplifying assumptions. More... | |
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true> | |
CELER_FORCEINLINE_FUNCTION T | celeritas::fma (T a, T b, T y) |
Use fused multiply-add for generic calculations. More... | |
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T | celeritas::fma (T a, T b, T y) |
Provide an FMA-like interface for integers. | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::hypot (T a, T b) |
Calculate a hypotenuse. More... | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::hypot (T a, T b, T c) |
Calculate a hypotenuse. | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::ceil_div (T top, T bottom) |
Integer division, rounding up, for positive numbers. | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::negate (T value) |
Negation that won't return signed zeros. | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION T | celeritas::diffsq (T a, T b) |
Calculate the difference of squares \( a^2 - b^2 \). More... | |
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true> | |
CELER_CONSTEXPR_FUNCTION T | celeritas::eumod (T numer, T denom) |
Calculate the Euclidean modulus of two numbers. More... | |
template<class T > | |
CELER_CONSTEXPR_FUNCTION int | celeritas::signum (T x) |
Calculate the sign of a number. More... | |
constexpr int | celeritas::popcount (unsigned int x) noexcept |
Count the number of set bits in an integer. | |
CELER_FORCEINLINE_FUNCTION void | celeritas::sincos (float a, float *s, float *c) |
CELER_FORCEINLINE_FUNCTION void | celeritas::sincos (double a, double *s, double *c) |
CELER_FORCEINLINE_FUNCTION void | celeritas::sincospi (float a, float *s, float *c) |
CELER_FORCEINLINE_FUNCTION void | celeritas::sincospi (double a, double *s, double *c) |
Variables | |
constexpr double | celeritas::m_pi {3.14159265358979323846} |
Double-precision math constant (POSIX derivative). More... | |
CUDA/HIP equivalent routines | |
#define | CELERITAS_SINCOSPI_PREFIX ::celeritas::detail:: |
Calculate an inverse square root. | |
#define | CELER_SINCOS_MANGLED(FUNC) ::celeritas::detail::FUNC |
Calculate an inverse square root. | |
CELER_FORCEINLINE_FUNCTION float | celeritas::rsqrt (float value) |
Calculate an inverse square root. | |
CELER_FORCEINLINE_FUNCTION double | celeritas::rsqrt (double value) |
Calculate an inverse square root. | |
CELER_FORCEINLINE_FUNCTION float | celeritas::sinpi (float a) |
CELER_FORCEINLINE_FUNCTION double | celeritas::sinpi (double a) |
Get the sine of a value multiplied by pi for increased precision. | |
CELER_FORCEINLINE_FUNCTION float | celeritas::cospi (float a) |
Get the cosine of a value multiplied by pi for increased precision. | |
CELER_FORCEINLINE_FUNCTION double | celeritas::cospi (double a) |
Get the cosine of a value multiplied by pi for increased precision. | |
|
inline |
Clamp the value between lo and hi values.
If the value is between lo and hi, return the value. Otherwise, return lo if it's below it, or hi above it.
This replaces expressions such as min(hi, max(lo, v)) or max(lo, min(hi, v)), assuming that the relationship lo <= hi holds.
|
noexcept |
Return the value, or zero if it is negative.
This is constructed to correctly propagate NaN.
CELER_CONSTEXPR_FUNCTION T celeritas::diffsq | ( | T | a, |
T | b | ||
) |
Calculate the difference of squares \( a^2 - b^2 \).
This calculation exchanges one multiplication for one addition, but it does not increase the accuracy of the computed result. It is used occasionally in Geant4 but is likely a premature optimization... see https://github.com/celeritas-project/celeritas/pull/1082
CELER_CONSTEXPR_FUNCTION T celeritas::eumod | ( | T | numer, |
T | denom | ||
) |
Calculate the Euclidean modulus of two numbers.
If both numbers are positive, this should be the same as fmod. If the signs of the remainder and denominator don't match, the remainder will be remapped so that it is between zero and the denominator.
This function is useful for normalizing user-provided angles.
|
inline |
Raise a number to a power with simplifying assumptions.
This should be faster than std::pow because we don't worry about exceptions for zeros, infinities, or negative values for a.
Example: fastpow(3.0, 2.0) evaluates to 9.0.
CELER_FORCEINLINE_FUNCTION T celeritas::fma | ( | T | a, |
T | b, | ||
T | y | ||
) |
Use fused multiply-add for generic calculations.
This provides a floating point specialization so that fma can be used in code that is accelerated for floating point calculations but still works correctly with integer arithmetic.
Because of the single template parameter, it may be easier to use std::fma directly in most cases.
CELER_CONSTEXPR_FUNCTION T celeritas::hypot | ( | T | a, |
T | b | ||
) |
Calculate a hypotenuse.
This does not conform to IEEE 754, which requires returning infinity in certain edge cases (e.g., one argument is infinite and the other NaN). Similarly, it is not symmetric with respect to the function arguments.
To improve accuracy we could use [1].
[1] C.F. Borges, An Improved Algorithm for hypot(a,b), (2019). http://arxiv.org/abs/1904.09481 (accessed November 19, 2024).
|
noexcept |
Return an integer power of the input value.
Example: ipow<2>(3.0) evaluates to 9.0.
|
noexcept |
Return the higher of two values.
This function is specialized when building CUDA device code, which has special intrinsics for max.
|
noexcept |
Return the lower of two values.
This function is specialized when building CUDA device code, which has special intrinsics for min.
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::partition | ( | ForwardIt | first, |
ForwardIt | last, | ||
Predicate | pred | ||
) |
Partition elements in the given range, "true" before "false".
This is done by swapping elements until the range is partitioned.
CELER_CONSTEXPR_FUNCTION int celeritas::signum | ( | T | x | ) |
Calculate the sign of a number.
CELER_FORCEINLINE_FUNCTION void celeritas::sincos | ( | double | a, |
double * | s, | ||
double * | c | ||
) |
Simultaneously evaluate the sine and cosine of a value
CELER_FORCEINLINE_FUNCTION void celeritas::sincos | ( | float | a, |
float * | s, | ||
float * | c | ||
) |
Simultaneously evaluate the sine and cosine of a value
CELER_FORCEINLINE_FUNCTION void celeritas::sincospi | ( | double | a, |
double * | s, | ||
double * | c | ||
) |
Simultaneously evaluate the sine and cosine of a value multiplied by pi.
CELER_FORCEINLINE_FUNCTION void celeritas::sincospi | ( | float | a, |
float * | s, | ||
float * | c | ||
) |
Simultaneously evaluate the sine and cosine of a value multiplied by pi.
CELER_FORCEINLINE_FUNCTION float celeritas::sinpi | ( | float | a | ) |
Get the sine of a value multiplied by pi for increased precision.
CELER_FORCEINLINE_FUNCTION void celeritas::sort | ( | RandomAccessIt | first, |
RandomAccessIt | last, | ||
Compare | comp | ||
) |
Sort an array on a single thread.
This implementation is neither thread-safe nor cooperative, but it can be called from CUDA code.
|
inlineconstexpr |
Double-precision math constant (POSIX derivative).
This should be used in host or type-dependent circumstances because, if using CELERITAS_REAL_TYPE=float, it could have more accuracy than celeritas::constants::pi.