Celeritas  0.5.0-86+4a8eea4
Classes | Functions | Variables
Algorithms.hh File Reference
#include <cmath>
#include <type_traits>
#include "corecel/Config.hh"
#include "corecel/Assert.hh"
#include "corecel/Macros.hh"
#include "NumericLimits.hh"
#include "detail/AlgorithmsImpl.hh"
#include "detail/Sincospi.hh"

Classes

struct  celeritas::Less< T >
 Evaluator for the first argument being less than the second. More...
 
struct  celeritas::Less< void >
 Specialization of less with template deduction. More...
 
struct  celeritas::LocalWorkCalculator< T >
 Calculate local work for a given worker ID. More...
 

Functions

template<class T >
CELER_CONSTEXPR_FUNCTION T && celeritas::forward (typename std::remove_reference< T >::type &v) noexcept
 Implement perfect forwarding with device-friendly functions.
 
template<class T >
CELER_CONSTEXPR_FUNCTION T && celeritas::forward (typename std::remove_reference< T >::type &&v) noexcept
 
template<class T >
CELER_CONSTEXPR_FUNCTION auto celeritas::move (T &&v) noexcept -> typename std::remove_reference< T >::type &&
 Cast a value as an rvalue reference to allow move construction.
 
template<class T >
CELER_FORCEINLINE_FUNCTION void celeritas::trivial_swap (T &a, T &b) noexcept
 Support swapping of trivial types.
 
template<class T , class U = T>
CELER_FORCEINLINE_FUNCTION T celeritas::exchange (T &dst, U &&src)
 Exchange values on host or device.
 
template<class InputIt , class Predicate >
CELER_FUNCTION bool celeritas::all_of (InputIt iter, InputIt last, Predicate p)
 Whether the predicate is true for all items.
 
template<class InputIt , class Predicate >
CELER_FUNCTION bool celeritas::any_of (InputIt iter, InputIt last, Predicate p)
 Whether the predicate is true for any item.
 
template<class InputIt , class Predicate >
CELER_FUNCTION bool celeritas::all_adjacent (InputIt iter, InputIt last, Predicate p)
 Whether the predicate is true for pairs of consecutive items.
 
template<class T >
CELER_FUNCTION T const & celeritas::clamp (T const &v, T const &lo, T const &hi)
 Clamp the value between lo and hi values. More...
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::clamp_to_nonneg (T v) noexcept
 Return the value, or zero if it is negative. More...
 
template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp)
 Find the insertion point for a value in a sorted list using a binary search.
 
template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value)
 Find the insertion point for a value in a sorted list using a binary search.
 
template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value, Compare comp)
 Find the insertion point for a value in a sorted list using a linear search.
 
template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value)
 Find the insertion point for a value in a sorted list using a linear search.
 
template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp)
 Find the first element which is greater than the given value.
 
template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value)
 Find the first element which is greater than the given value.
 
template<class ForwardIt , class T , class Compare >
CELER_FUNCTION ForwardIt celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value, Compare comp)
 Find the given element in a sorted range.
 
template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value)
 Find the given element in a sorted range.
 
template<class ForwardIt , class Predicate >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::partition (ForwardIt first, ForwardIt last, Predicate pred)
 Partition elements in the given range, "true" before "false". More...
 
template<class RandomAccessIt , class Compare >
CELER_FORCEINLINE_FUNCTION void celeritas::sort (RandomAccessIt first, RandomAccessIt last, Compare comp)
 Sort an array on a single thread. More...
 
template<class RandomAccessIt >
CELER_FORCEINLINE_FUNCTION void celeritas::sort (RandomAccessIt first, RandomAccessIt last)
 Sort an array on a single thread.
 
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const & celeritas::max (T const &a, T const &b) noexcept
 Return the higher of two values. More...
 
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T celeritas::max (T a, T b) noexcept
 
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const & celeritas::min (T const &a, T const &b) noexcept
 Return the lower of two values. More...
 
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T celeritas::min (T a, T b) noexcept
 
template<class ForwardIt , class Compare >
CELER_FUNCTION ForwardIt celeritas::min_element (ForwardIt iter, ForwardIt last, Compare comp)
 Return an iterator to the lowest value in the range as defined by Compare.
 
template<class ForwardIt >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::min_element (ForwardIt first, ForwardIt last)
 Return an iterator to the lowest value in the range.
 
template<unsigned int N, class T >
CELER_CONSTEXPR_FUNCTION T celeritas::ipow (T v) noexcept
 Return an integer power of the input value. More...
 
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FUNCTION T celeritas::fastpow (T a, T b)
 Raise a number to a power with simplifying assumptions. More...
 
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FORCEINLINE_FUNCTION T celeritas::fma (T a, T b, T y)
 Use fused multiply-add for generic calculations. More...
 
template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T celeritas::fma (T a, T b, T y)
 Provide an FMA-like interface for integers.
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::hypot (T a, T b)
 Calculate a hypotenuse. More...
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::hypot (T a, T b, T c)
 Calculate a hypotenuse.
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::ceil_div (T top, T bottom)
 Integer division, rounding up, for positive numbers.
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::negate (T value)
 Negation that won't return signed zeros.
 
template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::diffsq (T a, T b)
 Calculate the difference of squares \( a^2 - b^2 \). More...
 
template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T celeritas::eumod (T numer, T denom)
 Calculate the Euclidean modulus of two numbers. More...
 
template<class T >
CELER_CONSTEXPR_FUNCTION int celeritas::signum (T x)
 Calculate the sign of a number. More...
 
constexpr int celeritas::popcount (unsigned int x) noexcept
 Count the number of set bits in an integer.
 
CELER_FORCEINLINE_FUNCTION void celeritas::sincos (float a, float *s, float *c)
 
CELER_FORCEINLINE_FUNCTION void celeritas::sincos (double a, double *s, double *c)
 
CELER_FORCEINLINE_FUNCTION void celeritas::sincospi (float a, float *s, float *c)
 
CELER_FORCEINLINE_FUNCTION void celeritas::sincospi (double a, double *s, double *c)
 

Variables

constexpr double celeritas::m_pi {3.14159265358979323846}
 Double-precision math constant (POSIX derivative). More...
 

CUDA/HIP equivalent routines

#define CELERITAS_SINCOSPI_PREFIX   ::celeritas::detail::
 Expands to the ::celeritas::detail:: namespace qualifier.
 
#define CELER_SINCOS_MANGLED(FUNC)   ::celeritas::detail::FUNC
 Qualify FUNC with the ::celeritas::detail:: namespace.
 
CELER_FORCEINLINE_FUNCTION float celeritas::rsqrt (float value)
 Calculate an inverse square root.
 
CELER_FORCEINLINE_FUNCTION double celeritas::rsqrt (double value)
 Calculate an inverse square root.
 
CELER_FORCEINLINE_FUNCTION float celeritas::sinpi (float a)
 
CELER_FORCEINLINE_FUNCTION double celeritas::sinpi (double a)
 Get the sine of a value multiplied by pi for increased precision.
 
CELER_FORCEINLINE_FUNCTION float celeritas::cospi (float a)
 Get the cosine of a value multiplied by pi for increased precision.
 
CELER_FORCEINLINE_FUNCTION double celeritas::cospi (double a)
 Get the cosine of a value multiplied by pi for increased precision.
 

Function Documentation

◆ clamp()

template<class T >
CELER_FUNCTION T const& celeritas::clamp ( T const &  v,
T const &  lo,
T const &  hi 
)
inline

Clamp the value between lo and hi values.

If the value is between lo and hi, return the value. Otherwise, return lo if the value is below lo, or hi if the value is above hi.

This replaces:

min(hi, max(lo, v))
CELER_CONSTEXPR_FUNCTION T const & max(T const &a, T const &b) noexcept
Return the higher of two values.
Definition: Algorithms.hh:368
CELER_CONSTEXPR_FUNCTION T const & min(T const &a, T const &b) noexcept
Return the lower of two values.
Definition: Algorithms.hh:389

or

max(lo, min(v, hi))

assuming that the relationship lo <= hi holds.

◆ clamp_to_nonneg()

template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::clamp_to_nonneg ( T  v)
noexcept

Return the value, or zero if it is negative.

This is constructed to correctly propagate NaN.

◆ diffsq()

template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::diffsq ( T  a,
T  b
)

Calculate the difference of squares \( a^2 - b^2 \).

This calculation exchanges one multiplication for one addition, but it does not increase the accuracy of the computed result. It is used occasionally in Geant4 but is likely a premature optimization... see https://github.com/celeritas-project/celeritas/pull/1082

◆ eumod()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T celeritas::eumod ( T  numer,
T  denom
)

Calculate the Euclidean modulus of two numbers.

If both numbers are positive, this should be the same as fmod. If the sign of the remainder and denominator don't match, the remainder will be remapped so that it is between zero and the denominator.

This function is useful for normalizing user-provided angles.

◆ fastpow()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FUNCTION T celeritas::fastpow ( T  a,
T  b
)
inline

Raise a number to a power with simplifying assumptions.

This should be faster than std::pow because we don't worry about exceptions for zeros, infinities, or negative values for a.

Example:

assert(9.0 == fastpow(3.0, 2.0));
CELER_FUNCTION T fastpow(T a, T b)
Raise a number to a power with simplifying assumptions.
Definition: Algorithms.hh:484

◆ fma()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FORCEINLINE_FUNCTION T celeritas::fma ( T  a,
T  b,
T  y
)

Use fused multiply-add for generic calculations.

This provides a floating point specialization so that fma can be used in code that is accelerated for floating point calculations but still works correctly with integer arithmetic.

Because of the single template parameter, it may be easier to use std::fma directly in most cases.

◆ hypot()

template<class T >
CELER_CONSTEXPR_FUNCTION T celeritas::hypot ( T  a,
T  b
)

Calculate a hypotenuse.

This does not conform to IEEE 754: it does not return infinity in the edge cases where the standard requires it (e.g., one argument is infinite and the other NaN). Similarly, it is not symmetric with respect to the function arguments.

To improve accuracy we could use [1].

[1] C.F. Borges, An Improved Algorithm for hypot(a,b), (2019). http://arxiv.org/abs/1904.09481 (accessed November 19, 2024).

◆ ipow()

template<unsigned int N, class T >
CELER_CONSTEXPR_FUNCTION T celeritas::ipow ( T  v)
noexcept

Return an integer power of the input value.

Example:

assert(9.0 == ipow<2>(3.0));
assert(256 == ipow<8>(2));
static_assert(256 == ipow<8>(2));

◆ max()

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const& celeritas::max ( T const &  a,
T const &  b 
)
noexcept

Return the higher of two values.

This function is specialized when building CUDA device code, which has special intrinsics for max.

◆ min()

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const& celeritas::min ( T const &  a,
T const &  b 
)
noexcept

Return the lower of two values.

This function is specialized when building CUDA device code, which has special intrinsics for min.

◆ partition()

template<class ForwardIt , class Predicate >
CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::partition ( ForwardIt  first,
ForwardIt  last,
Predicate  pred 
)

Partition elements in the given range, "true" before "false".

This is done by swapping elements until the range is partitioned.

◆ signum()

template<class T >
CELER_CONSTEXPR_FUNCTION int celeritas::signum ( T  x)

Calculate the sign of a number.

Returns
-1 if negative, 0 if exactly zero (or NaN), 1 if positive

◆ sincos() [1/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincos ( double  a,
double *  s,
double *  c 
)

Simultaneously evaluate the sine and cosine of a value

◆ sincos() [2/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincos ( float  a,
float *  s,
float *  c 
)

Simultaneously evaluate the sine and cosine of a value

◆ sincospi() [1/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincospi ( double  a,
double *  s,
double *  c 
)

Simultaneously evaluate the sine and cosine of a value multiplied by pi.

◆ sincospi() [2/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincospi ( float  a,
float *  s,
float *  c 
)

Simultaneously evaluate the sine and cosine of a value multiplied by pi.

◆ sinpi()

CELER_FORCEINLINE_FUNCTION float celeritas::sinpi ( float  a)

Get the sine of a value multiplied by pi for increased precision.

◆ sort()

template<class RandomAccessIt , class Compare >
CELER_FORCEINLINE_FUNCTION void celeritas::sort ( RandomAccessIt  first,
RandomAccessIt  last,
Compare  comp 
)

Sort an array on a single thread.

This implementation is neither thread-safe nor cooperative, but it can be called from CUDA code.

Variable Documentation

◆ m_pi

constexpr double celeritas::m_pi {3.14159265358979323846}
inline constexpr

Double-precision math constant (POSIX derivative).

This constant should be used in host or type-dependent circumstances because, when CELERITAS_REAL_TYPE=float, it could have more accuracy than celeritas::constants::pi.