#include <cmath>
#include <type_traits>
#include "corecel/Config.hh"
#include "corecel/Assert.hh"
#include "corecel/Macros.hh"
#include "NumericLimits.hh"
#include "detail/AlgorithmsImpl.hh"
#include "detail/Sincospi.hh"

Classes
struct	celeritas::Less< T >
	Evaluator for the first argument being less than the second. More...

struct	celeritas::Less< void >
	Specialization of less with template deduction. More...

struct	celeritas::Identity
	A function object type whose operator() returns its argument unchanged. More...

struct	celeritas::LogicalNot< T >
	A Function object for performing logical NOT (logical negation). More...

struct	celeritas::LogicalNot< void >
	Specialization with template deduction. More...

struct	celeritas::LocalWorkCalculator< T >
	Calculate local work for a given worker ID. More...

Functions
template<class T >
CELER_CONSTEXPR_FUNCTION T &&	celeritas::forward (typename std::remove_reference< T >::type &v) noexcept
	Implement perfect forwarding with device-friendly functions.

template<class T >
CELER_CONSTEXPR_FUNCTION T &&	celeritas::forward (typename std::remove_reference< T >::type &&v) noexcept

template<class T >
CELER_CONSTEXPR_FUNCTION auto	celeritas::move (T &&v) noexcept -> typename std::remove_reference< T >::type &&
	Cast a value as an rvalue reference to allow move construction.

template<class T >
CELER_FORCEINLINE_FUNCTION void	celeritas::trivial_swap (T &a, T &b) noexcept
	Support swapping of trivial types.

template<class T , class U = T>
CELER_FORCEINLINE_FUNCTION T	celeritas::exchange (T &dst, U &&src)
	Replace a value and return the original.

template<class InputIt , class Predicate >
CELER_FUNCTION bool	celeritas::all_of (InputIt iter, InputIt last, Predicate p)
	Whether the predicate is true for all items.

template<class InputIt , class Predicate >
CELER_FUNCTION bool	celeritas::any_of (InputIt iter, InputIt last, Predicate p)
	Whether the predicate is true for any item.

template<class InputIt , class Predicate >
CELER_FUNCTION bool	celeritas::all_adjacent (InputIt iter, InputIt last, Predicate p)
	Whether the predicate is true for pairs of consecutive items.

template<class T >
CELER_FUNCTION T const &	celeritas::clamp (T const &v, T const &lo, T const &hi)
	Clamp the value between lo and hi values.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::clamp_to_nonneg (T v) noexcept
	Return the value, or zero if it is negative.

template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp)
	Find the insertion point for a value in a sorted list using a binary search.

template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::lower_bound (ForwardIt first, ForwardIt last, T const &value)
	Find the insertion point for a value in a sorted list using a binary search.

template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value, Compare comp)
	Find the insertion point for a value in a sorted list using a linear search.

template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::lower_bound_linear (ForwardIt first, ForwardIt last, T const &value)
	Find the insertion point for a value in a sorted list using a linear search.

template<class ForwardIt , class T , class Compare >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value, Compare comp)
	Find the first element which is greater than value.

template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::upper_bound (ForwardIt first, ForwardIt last, T const &value)
	Find the first element which is greater than value.

template<class ForwardIt , class T , class Compare >
CELER_FUNCTION ForwardIt	celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value, Compare comp)
	Find the given element in a sorted range.

template<class ForwardIt , class T >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::find_sorted (ForwardIt first, ForwardIt last, T const &value)
	Find the given element in a sorted range.

template<class ForwardIt , class Predicate >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::partition (ForwardIt first, ForwardIt last, Predicate pred)
	Partition elements in the given range, "true" before "false".

template<class RandomAccessIt , class Compare >
CELER_FORCEINLINE_FUNCTION void	celeritas::sort (RandomAccessIt first, RandomAccessIt last, Compare comp)
	Sort an array on a single thread.

template<class RandomAccessIt >
CELER_FORCEINLINE_FUNCTION void	celeritas::sort (RandomAccessIt first, RandomAccessIt last)
	Sort an array on a single thread.

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const &	celeritas::max (T const &a, T const &b) noexcept
	Return the higher of two values.

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T	celeritas::max (T a, T b) noexcept

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T const &	celeritas::min (T const &a, T const &b) noexcept
	Return the lower of two values.

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T	celeritas::min (T a, T b) noexcept

template<class ForwardIt , class Compare >
CELER_FUNCTION ForwardIt	celeritas::min_element (ForwardIt iter, ForwardIt last, Compare comp)
	Return an iterator to the lowest value in the range as defined by Compare.

template<class ForwardIt >
CELER_FORCEINLINE_FUNCTION ForwardIt	celeritas::min_element (ForwardIt first, ForwardIt last)
	Return an iterator to the lowest value in the range.

template<unsigned int N, class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::ipow (T v) noexcept
	Return a nonnegative integer power of the input value.

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FUNCTION T	celeritas::fastpow (T a, T b)
	Raise a number to a power with simplifying assumptions.

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_FORCEINLINE_FUNCTION T	celeritas::fma (T a, T b, T y)
	Use fused multiply-add for generic calculations.

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T	celeritas::fma (T a, T b, T y)
	Provide an FMA-like interface for integers.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::hypot (T a, T b)
	Calculate a hypotenuse.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::hypot (T a, T b, T c)
	Calculate a hypotenuse.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::ceil_div (T top, T bottom)
	Integer division, rounding up, for positive numbers.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::negate (T value)
	Negation that won't return signed zeros.

template<class T >
CELER_CONSTEXPR_FUNCTION T	celeritas::diffsq (T a, T b)
	Calculate the difference of squares \( a^2 - b^2 \).

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>
CELER_CONSTEXPR_FUNCTION T	celeritas::eumod (T num, T denom)
	Calculate the Euclidean modulus of two numbers.

template<class T >
CELER_CONSTEXPR_FUNCTION int	celeritas::signum (T x)
	Calculate the sign of a number.

template<class T >
CELER_CONSTEXPR_FUNCTION int	celeritas::popcount (T x) noexcept
	Count the number of set bits in an integer.


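CELER_FORCEINLINE_FUNCTION void	celeritas::sincos (float a, float *s, float *c)
	Simultaneously evaluate the sine and cosine of a value.

CELER_FORCEINLINE_FUNCTION void	celeritas::sincos (double a, double *s, double *c)
	Simultaneously evaluate the sine and cosine of a value.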


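CELER_FORCEINLINE_FUNCTION void	celeritas::sincospi (float a, float *s, float *c)
	Simultaneously evaluate the sine and cosine of a value multiplied by pi.

CELER_FORCEINLINE_FUNCTION void	celeritas::sincospi (double a, double *s, double *c)
	Simultaneously evaluate the sine and cosine of a value multiplied by pi.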

CUDA/HIP equivalent routines
#define	CELERITAS_SINCOSPI_PREFIX ::celeritas::detail::
	Namespace prefix for the sincospi implementation functions.

#define	CELER_SINCOS_MANGLED(FUNC) ::celeritas::detail::FUNC
	Qualify a sincos-family function name with the implementation namespace.

CELER_FORCEINLINE_FUNCTION float	celeritas::rsqrt (float value)
	Calculate an inverse square root.

CELER_FORCEINLINE_FUNCTION double	celeritas::rsqrt (double value)
	Calculate an inverse square root.

CELER_FORCEINLINE_FUNCTION float	celeritas::sinpi (float a)
	Calculate the sine of a value multiplied by pi.

CELER_FORCEINLINE_FUNCTION double	celeritas::sinpi (double a)
	Calculate the sine of a value multiplied by pi.

CELER_FORCEINLINE_FUNCTION float	celeritas::cospi (float a)
	Calculate the cosine of a value multiplied by pi.

CELER_FORCEINLINE_FUNCTION double	celeritas::cospi (double a)
	Calculate the cosine of a value multiplied by pi.

Function Documentation

◆ clamp()

template<class T >

CELER_FUNCTION T const & celeritas::clamp	(	T const &	v,
		T const &	lo,
		T const &	hi
	)

inline

Clamp the value between lo and hi values.

If the value is between lo and hi, return the value. Otherwise, return lo if the value is below lo, or hi if the value is above hi.

This replaces:

min(hi, max(lo, v))

or

max(lo, min(hi, v))

assuming that lo is not greater than hi.

This is constructed to propagate NaN.

◆ clamp_to_nonneg()

template<class T >

CELER_CONSTEXPR_FUNCTION T celeritas::clamp_to_nonneg ( T v )

noexcept

Return the value, or zero if it is negative.

This is constructed to propagate NaN.

◆ diffsq()

template<class T >

CELER_CONSTEXPR_FUNCTION T celeritas::diffsq	(	T	a,
		T	b
	)

Calculate the difference of squares \( a^2 - b^2 \).

This calculation exchanges one multiplication for one addition, but it does not increase the accuracy of the computed result. It is used occasionally in Geant4 but is likely a premature optimization... see https://github.com/celeritas-project/celeritas/pull/1082

◆ eumod()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>

CELER_CONSTEXPR_FUNCTION T celeritas::eumod	(	T	num,
		T	denom
	)

Calculate the Euclidean modulus of two numbers.

Parameters:
num: numerator
denom: denominator

If both numbers are positive, this should be the same as fmod. If the signs of the remainder and the denominator don't match, the remainder will be remapped so that it is between zero and the denominator.

This function is useful for normalizing user-provided angles. Examples:

eumod(3, 2) == 1
eumod(-0.5, 2) == 1.5
eumod(-2, 2) == 0

◆ exchange()

template<class T , class U = T>

CELER_FORCEINLINE_FUNCTION T celeritas::exchange	(	T &	dst,
		U &&	src
	)

Replace a value and return the original.

This has a similar signature to atomic updates.

◆ fastpow()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>

CELER_FUNCTION T celeritas::fastpow	(	T	a,
		T	b
	)

inline

Raise a number to a power with simplifying assumptions.

This should be faster than std::pow because we don't worry about special cases for zeros, infinities, or negative values for a.

Example:

assert(9.0 == fastpow(3.0, 2.0));

◆ fma()

template<class T , std::enable_if_t< std::is_floating_point< T >::value, bool > = true>

CELER_FORCEINLINE_FUNCTION T celeritas::fma	(	T	a,
		T	b,
		T	y
	)

Use fused multiply-add for generic calculations.

This provides a floating point specialization so that fma can be used in code that is accelerated for floating point calculations but still works correctly with integer arithmetic.

Because of the single template parameter, it may be easier to use std::fma directly in most cases.

◆ hypot()

template<class T >

CELER_CONSTEXPR_FUNCTION T celeritas::hypot	(	T	a,
		T	b
	)

Calculate a hypotenuse.

This does not conform to IEEE754, which requires returning infinity in edge cases (e.g., one argument is infinite and the other NaN). Similarly, it is not symmetric with respect to the function arguments.

To improve accuracy we could use [1].

[1] C.F. Borges, An Improved Algorithm for hypot(a,b), (2019). http://arxiv.org/abs/1904.09481 (accessed November 19, 2024).

◆ ipow()

template<unsigned int N, class T >

CELER_CONSTEXPR_FUNCTION T celeritas::ipow ( T v )

noexcept

Return a nonnegative integer power of the input value.

Example:

assert(9.0 == ipow<2>(3.0));
assert(256 == ipow<8>(2));
static_assert(256 == ipow<8>(2));

◆ max()

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>

CELER_CONSTEXPR_FUNCTION T const & celeritas::max	(	T const &	a,
		T const &	b
	)

noexcept

Return the higher of two values.

This function is specialized so that floating point types use std::fmax for better performance on GPU and ARM.

◆ min()

template<class T , std::enable_if_t<!std::is_floating_point< T >::value, bool > = true>

CELER_CONSTEXPR_FUNCTION T const & celeritas::min	(	T const &	a,
		T const &	b
	)

noexcept

Return the lower of two values.

This function is specialized so that floating point types use std::fmin for better performance on GPU and ARM.

◆ partition()

template<class ForwardIt , class Predicate >

CELER_FORCEINLINE_FUNCTION ForwardIt celeritas::partition	(	ForwardIt	first,
		ForwardIt	last,
		Predicate	pred
	)

Partition elements in the given range, "true" before "false".

This is done by swapping elements until the range is partitioned.

◆ signum()

template<class T >

CELER_CONSTEXPR_FUNCTION int celeritas::signum ( T x )

Calculate the sign of a number.

Returns: -1 if negative, 0 if exactly zero (or NaN), 1 if positive

◆ sincos() [1/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincos	(	double	a,
		double *	s,
		double *	c
	)

Simultaneously evaluate the sine and cosine of a value.

◆ sincos() [2/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincos	(	float	a,
		float *	s,
		float *	c
	)

Simultaneously evaluate the sine and cosine of a value.

◆ sincospi() [1/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincospi	(	double	a,
		double *	s,
		double *	c
	)

Simultaneously evaluate the sine and cosine of a value multiplied by pi.

◆ sincospi() [2/2]

CELER_FORCEINLINE_FUNCTION void celeritas::sincospi	(	float	a,
		float *	s,
		float *	c
	)

Simultaneously evaluate the sine and cosine of a value multiplied by pi.

◆ sinpi()

CELER_FORCEINLINE_FUNCTION float celeritas::sinpi ( float a )

Get the sine or cosine of a value multiplied by pi, with increased precision compared to multiplying by pi before calling sin or cos.

◆ sort()

template<class RandomAccessIt , class Compare >

CELER_FORCEINLINE_FUNCTION void celeritas::sort	(	RandomAccessIt	first,
		RandomAccessIt	last,
		Compare	comp
	)

Sort an array on a single thread.

This implementation is neither thread-safe nor cooperative, but it can be called from CUDA code.

Classes

Functions

CUDA/HIP equivalent routines

Function Documentation

◆ clamp()

◆ clamp_to_nonneg()

◆ diffsq()

◆ eumod()

◆ exchange()

◆ fastpow()

◆ fma()

◆ hypot()

◆ ipow()

◆ max()

◆ min()

◆ partition()

◆ signum()

◆ sincos() [1/2]

◆ sincos() [2/2]

◆ sincospi() [1/2]

◆ sincospi() [2/2]

◆ sinpi()

◆ sort()