24#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
25#define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
28#pragma clang diagnostic push
29#pragma clang diagnostic ignored "-Wreserved-identifier"
30#pragma clang diagnostic ignored "-Wreserved-macro-identifier"
31#pragma clang diagnostic ignored "-Wc++98-compat"
32#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
33#pragma clang diagnostic ignored "-Wsign-conversion"
34#pragma clang diagnostic ignored "-Wfloat-conversion"
35#pragma clang diagnostic ignored "-Wdouble-promotion"
36#pragma clang diagnostic ignored "-Wnested-anon-types"
37#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
38#pragma clang diagnostic ignored "-Wfloat-equal"
41#if defined(__HIPCC_RTC__)
42 #define __HOST_DEVICE__ __device__
44 #define __HOST_DEVICE__ __host__ __device__
45 #include <hip/amd_detail/amd_hip_common.h>
48 #if defined(__cplusplus)
50 #include <type_traits>
55#if defined(__clang__) && defined(__HIP__)
56 typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
60 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
69 static_assert(
sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
79 #if defined(__cplusplus)
80 #if !defined(__HIPCC_RTC__)
81 #include "hip_fp16_math_fwd.h"
82 #include "amd_hip_vector_types.h"
84 #include "amd_device_functions.h"
85 #include "amd_warp_functions.h"
// Explicit specialization that makes is_floating_point<_Float16> derive from
// std::true_type, so the std::is_floating_point<T> constraints used by the
// converting constructors/assignments in this file accept _Float16.
// NOTE(review): the enclosing namespace line is not visible in this listing;
// presumably this sits inside `namespace std` — confirm against the real header.
89 template<>
struct is_floating_point<_Float16> : std::true_type {};
// Shorthand for std::enable_if<...>::type: names `Result` (void when omitted)
// if `enabled` is true; otherwise the alias is ill-formed, which removes the
// constrained template from overload resolution.
template <bool enabled, typename Result = void>
using Enable_if_t = typename std::enable_if<enabled, Result>::type;
99 static_assert(
sizeof(_Float16) ==
sizeof(
unsigned short),
"");
110 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
112 __half(
decltype(data) x) : data{x} {}
115 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
117 __half(T x) : data{static_cast<_Float16>(x)} {}
120 __half(
const __half&) =
default;
122 __half(__half&&) =
default;
127 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
129 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
131 __half(T x) : data{static_cast<_Float16>(x)} {}
136 __half& operator=(
const __half&) =
default;
138 __half& operator=(__half&&) =
default;
146 volatile __half& operator=(
const __half_raw& x)
volatile
151 volatile __half& operator=(
const volatile __half_raw& x)
volatile
161 volatile __half& operator=(
__half_raw&& x)
volatile
166 volatile __half& operator=(
volatile __half_raw&& x)
volatile
171 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
174 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
176 __half& operator=(T x)
178 data =
static_cast<_Float16
>(x);
184 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
186 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
188 __half& operator=(T x)
190 data =
static_cast<_Float16
>(x);
195 #if !defined(__HIP_NO_HALF_OPERATORS__)
197 __half& operator+=(
const __half& x)
203 __half& operator-=(
const __half& x)
209 __half& operator*=(
const __half& x)
215 __half& operator/=(
const __half& x)
221 __half& operator++() { ++data;
return *
this; }
223 __half operator++(
int)
230 __half& operator--() { --data;
return *
this; }
232 __half operator--(
int)
241 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
244 Enable_if_t<std::is_floating_point<T>{}>* =
nullptr>
246 operator T()
const {
return data; }
256 #if !defined(__HIP_NO_HALF_CONVERSIONS__)
258 typename T, Enable_if_t<std::is_integral<T>{}>* =
nullptr>
260 operator T()
const {
return data; }
263 #if !defined(__HIP_NO_HALF_OPERATORS__)
265 __half operator+()
const {
return *
this; }
267 __half operator-()
const
270 tmp.data = -tmp.data;
276 #if !defined(__HIP_NO_HALF_OPERATORS__)
280 __half operator+(
const __half& x,
const __half& y)
282 return __half{x} += y;
287 __half operator-(
const __half& x,
const __half& y)
289 return __half{x} -= y;
294 __half operator*(
const __half& x,
const __half& y)
296 return __half{x} *= y;
301 __half operator/(
const __half& x,
const __half& y)
303 return __half{x} /= y;
308 bool operator==(
const __half& x,
const __half& y)
310 return x.data == y.data;
315 bool operator!=(
const __half& x,
const __half& y)
322 bool operator<(
const __half& x,
const __half& y)
324 return x.data < y.data;
329 bool operator>(
const __half& x,
const __half& y)
331 return y.data < x.data;
336 bool operator<=(
const __half& x,
const __half& y)
343 bool operator>=(
const __half& x,
const __half& y)
356 sizeof(_Float16_2) ==
sizeof(
unsigned short[2]),
"");
371 __half2(
decltype(data) x) : data{x} {}
373 __half2(
const __half& x,
const __half& y)
380 __half2(
const __half2&) =
default;
382 __half2(__half2&&) =
default;
384 ~__half2() =
default;
388 __half2& operator=(
const __half2&) =
default;
390 __half2& operator=(__half2&&) =
default;
399 #if !defined(__HIP_NO_HALF_OPERATORS__)
401 __half2& operator+=(
const __half2& x)
407 __half2& operator-=(
const __half2& x)
413 __half2& operator*=(
const __half2& x)
419 __half2& operator/=(
const __half2& x)
425 __half2& operator++() {
return *
this += _Float16_2{1, 1}; }
427 __half2 operator++(
int)
434 __half2& operator--() {
return *
this -= _Float16_2{1, 1}; }
436 __half2 operator--(
int)
446 operator decltype(data)()
const {
return data; }
455 #if !defined(__HIP_NO_HALF_OPERATORS__)
457 __half2 operator+()
const {
return *
this; }
459 __half2 operator-()
const
462 tmp.data = -tmp.data;
468 #if !defined(__HIP_NO_HALF_OPERATORS__)
472 __half2 operator+(
const __half2& x,
const __half2& y)
474 return __half2{x} += y;
479 __half2 operator-(
const __half2& x,
const __half2& y)
481 return __half2{x} -= y;
486 __half2 operator*(
const __half2& x,
const __half2& y)
488 return __half2{x} *= y;
493 __half2 operator/(
const __half2& x,
const __half2& y)
495 return __half2{x} /= y;
500 bool operator==(
const __half2& x,
const __half2& y)
502 auto r = x.data == y.data;
503 return r.x != 0 && r.y != 0;
508 bool operator!=(
const __half2& x,
const __half2& y)
515 bool operator<(
const __half2& x,
const __half2& y)
517 auto r = x.data < y.data;
518 return r.x != 0 && r.y != 0;
523 bool operator>(
const __half2& x,
const __half2& y)
530 bool operator<=(
const __half2& x,
const __half2& y)
537 bool operator>=(
const __half2& x,
const __half2& y)
549 __half2 make_half2(__half x, __half y)
551 return __half2{x, y};
556 __half __low2half(__half2 x)
563 __half __high2half(__half2 x)
570 __half2 __half2half2(__half x)
572 return __half2{x, x};
577 __half2 __halves2half2(__half x, __half y)
579 return __half2{x, y};
584 __half2 __low2half2(__half2 x)
594 __half2 __high2half2(__half2 x)
604 __half2 __lows2half2(__half2 x, __half2 y)
614 __half2 __highs2half2(__half2 x, __half2 y)
635 short __half_as_short(__half x)
642 unsigned short __half_as_ushort(__half x)
649 __half __short_as_half(
short x)
657 __half __ushort_as_half(
unsigned short x)
666 __half __float2half(
float x)
672 __half __float2half_rn(
float x)
676 #if !defined(__HIPCC_RTC__)
680 __half __float2half_rz(
float x)
686 __half __float2half_rd(
float x)
692 __half __float2half_ru(
float x)
699 __half __float2half_rz(
float x)
705 __half __float2half_rd(
float x)
711 __half __float2half_ru(
float x)
717 __half2 __float2half2_rn(
float x)
721 static_cast<_Float16
>(x),
static_cast<_Float16
>(x)}};
725 __half2 __floats2half2_rn(
float x,
float y)
727 return __half2{_Float16_2{
728 static_cast<_Float16
>(x),
static_cast<_Float16
>(y)}};
732 __half2 __float22half2_rn(
float2 x)
734 return __floats2half2_rn(x.x, x.y);
740 float __half2float(__half x)
758 float2 __half22float2(__half2 x)
768 int __half2int_rn(__half x)
774 int __half2int_rz(__half x)
780 int __half2int_rd(__half x)
786 int __half2int_ru(__half x)
794 __half __int2half_rn(
int x)
800 __half __int2half_rz(
int x)
806 __half __int2half_rd(
int x)
812 __half __int2half_ru(
int x)
820 short __half2short_rn(__half x)
826 short __half2short_rz(__half x)
832 short __half2short_rd(__half x)
838 short __half2short_ru(__half x)
846 __half __short2half_rn(
short x)
852 __half __short2half_rz(
short x)
858 __half __short2half_rd(
short x)
864 __half __short2half_ru(
short x)
872 long long __half2ll_rn(__half x)
878 long long __half2ll_rz(__half x)
884 long long __half2ll_rd(__half x)
890 long long __half2ll_ru(__half x)
898 __half __ll2half_rn(
long long x)
904 __half __ll2half_rz(
long long x)
910 __half __ll2half_rd(
long long x)
916 __half __ll2half_ru(
long long x)
924 unsigned int __half2uint_rn(__half x)
930 unsigned int __half2uint_rz(__half x)
936 unsigned int __half2uint_rd(__half x)
942 unsigned int __half2uint_ru(__half x)
950 __half __uint2half_rn(
unsigned int x)
956 __half __uint2half_rz(
unsigned int x)
962 __half __uint2half_rd(
unsigned int x)
968 __half __uint2half_ru(
unsigned int x)
976 unsigned short __half2ushort_rn(__half x)
982 unsigned short __half2ushort_rz(__half x)
988 unsigned short __half2ushort_rd(__half x)
994 unsigned short __half2ushort_ru(__half x)
1002 __half __ushort2half_rn(
unsigned short x)
1008 __half __ushort2half_rz(
unsigned short x)
1014 __half __ushort2half_rd(
unsigned short x)
1020 __half __ushort2half_ru(
unsigned short x)
1028 unsigned long long __half2ull_rn(__half x)
1034 unsigned long long __half2ull_rz(__half x)
1040 unsigned long long __half2ull_rd(__half x)
1046 unsigned long long __half2ull_ru(__half x)
1054 __half __ull2half_rn(
unsigned long long x)
1060 __half __ull2half_rz(
unsigned long long x)
1066 __half __ull2half_rd(
unsigned long long x)
1072 __half __ull2half_ru(
unsigned long long x)
1080 __half __ldg(
const __half* ptr) {
return *ptr; }
1083 __half __ldcg(
const __half* ptr) {
return *ptr; }
1086 __half __ldca(
const __half* ptr) {
return *ptr; }
1089 __half __ldcs(
const __half* ptr) {
return *ptr; }
1093 __half2 __ldg(
const __half2* ptr) {
return *ptr; }
1096 __half2 __ldcg(
const __half2* ptr) {
return *ptr; }
1099 __half2 __ldca(
const __half2* ptr) {
return *ptr; }
1102 __half2 __ldcs(
const __half2* ptr) {
return *ptr; }
1107 bool __heq(__half x, __half y)
1114 bool __hne(__half x, __half y)
1121 bool __hle(__half x, __half y)
1128 bool __hge(__half x, __half y)
1135 bool __hlt(__half x, __half y)
1142 bool __hgt(__half x, __half y)
1148 bool __hequ(__half x, __half y) {
1153 bool __hneu(__half x, __half y) {
1157 bool __hleu(__half x, __half y) {
1162 bool __hgeu(__half x, __half y) {
1167 bool __hltu(__half x, __half y) {
1172 bool __hgtu(__half x, __half y) {
1178 __half2
__heq2(__half2 x, __half2 y)
1182 return __builtin_convertvector(-r, _Float16_2);
1186 __half2
__hne2(__half2 x, __half2 y)
1190 return __builtin_convertvector(-r, _Float16_2);
1194 __half2
__hle2(__half2 x, __half2 y)
1198 return __builtin_convertvector(-r, _Float16_2);
1202 __half2
__hge2(__half2 x, __half2 y)
1206 return __builtin_convertvector(-r, _Float16_2);
1210 __half2
__hlt2(__half2 x, __half2 y)
1214 return __builtin_convertvector(-r, _Float16_2);
1218 __half2
__hgt2(__half2 x, __half2 y)
1222 return __builtin_convertvector(-r, _Float16_2);
1224 inline __HOST_DEVICE__
1225 __half2 __hequ2(__half2 x, __half2 y) {
1228 return __builtin_convertvector(-r, _Float16_2);
1232 __half2 __hneu2(__half2 x, __half2 y) {
1234 return __builtin_convertvector(-r, _Float16_2);
1238 __half2 __hleu2(__half2 x, __half2 y) {
1240 return __builtin_convertvector(-r, _Float16_2);
1244 __half2 __hgeu2(__half2 x, __half2 y) {
1246 return __builtin_convertvector(-r, _Float16_2);
1250 __half2 __hltu2(__half2 x, __half2 y) {
1252 return __builtin_convertvector(-r, _Float16_2);
1256 __half2 __hgtu2(__half2 x, __half2 y) {
1258 return __builtin_convertvector(-r, _Float16_2);
1263 bool __hbeq2(__half2 x, __half2 y)
1266 return r.data.x != 0 && r.data.y != 0;
1270 bool __hbne2(__half2 x, __half2 y)
1273 return r.data.x != 0 && r.data.y != 0;
1277 bool __hble2(__half2 x, __half2 y)
1280 return r.data.x != 0 && r.data.y != 0;
1284 bool __hbge2(__half2 x, __half2 y)
1287 return r.data.x != 0 && r.data.y != 0;
1291 bool __hblt2(__half2 x, __half2 y)
1294 return r.data.x != 0 && r.data.y != 0;
1298 bool __hbgt2(__half2 x, __half2 y)
1301 return r.data.x != 0 && r.data.y != 0;
1323 __half
__hmax(
const __half x,
const __half y) {
1329 __half __hmax_nan(
const __half x,
const __half y) {
1330 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1332 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1339 __half
__hmin(
const __half x,
const __half y) {
1345 __half __hmin_nan(
const __half x,
const __half y) {
1346 if(__ocml_isnan_f16(
static_cast<__half_raw>(x).data)) {
1348 }
else if (__ocml_isnan_f16(
static_cast<__half_raw>(y).data)) {
1357 __half __clamp_01(__half x)
1368 __half
__hadd(__half x, __half y)
1379 __ocml_fabs_f16(
static_cast<__half_raw>(x).data)};
1383 __half
__hsub(__half x, __half y)
1391 __half
__hmul(__half x, __half y)
1399 __half __hadd_sat(__half x, __half y)
1401 return __clamp_01(
__hadd(x, y));
1405 __half __hsub_sat(__half x, __half y)
1407 return __clamp_01(
__hsub(x, y));
1411 __half __hmul_sat(__half x, __half y)
1413 return __clamp_01(
__hmul(x, y));
1417 __half
__hfma(__half x, __half y, __half z)
1426 __half __hfma_sat(__half x, __half y, __half z)
1428 return __clamp_01(
__hfma(x, y, z));
1432 __half
__hdiv(__half x, __half y)
1441 __half2
__hadd2(__half2 x, __half2 y)
1452 __ocml_fabs_2f16(
static_cast<__half2_raw>(x).data)};
1456 __half2
__hsub2(__half2 x, __half2 y)
1464 __half2
__hmul2(__half2 x, __half2 y)
1472 __half2 __hadd2_sat(__half2 x, __half2 y)
1481 __half2 __hsub2_sat(__half2 x, __half2 y)
1490 __half2 __hmul2_sat(__half2 x, __half2 y)
1499 __half2
__hfma2(__half2 x, __half2 y, __half2 z)
1501 return __half2{__ocml_fma_2f16(x, y, z)};
1505 __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z)
1514 __half2
__h2div(__half2 x, __half2 y)
1522 #if defined(__clang__) && defined(__HIP__)
1525 float amd_mixed_dot(__half2 a, __half2 b,
float c,
bool saturate) {
1526 return __ockl_fdot2(
static_cast<__half2_raw>(a).data,
1536 __ocml_trunc_f16(
static_cast<__half_raw>(x).data)};
1540 __half
hceil(__half x)
1543 __ocml_ceil_f16(
static_cast<__half_raw>(x).data)};
1550 __ocml_floor_f16(
static_cast<__half_raw>(x).data)};
1554 __half
hrint(__half x)
1557 __ocml_rint_f16(
static_cast<__half_raw>(x).data)};
1561 __half
hsin(__half x)
1564 __ocml_sin_f16(
static_cast<__half_raw>(x).data)};
1568 __half
hcos(__half x)
1571 __ocml_cos_f16(
static_cast<__half_raw>(x).data)};
1575 __half
hexp(__half x)
1578 __ocml_exp_f16(
static_cast<__half_raw>(x).data)};
1582 __half
hexp2(__half x)
1585 __ocml_exp2_f16(
static_cast<__half_raw>(x).data)};
1592 __ocml_exp10_f16(
static_cast<__half_raw>(x).data)};
1596 __half
hlog2(__half x)
1599 __ocml_log2_f16(
static_cast<__half_raw>(x).data)};
1603 __half
hlog(__half x)
1606 __ocml_log_f16(
static_cast<__half_raw>(x).data)};
1613 __ocml_log10_f16(
static_cast<__half_raw>(x).data)};
1617 __half
hrcp(__half x)
1620 static_cast<_Float16
>(__builtin_amdgcn_rcph(
static_cast<__half_raw>(x).data))};
1627 __ocml_rsqrt_f16(
static_cast<__half_raw>(x).data)};
1631 __half
hsqrt(__half x)
1634 __ocml_sqrt_f16(
static_cast<__half_raw>(x).data)};
1640 return __ocml_isinf_f16(
static_cast<__half_raw>(x).data);
1646 return __ocml_isnan_f16(
static_cast<__half_raw>(x).data);
1659 return __half2{__ocml_trunc_2f16(x)};
1663 __half2
h2ceil(__half2 x)
1665 return __half2{__ocml_ceil_2f16(x)};
1671 return __half2{__ocml_floor_2f16(x)};
1675 __half2
h2rint(__half2 x)
1677 return __half2{__ocml_rint_2f16(x)};
1681 __half2
h2sin(__half2 x)
1683 return __half2{__ocml_sin_2f16(x)};
1687 __half2
h2cos(__half2 x)
1689 return __half2{__ocml_cos_2f16(x)};
1693 __half2
h2exp(__half2 x)
1695 return __half2{__ocml_exp_2f16(x)};
1699 __half2
h2exp2(__half2 x)
1701 return __half2{__ocml_exp2_2f16(x)};
1707 return __half2{__ocml_exp10_2f16(x)};
1711 __half2
h2log2(__half2 x)
1713 return __half2{__ocml_log2_2f16(x)};
1717 __half2
h2log(__half2 x) {
return __ocml_log_2f16(x); }
1720 __half2
h2log10(__half2 x) {
return __ocml_log10_2f16(x); }
1723 __half2
h2rcp(__half2 x) {
1724 return _Float16_2{
static_cast<_Float16
>(__builtin_amdgcn_rcph(x.x)),
1725 static_cast<_Float16
>(__builtin_amdgcn_rcph(x.y))};
1729 __half2
h2rsqrt(__half2 x) {
return __ocml_rsqrt_2f16(x); }
1732 __half2
h2sqrt(__half2 x) {
return __ocml_sqrt_2f16(x); }
1735 __half2 __hisinf2(__half2 x)
1737 auto r = __ocml_isinf_2f16(x);
1738 return __half2{_Float16_2{
1739 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1745 auto r = __ocml_isnan_2f16(x);
1746 return __half2{_Float16_2{
1747 static_cast<_Float16
>(r.x),
static_cast<_Float16
>(r.y)}};
1753 return __half2{-
static_cast<__half2_raw>(x).data};
1757 #if !defined(HIP_NO_HALF)
1758 using half = __half;
1759 using half2 = __half2;
1763 __half __shfl(__half var,
int src_lane,
int width = warpSize) {
1764 union {
int i; __half h; } tmp; tmp.h = var;
1765 tmp.i = __shfl(tmp.i, src_lane, width);
1770 __half2 __shfl(__half2 var,
int src_lane,
int width = warpSize) {
1771 union {
int i; __half2 h; } tmp; tmp.h = var;
1772 tmp.i = __shfl(tmp.i, src_lane, width);
1777 __half __shfl_up(__half var,
unsigned int lane_delta,
int width = warpSize) {
1778 union {
int i; __half h; } tmp; tmp.h = var;
1779 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1784 __half2 __shfl_up(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1785 union {
int i; __half2 h; } tmp; tmp.h = var;
1786 tmp.i = __shfl_up(tmp.i, lane_delta, width);
1791 __half __shfl_down(__half var,
unsigned int lane_delta,
int width = warpSize) {
1792 union {
int i; __half h; } tmp; tmp.h = var;
1793 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1798 __half2 __shfl_down(__half2 var,
unsigned int lane_delta,
int width = warpSize) {
1799 union {
int i; __half2 h; } tmp; tmp.h = var;
1800 tmp.i = __shfl_down(tmp.i, lane_delta, width);
1805 __half __shfl_xor(__half var,
int lane_mask,
int width = warpSize) {
1806 union {
int i; __half h; } tmp; tmp.h = var;
1807 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1812 __half2 __shfl_xor(__half2 var,
int lane_mask,
int width = warpSize) {
1813 union {
int i; __half2 h; } tmp; tmp.h = var;
1814 tmp.i = __shfl_xor(tmp.i, lane_mask, width);
1818#elif defined(__GNUC__)
1819 #if !defined(__HIPCC_RTC__)
1820 #include "hip_fp16_gcc.h"
1824#if defined(__clang__)
1825#pragma clang diagnostic pop
#define __host__
Definition host_defines.h:170
__device__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns the absolute value of a bfloat16.
Definition amd_hip_bf16.h:365
__device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:321
__device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:337
__device__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:329
__device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:355
__device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:347
__device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:313
__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - greater than or equal.
Definition amd_hip_bf16.h:466
__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - less than.
Definition amd_hip_bf16.h:514
__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered not equal.
Definition amd_hip_bf16.h:490
__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - greater than.
Definition amd_hip_bf16.h:450
__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - equal.
Definition amd_hip_bf16.h:433
__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered less than.
Definition amd_hip_bf16.h:522
__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered less than or equal.
Definition amd_hip_bf16.h:538
__device__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - return min.
Definition amd_hip_bf16.h:506
__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - not equal.
Definition amd_hip_bf16.h:482
__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered equal.
Definition amd_hip_bf16.h:441
__device__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - return max.
Definition amd_hip_bf16.h:498
__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered greater than.
Definition amd_hip_bf16.h:458
__device__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:546
__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - less than or equal.
Definition amd_hip_bf16.h:530
__device__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:552
__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat16 values - unordered greater than or equal.
Definition amd_hip_bf16.h:474
__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:582
__device__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:663
__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:630
__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:606
__device__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if less than, otherwise 0.0.
Definition amd_hip_bf16.h:700
__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:566
__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:558
__device__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if less than or equal, otherwise 0.0.
Definition amd_hip_bf16.h:691
__device__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a == b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:654
__device__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:729
__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:622
__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:574
__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:590
__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b - unordered.
Definition amd_hip_bf16.h:646
__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:614
__device__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:681
__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:638
__device__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than, otherwise 0.0.
Definition amd_hip_bf16.h:672
__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:598
__device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:400
__device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:375
__device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns the absolute value of a bfloat162.
Definition amd_hip_bf16.h:384
__device__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:392
__device__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:425
__device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Negates a bfloat162 value.
Definition amd_hip_bf16.h:417
__device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:409
__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:246
__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:274
__device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:280
__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:754
__device__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:818
__device__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:826
__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:746
__device__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:850
__device__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:794
__device__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:762
__device__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:738
__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:810
__device__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:842
__device__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:778
__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:834
__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:786
__device__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:802
__device__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:770
__device__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:954
__device__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:906
__device__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:922
__device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:962
__device__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:914
__device__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:858
__device__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:938
__device__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:930
__device__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:946
__device__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:866
__device__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:898
__device__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:882
__device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:970
__device__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:890
__device__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:874
Definition amd_hip_vector_types.h:1986
Definition hip_fp16_gcc.h:7
Definition hip_fp16_gcc.h:11