23 #ifndef COMPAT_CUDA_CUDA_RUNTIME_H
24 #define COMPAT_CUDA_CUDA_RUNTIME_H
// CUDA qualifier keywords, each mapped onto the compiler attribute of the
// same name.
#define __global__          __attribute__((global))
#define __device__          __attribute__((device))
#define __device_builtin__  __attribute__((device_builtin))

// __align__(N) forwards N to the aligned attribute.
#define __align__(N)        __attribute__((aligned(N)))

// Every use of __inline__ additionally carries always_inline. The macro's own
// name inside the replacement list is not expanded a second time.
#define __inline__          __inline__ __attribute__((always_inline))
// Function-like replacements for max(), min() and abs().
// Each argument appears more than once in the expansion, so an argument with
// side effects (i++, a function call that mutates state) may be evaluated
// twice. Pass plain values only.
#define max(a, b) \
    ((a) > (b) ? (a) : (b))
#define min(a, b) \
    ((a) < (b) ? (a) : (b))
#define abs(x) \
    ((x) < 0 ? -(x) : (x))
// atomicAdd(a, b): sequentially consistent fetch-and-add through the compiler
// builtin. `a` is the address of the target, `b` the amount to add; the
// expression yields the value stored at `a` before the addition.
#define atomicAdd(a, b) \
    (__atomic_fetch_add((a), (b), __ATOMIC_SEQ_CST))
// dim3 is a second name for struct uint3; the two are the same type.
typedef struct uint3 dim3;
71 unsigned char x, y, z,
w;
76 unsigned short x, y, z,
w;
90 #define GETCOMP(reg, comp) \
91 asm("mov.u32 %0, %%" #reg "." #comp ";" : "=r"(tmp)); \
94 #define GET(name, reg) static inline __device__ uint3 name() {\
103 GET(getBlockIdx, ctaid)
104 GET(getBlockDim, ntid)
105 GET(getThreadIdx, tid)
// blockIdx, blockDim and threadIdx are written like variables, but every use
// expands to a call of the matching accessor function, so each mention
// produces a fresh value rather than naming an object.
#define blockIdx    (getBlockIdx())
#define blockDim    (getBlockDim())
#define threadIdx   (getThreadIdx())
// Constructors for the 2- and 4-component vector types. Each expands to a
// compound literal of the target type whose fields are set by name
// (x, y and, for the 4-component forms, z, w) from the arguments in order.

// Two components.
#define make_int2(a, b)    ((int2)   { .x = (a), .y = (b) })
#define make_uchar2(a, b)  ((uchar2) { .x = (a), .y = (b) })
#define make_ushort2(a, b) ((ushort2){ .x = (a), .y = (b) })
#define make_float2(a, b)  ((float2) { .x = (a), .y = (b) })

// Four components.
#define make_int4(a, b, c, d) \
    ((int4)   { .x = (a), .y = (b), .z = (c), .w = (d) })
#define make_uchar4(a, b, c, d) \
    ((uchar4) { .x = (a), .y = (b), .z = (c), .w = (d) })
#define make_ushort4(a, b, c, d) \
    ((ushort4){ .x = (a), .y = (b), .z = (c), .w = (d) })
#define make_float4(a, b, c, d) \
    ((float4) { .x = (a), .y = (b), .z = (c), .w = (d) })
// TEX2D(type, ret) emits one conv() function whose output pointer has the
// given type. The generated function receives four unsigned values a, b, c
// and d, evaluates the expression `ret` (which may refer to any of them) and
// stores the result through `out`.
#define TEX2D(type, ret)                                                    \
    static inline __device__ void conv(type *out, unsigned a, unsigned b,   \
                                       unsigned c, unsigned d)              \
    {                                                                       \
        *out = (ret);                                                       \
    }
// conv() for unsigned char output: keeps the low 8 bits of a.
TEX2D(unsigned char, a & 0xFF)
// conv() for unsigned short output: keeps the low 16 bits of a.
TEX2D(unsigned short, a & 0xFFFF)
140 unsigned ret1, ret2, ret3, ret4;
141 asm(
"tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
142 "=r"(ret1),
"=r"(ret2),
"=r"(ret3),
"=r"(ret4) :
143 "l"(texObject),
"f"(x),
"f"(y));
144 conv(&ret, ret1, ret2, ret3, ret4);
152 asm(
"tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
153 "=r"(ret.x),
"=r"(ret.y),
"=r"(ret.z),
"=r"(ret.w) :
154 "l"(texObject),
"f"(x),
"f"(y));
#define make_ushort2(a, b)
__device__ float4 tex2D< float4 >(cudaTextureObject_t texObject, float x, float y)
__device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
static __device__ float __cosf(float a)
static __device__ float truncf(float a)
static __device__ float ceil(float a)
#define make_uchar2(a, b)
#define make_uchar4(a, b, c, d)
#define __device_builtin__
static __device__ float fabsf(float a)
static __device__ float __sinf(float a)
static __device__ float floorf(float a)
static __device__ float fabs(float a)
__device__ float2 tex2D< float2 >(cudaTextureObject_t texObject, float x, float y)
__device__ float tex2D< float >(cudaTextureObject_t texObject, float x, float y)
#define make_float2(a, b)
#define make_ushort4(a, b, c, d)
#define make_float4(a, b, c, d)
static __device__ float floor(float a)
__device_builtin__ unsigned long long cudaTextureObject_t
static __device__ float ceilf(float a)
static __device__ float trunc(float a)
static int conv(int samples, float **pcm, char *buf, int channels)