API#
-
struct SubExecParam#
- #include <Kernels.hpp>
-
namespace std#
STL namespace.
- file Kernels.hpp
Defines
-
PackedFloat_t#
-
MAX_BLOCKSIZE#
-
FLOATS_PER_PACK#
-
MEMSET_CHAR#
-
MEMSET_VAL#
-
MAX_WAVEGROUPS#
-
MAX_UNROLL#
-
NUM_WAVEORDERS#
-
MAX_SRCS#
-
MAX_DSTS#
-
GetHwId(hwId)#
-
GetXccId(val)#
-
GPU_KERNEL_UNROLL_DECL(BLOCKSIZE)#
Typedefs
-
typedef void (*GpuKernelFuncPtr)(SubExecParam*, int, int)#
Functions
-
void CpuReduceKernel(SubExecParam const &p)#
- __host__ __device__ float PrepSrcValue (int srcBufferIdx, size_t idx)
- __global__ void CollectXccIdsKernel (int *xccIds)
- __global__ void PrepSrcDataKernel (float *ptr, size_t N, int srcBufferIdx)
- __device__ int64_t GetTimestamp ()
- template<typename T> __device__ __forceinline__ T MemsetVal ()
- template<> __device__ __forceinline__ float MemsetVal ()
- template<int BLOCKSIZE, int UNROLL> __global__ void __launch_bounds__ (BLOCKSIZE) GpuReduceKernel(SubExecParam *params, int waveOrder, int numSubIterations)
- if (threadIdx.x==0) startCycle
- if (p.preferredXccId !=-1 &&xccId !=p.preferredXccId) return
- for (int i=0;i< numSrcs;i++) srcFloat4[i]
- switch (waveOrder)
- while (1)
-
__syncthreads()#
Variables
- int waveOrder
- int numSubIterations
- int64_t startCycle
-
SubExecParam &p = params[blockIdx.y]#
-
int32_t xccId#
- float4 const *__restrict__ srcFloat4 [MAX_SRCS]
- float4 *__restrict__ dstFloat4 [MAX_DSTS]
-
int32_t const nWaves = BLOCKSIZE / warpSize#
-
int32_t const waveIdx = threadIdx.x / warpSize#
-
int32_t const tIdx = threadIdx.x % warpSize#
-
int32_t teamStride#
-
int32_t waveStride#
-
int32_t unrlStride#
-
int32_t teamStride2#
-
int32_t waveStride2#
-
int subIterations = 0#
-
GpuKernelFuncPtr GpuKernelTable[MAX_WAVEGROUPS][MAX_UNROLL] = {GPU_KERNEL_UNROLL_DECL(64), GPU_KERNEL_UNROLL_DECL(128), GPU_KERNEL_UNROLL_DECL(192), GPU_KERNEL_UNROLL_DECL(256), GPU_KERNEL_UNROLL_DECL(320), GPU_KERNEL_UNROLL_DECL(384), GPU_KERNEL_UNROLL_DECL(448),}#
-
PackedFloat_t#
- dir /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-transferbench/checkouts/docs-6.3.0/src/include
- dir /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-transferbench/checkouts/docs-6.3.0/src