9#if defined(NANOVDB_USE_CUDA)
10#include <cuda_runtime_api.h>
13#if defined(NANOVDB_USE_TBB)
14#include <tbb/parallel_for.h>
15#include <tbb/blocked_range.h>
24template<std::size_t...>
29template<std::size_t N, std::size_t... Is>
34template<std::size_t... Is>
#if defined(__CUDACC__)

/// @brief Print a CUDA Runtime API error (with source location) to stderr.
///
/// @param result status code returned by a CUDA runtime call
/// @param file   source file of the call site (use __FILE__)
/// @param line   source line of the call site (use __LINE__)
/// @return true when @a result is cudaSuccess, false otherwise, so the
///         macro below can be used inside conditions.
static inline bool checkCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result <<
            " in file " << file <<
            ", line " << line <<
            " : " << cudaGetErrorString(result) << ".\n";
        return false;
    }
    return true;
}

/// @brief Wraps a CUDA runtime call and logs any failure with the caller's file/line.
#define NANOVDB_CUDA_SAFE_CALL(x) checkCUDA(x, __FILE__, __LINE__)

/// @brief Same reporting as checkCUDA() but void; the (file, line) location is
///        passed through explicitly so asynchronous errors (surfaced later, e.g.
///        by cudaDeviceSynchronize) can be attributed to the launch site.
static inline void checkErrorCUDA(cudaError_t result, const char* file, const int line)
{
    if (result != cudaSuccess) {
        std::cerr << "CUDA Runtime API error " << result <<
            " in file " << file <<
            ", line " << line <<
            " : " << cudaGetErrorString(result) << ".\n";
    }
}

/// @brief Reports a CUDA error against a caller-supplied source location.
#define NANOVDB_CUDA_CHECK_ERROR(result, file, line) checkErrorCUDA(result, file, line)

#endif // defined(__CUDACC__)
// NOTE(review): the lines below are a garbled extraction of a functor class
// (presumably `class ApplyFunc` per the surrounding file's cross-references —
// its class header, access specifiers, several member declarations (e.g. the
// item count and stored functor), most braces, and the bodies' tails are
// missing). Do not compile as-is; restore the full class from the original
// ComputePrimitives.h. The comments mark what each surviving fragment appears
// to be.
65template<
typename Fn,
typename... Args>
// Constructor: appears to capture the item count, chunk size, the functor,
// and its trailing arguments (stored in the std::tuple mArgs below) —
// TODO confirm against the original file.
69 ApplyFunc(
int count,
int blockSize,
const Fn& fn, Args... args)
71 , mBlockSize(blockSize)
// call(start, end, index_sequence): unpacks the stored argument tuple and
// invokes the functor on the half-open range [start, end).
77 template<std::size_t... Is>
80 mFunc(start, end, std::get<Is>(mArgs)...);
// operator()(int i): maps chunk index i to the range
// [i*mBlockSize, i*mBlockSize + mBlockSize); presumably clamped to the total
// item count in the missing lines — TODO confirm.
85 int start = i * mBlockSize;
86 int end = i * mBlockSize + mBlockSize;
// TBB adaptor: same invocation, but the range comes from a
// tbb::blocked_range<int> supplied by tbb::parallel_for.
92#if defined(NANOVDB_USE_TBB)
93 void operator()(
const tbb::blocked_range<int>& r)
const
95 int start = r.begin();
// Stored trailing functor arguments, expanded via the index_sequence in call().
107 std::tuple<Args...> mArgs;
#if defined(__CUDACC__)

/// @brief CUDA kernel applying functor @a f to every index in [0, numItems).
///
/// Each thread handles WorkPerThread items, strided by the total grid size,
/// and invokes the functor with a one-element half-open range:
/// f(i, i + 1, args...). The host launch (computeForEach) sizes the grid with
/// a ceil-divide, so tail threads can land past numItems and must be guarded.
template<int WorkPerThread, typename FnT, typename... Args>
__global__ void parallelForKernel(int numItems, FnT f, Args... args)
{
    for (int j = 0; j < WorkPerThread; ++j) {
        int i = threadIdx.x + blockIdx.x * blockDim.x + j * blockDim.x * gridDim.x;
        if (i < numItems) // bounds guard: the grid rarely divides numItems evenly
            f(i, i + 1, args...);
    }
}

#endif // defined(__CUDACC__)
/// @brief Blocks the host until all previously launched device work completes.
///
/// @param useCuda when true (and compiled with nvcc) waits on the device via
///                cudaDeviceSynchronize and reports any asynchronous kernel
///                error attributed to @a file / @a line. In a CPU-only build,
///                or when useCuda is false, this is a no-op.
/// @param file    caller's source file, forwarded for error reporting
/// @param line    caller's source line, forwarded for error reporting
inline void computeSync(bool useCuda, const char* file, int line)
{
#if defined(__CUDACC__)
    if (useCuda) {
        NANOVDB_CUDA_CHECK_ERROR(cudaDeviceSynchronize(), file, line);
    }
#else
    // nothing to wait for without a device
    (void)useCuda;
    (void)file;
    (void)line;
#endif
}
/// @brief Fills @a size bytes starting at @a data with the byte @a value.
///
/// @param useCuda when true, @a data must be a device pointer and the fill is
///                issued with cudaMemset (CUDA path only exists under nvcc);
///                otherwise @a data is a host pointer filled with std::memset.
/// @param data    destination buffer (device or host, per @a useCuda)
/// @param value   byte value to store (fill is byte-wise, matching memset semantics)
/// @param size    number of bytes to fill
inline void computeFill(bool useCuda, void* data, uint8_t value, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        // NOTE(review): return status of cudaMemset is not checked here;
        // consider NANOVDB_CUDA_SAFE_CALL if failures should be reported.
        cudaMemset(data, value, size);
#endif
    } else {
        std::memset(data, value, size);
    }
}
/// @brief Applies @a op to every index in [0, numItems), in parallel when possible.
///
/// The functor is always invoked as op(start, end, args...) with a half-open
/// index range. On the CUDA path each device thread receives a single-element
/// range; on the TBB path ranges are chunks of roughly @a blockSize; the
/// serial fallback also uses single-element ranges.
///
/// @param useCuda   select the CUDA path (only compiled under nvcc)
/// @param numItems  number of indices to process; <= 0 means no work
/// @param blockSize threads per block (CUDA) / grain size (TBB)
/// @param file      caller's source file, forwarded for CUDA error reporting
/// @param line      caller's source line, forwarded for CUDA error reporting
/// @param op        functor invoked as op(start, end, args...)
template<typename FunctorT, typename... Args>
inline void computeForEach(bool useCuda, int numItems, int blockSize, const char* file, int line, const FunctorT& op, Args... args)
{
    if (numItems <= 0)
        return; // nothing to do; also avoids a zero-block CUDA launch

    if (useCuda) {
#if defined(__CUDACC__)
        static const int WorkPerThread = 1;
        // ceil-divide so the final partial block is still launched
        int blockCount = ((numItems / WorkPerThread) + (blockSize - 1)) / blockSize;
        parallelForKernel<WorkPerThread, FunctorT, Args...><<<blockCount, blockSize, 0, 0>>>(numItems, op, args...);
        NANOVDB_CUDA_CHECK_ERROR(cudaGetLastError(), file, line); // catch launch-config errors
#endif
    } else {
#if defined(NANOVDB_USE_TBB)
        ApplyFunc<FunctorT, Args...> func(numItems, blockSize, op, args...);
        tbb::parallel_for(tbb::blocked_range<int>(0, numItems, blockSize), func);
#else
        for (int i = 0; i < numItems; ++i)
            op(i, i + 1, args...);
#endif
    }
    (void)file;
    (void)line;
}
/// @brief Copies @a size bytes from @a src to @a dst.
///
/// @param useCuda when true, @a src is a device pointer and @a dst a host
///                pointer (cudaMemcpyDeviceToHost, CUDA path only under nvcc);
///                otherwise both are host pointers and std::memcpy is used.
/// @param dst     destination buffer
/// @param src     source buffer (must not overlap @a dst on the host path)
/// @param size    number of bytes to copy
inline void computeDownload(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        // NOTE(review): synchronous blocking copy; status is not checked.
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
/// @brief Copies @a size bytes from @a src to @a dst.
///
/// @param useCuda when true, both pointers are device pointers and the copy is
///                issued as cudaMemcpyDeviceToDevice (CUDA path only under
///                nvcc); otherwise both are host pointers and std::memcpy is used.
/// @param dst     destination buffer
/// @param src     source buffer (must not overlap @a dst on the host path)
/// @param size    number of bytes to copy
inline void computeCopy(bool useCuda, void* dst, const void* src, size_t size)
{
    if (useCuda) {
#if defined(__CUDACC__)
        // NOTE(review): synchronous blocking copy; status is not checked.
        cudaMemcpy(dst, src, size, cudaMemcpyDeviceToDevice);
#endif
    } else {
        std::memcpy(dst, src, size);
    }
}
void computeForEach(bool useCuda, int numItems, int blockSize, const char *file, int line, const FunctorT &op, Args... args)
Definition ComputePrimitives.h:146
void computeDownload(bool useCuda, void *dst, const void *src, size_t size)
Definition ComputePrimitives.h:169
void computeSync(bool useCuda, const char *file, int line)
Definition ComputePrimitives.h:125
void computeFill(bool useCuda, void *data, uint8_t value, size_t size)
Definition ComputePrimitives.h:134
void computeCopy(bool useCuda, void *dst, const void *src, size_t size)
Definition ComputePrimitives.h:180
Definition ComputePrimitives.h:67
void call(int start, int end, cxx14::index_sequence< Is... >) const
Definition ComputePrimitives.h:78
void operator()(int i) const
Definition ComputePrimitives.h:83
ApplyFunc(int count, int blockSize, const Fn &fn, Args... args)
Definition ComputePrimitives.h:69
Definition ComputePrimitives.h:23
#define __global__
Definition Util.h:76
Definition ComputePrimitives.h:26
Definition ComputePrimitives.h:31