diff options
author | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800 |
---|---|---|
committer | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800 |
commit | cccccbf6cca94a3eaf813b4468453160e91c332b (patch) | |
tree | 23418cb73a10ae3b0688681a7f0ba9b06424583e /src/CuBaseLib/curand.tcc | |
download | tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.gz tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.bz2 tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.zip |
First commit
Diffstat (limited to 'src/CuBaseLib/curand.tcc')
-rw-r--r-- | src/CuBaseLib/curand.tcc | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/src/CuBaseLib/curand.tcc b/src/CuBaseLib/curand.tcc new file mode 100644 index 0000000..e337189 --- /dev/null +++ b/src/CuBaseLib/curand.tcc @@ -0,0 +1,228 @@ + +#include <cstdlib> +#include "curandkernels.h" + + +namespace TNet { + + + + template<typename T> + inline void + CuRand<T>:: + SeedGpu(size_t rows, size_t cols) + { + Matrix<unsigned> mat(rows,cols); + SeedRandom(mat); + z1.CopyFrom(mat); + SeedRandom(mat); + z2.CopyFrom(mat); + SeedRandom(mat); + z3.CopyFrom(mat); + SeedRandom(mat); + z4.CopyFrom(mat); + + /* + std::cout << "RANDININIT" << std::endl; + z1.Print(); + z2.Print(); + z3.Print(); + z4.Print(); + std::cout << "RANDININIT" << std::endl; + */ + + tmp.Init(rows,cols); + } + + + + template<typename T> + inline void + CuRand<T>:: + SeedRandom(Matrix<unsigned>& mat) { + for(size_t j=0; j<mat.Rows(); j++) { + for(size_t i=0; i<mat.Cols(); i++) { + unsigned value = 0; + while(value <= 128) { value = lrand48(); } + mat(j,i) = value; + } + } + } + + + template<typename T> + inline void + CuRand<T>:: + AddGaussNoise(CuMatrix<T>& tgt, T gscale) + { + GaussRand(tmp); + tgt.AddScaled(gscale,tmp,1.0); + } + + + + + + + + //////////////////////////////////////////////////////////////////////////// + //// invalid general wrappers over CUDA kernels + template<typename T> + inline void + CuRand<T>:: + Rand(CuMatrix<T>& tgt) + { Error("Unimplemented"); } + + template<typename T> + inline void + CuRand<T>:: + GaussRand(CuMatrix<T>& tgt) + { Error("Unimplemented"); } + + template<typename T> + inline void + CuRand<T>:: + BinarizeProbs(const CuMatrix<T>& probs, CuMatrix<T>& states) + { Error("Unimplemented"); } + + + ////////////////////////////////////////////////////////////////////////// + //// float specializations + template<> + inline void + CuRand<float>:: + Rand(CuMatrix<float>& tgt) + { + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaF_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand<float>:: + GaussRand(CuMatrix<float>& tgt) + { + + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaF_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand<float>:: + BinarizeProbs(const CuMatrix<float>& probs, CuMatrix<float>& states) + { + if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { + Error("Non matching dims!!"); + } + + states.Init(z1.Rows(),z1.Cols()); + Rand(tmp); + + Timer tim; tim.Start(); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); + + cudaF_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + ////////////////////////////////////////////////////////////////////////// + //// double specializations + template<> + inline void + CuRand<double>:: + Rand(CuMatrix<double>& tgt) + { + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaD_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand<double>:: + GaussRand(CuMatrix<double>& tgt) + { + + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaD_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand<double>:: + BinarizeProbs(const CuMatrix<double>& probs, CuMatrix<double>& states) + { + if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { + Error("Non matching dims!!"); + } + + states.Init(z1.Rows(),z1.Cols()); + Rand(tmp); + + Timer tim; tim.Start(); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); + + cudaD_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + +} |