From cccccbf6cca94a3eaf813b4468453160e91c332b Mon Sep 17 00:00:00 2001 From: Joe Zhao Date: Mon, 14 Apr 2014 08:14:45 +0800 Subject: First commit --- src/CuBaseLib/.svn/text-base/curand.tcc.svn-base | 228 +++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 src/CuBaseLib/.svn/text-base/curand.tcc.svn-base (limited to 'src/CuBaseLib/.svn/text-base/curand.tcc.svn-base') diff --git a/src/CuBaseLib/.svn/text-base/curand.tcc.svn-base b/src/CuBaseLib/.svn/text-base/curand.tcc.svn-base new file mode 100644 index 0000000..e337189 --- /dev/null +++ b/src/CuBaseLib/.svn/text-base/curand.tcc.svn-base @@ -0,0 +1,228 @@ + +#include +#include "curandkernels.h" + + +namespace TNet { + + + + template + inline void + CuRand:: + SeedGpu(size_t rows, size_t cols) + { + Matrix mat(rows,cols); + SeedRandom(mat); + z1.CopyFrom(mat); + SeedRandom(mat); + z2.CopyFrom(mat); + SeedRandom(mat); + z3.CopyFrom(mat); + SeedRandom(mat); + z4.CopyFrom(mat); + + /* + std::cout << "RANDININIT" << std::endl; + z1.Print(); + z2.Print(); + z3.Print(); + z4.Print(); + std::cout << "RANDININIT" << std::endl; + */ + + tmp.Init(rows,cols); + } + + + + template + inline void + CuRand:: + SeedRandom(Matrix& mat) { + for(size_t j=0; j + inline void + CuRand:: + AddGaussNoise(CuMatrix& tgt, T gscale) + { + GaussRand(tmp); + tgt.AddScaled(gscale,tmp,1.0); + } + + + + + + + + //////////////////////////////////////////////////////////////////////////// + //// invalid general wrappers over CUDA kernels + template + inline void + CuRand:: + Rand(CuMatrix& tgt) + { Error("Unimplemented"); } + + template + inline void + CuRand:: + GaussRand(CuMatrix& tgt) + { Error("Unimplemented"); } + + template + inline void + CuRand:: + BinarizeProbs(const CuMatrix& probs, CuMatrix& states) + { Error("Unimplemented"); } + + + ////////////////////////////////////////////////////////////////////////// + //// float specializations + template<> + inline void + CuRand:: + Rand(CuMatrix& tgt) + { + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaF_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand:: + GaussRand(CuMatrix& tgt) + { + + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaF_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand:: + BinarizeProbs(const CuMatrix& probs, CuMatrix& states) + { + if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { + Error("Non matching dims!!"); + } + + states.Init(z1.Rows(),z1.Cols()); + Rand(tmp); + + Timer tim; tim.Start(); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); + + cudaF_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + ////////////////////////////////////////////////////////////////////////// + //// double specializations + template<> + inline void + CuRand:: + Rand(CuMatrix& tgt) + { + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaD_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand:: + GaussRand(CuMatrix& tgt) + { + + Timer tim; tim.Start(); + + tgt.Init(z1.Rows(), z1.Cols()); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); + + cudaD_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); + + cuSafeCall(cudaGetLastError()); + + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + template<> + inline void + CuRand:: + BinarizeProbs(const CuMatrix& probs, CuMatrix& states) + { + if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { + Error("Non matching dims!!"); + } + + states.Init(z1.Rows(),z1.Cols()); + Rand(tmp); + + Timer tim; tim.Start(); + + dim3 dimBlock(CUBLOCK,CUBLOCK); + dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); + + cudaD_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); + + cuSafeCall(cudaGetLastError()); + + tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); + } + + + +} -- cgit v1.2.3-70-g09d2