summary | refs | log | tree | commit | diff
path: root/src/CuBaseLib/curand.tcc
diff options
context:
space:
mode:
author: Joe Zhao <ztuowen@gmail.com> 2014-04-14 08:14:45 +0800
committer: Joe Zhao <ztuowen@gmail.com> 2014-04-14 08:14:45 +0800
commit: cccccbf6cca94a3eaf813b4468453160e91c332b (patch)
tree: 23418cb73a10ae3b0688681a7f0ba9b06424583e /src/CuBaseLib/curand.tcc
download: tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.gz
tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.bz2
tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.zip
First commit
Diffstat (limited to 'src/CuBaseLib/curand.tcc')
-rw-r--r-- src/CuBaseLib/curand.tcc 228
1 files changed, 228 insertions, 0 deletions
diff --git a/src/CuBaseLib/curand.tcc b/src/CuBaseLib/curand.tcc
new file mode 100644
index 0000000..e337189
--- /dev/null
+++ b/src/CuBaseLib/curand.tcc
@@ -0,0 +1,228 @@
+
+#include <cstdlib>
+#include "curandkernels.h"
+
+
+namespace TNet {
+
+
+
+ // Seed the four member matrices z1..z4 (the ones later handed to the
+ // rand / gauss_rand kernel wrappers) and size the scratch matrix tmp.
+ //
+ //   rows, cols  dimensions used for the staging matrix and for tmp
+ //
+ // The staging matrix is refilled by SeedRandom() before each CopyFrom(),
+ // so z1..z4 each receive their own set of values.
+ template<typename T>
+ inline void CuRand<T>::SeedGpu(size_t rows, size_t cols)
+ {
+   // staging buffer (presumably host-side -- confirm against Matrix<>)
+   Matrix<unsigned> seed_buf(rows, cols);
+
+   SeedRandom(seed_buf);
+   z1.CopyFrom(seed_buf);
+
+   SeedRandom(seed_buf);
+   z2.CopyFrom(seed_buf);
+
+   SeedRandom(seed_buf);
+   z3.CopyFrom(seed_buf);
+
+   SeedRandom(seed_buf);
+   z4.CopyFrom(seed_buf);
+
+   /* debug dump of the freshly seeded matrices:
+   std::cout << "RANDININIT" << std::endl;
+   z1.Print();
+   z2.Print();
+   z3.Print();
+   z4.Print();
+   std::cout << "RANDININIT" << std::endl;
+   */
+
+   // scratch matrix used by AddGaussNoise() and BinarizeProbs()
+   tmp.Init(rows, cols);
+ }
+
+
+
+ // Fill every element of mat with a value drawn from lrand48().
+ // Draws that are <= 128 are rejected and redrawn, so each stored
+ // element is strictly greater than 128.
+ // Elements are visited row by row, columns innermost.
+ template<typename T>
+ inline void CuRand<T>::SeedRandom(Matrix<unsigned>& mat)
+ {
+   const size_t n_rows = mat.Rows();
+   const size_t n_cols = mat.Cols();
+
+   for(size_t r = 0; r < n_rows; ++r) {
+     for(size_t c = 0; c < n_cols; ++c) {
+       unsigned seed;
+       // keep drawing until the value clears the 128 threshold
+       do {
+         seed = lrand48();
+       } while(seed <= 128);
+       mat(r,c) = seed;
+     }
+   }
+ }
+
+
+ // Generate noise into the member scratch matrix tmp with GaussRand()
+ // and fold it into tgt through tgt.AddScaled(gscale, tmp, 1.0).
+ //
+ //   tgt     matrix that receives the noise
+ //   gscale  scale factor passed as the first AddScaled() argument
+ //
+ // NOTE(review): presumably the result is tgt = gscale*tmp + 1.0*tgt --
+ // confirm against CuMatrix::AddScaled.
+ template<typename T>
+ inline void CuRand<T>::AddGaussNoise(CuMatrix<T>& tgt, T gscale)
+ {
+   // refresh the scratch matrix with new random values
+   GaussRand(tmp);
+
+   tgt.AddScaled(gscale,tmp,1.0);
+ }
+
+
+
+
+
+
+
+ ////////////////////////////////////////////////////////////////////////////
+ //// generic (unspecialized) wrappers over CUDA kernels:
+ //// not implemented, each one only reports an error
+ // Generic Rand(): no implementation for an arbitrary T,
+ // the call only reports "Unimplemented" through Error().
+ template<typename T>
+ inline void CuRand<T>::Rand(CuMatrix<T>& /*tgt*/)
+ {
+   Error("Unimplemented");
+ }
+
+ // Generic GaussRand(): no implementation for an arbitrary T,
+ // the call only reports "Unimplemented" through Error().
+ template<typename T>
+ inline void CuRand<T>::GaussRand(CuMatrix<T>& /*tgt*/)
+ {
+   Error("Unimplemented");
+ }
+
+ // Generic BinarizeProbs(): no implementation for an arbitrary T,
+ // the call only reports "Unimplemented" through Error().
+ template<typename T>
+ inline void CuRand<T>::BinarizeProbs(const CuMatrix<T>& /*probs*/,
+                                      CuMatrix<T>& /*states*/)
+ {
+   Error("Unimplemented");
+ }
+
+
+ //////////////////////////////////////////////////////////////////////////
+ //// float specializations
+ // float specialization of Rand().
+ // (Re)initialises tgt to the dimensions of z1, then launches the
+ // cudaF_rand wrapper with tgt as output and z1..z4 as the remaining
+ // matrix arguments. The elapsed time is added to the device profile
+ // under this function's name.
+ template<>
+ inline void CuRand<float>::Rand(CuMatrix<float>& tgt)
+ {
+   Timer tim;
+   tim.Start();
+
+   tgt.Init(z1.Rows(), z1.Cols());
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(tgt.Cols(), CUBLOCK),
+               n_blocks(tgt.Rows(), CUBLOCK));
+
+   cudaF_rand(blocks, threads,
+              tgt.pCUData(),
+              z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),
+              tgt.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+ // float specialization of GaussRand().
+ // (Re)initialises tgt to the dimensions of z1, then launches the
+ // cudaF_gauss_rand wrapper with tgt as output and z1..z4 as the
+ // remaining matrix arguments. The elapsed time is added to the device
+ // profile under this function's name.
+ template<>
+ inline void CuRand<float>::GaussRand(CuMatrix<float>& tgt)
+ {
+   Timer tim;
+   tim.Start();
+
+   tgt.Init(z1.Rows(), z1.Cols());
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(tgt.Cols(), CUBLOCK),
+               n_blocks(tgt.Rows(), CUBLOCK));
+
+   cudaF_gauss_rand(blocks, threads,
+                    tgt.pCUData(),
+                    z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),
+                    tgt.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+ // float specialization of BinarizeProbs().
+ //
+ //   probs   input matrix; its dimensions must equal those of z1,
+ //           otherwise Error("Non matching dims!!") is raised
+ //   states  output matrix, (re)initialised to the dimensions of z1
+ //
+ // A fresh random matrix is drawn into the member tmp via Rand(), then
+ // the cudaF_binarize_probs wrapper is launched on states, probs and tmp.
+ // NOTE(review): presumably states is a 0/1 sample obtained by comparing
+ // probs with tmp -- confirm in the kernel source.
+ // Only the binarize launch is timed here; Rand() profiles itself.
+ template<>
+ inline void CuRand<float>::BinarizeProbs(const CuMatrix<float>& probs,
+                                          CuMatrix<float>& states)
+ {
+   if(!(probs.Rows() == z1.Rows() && probs.Cols() == z1.Cols())) {
+     Error("Non matching dims!!");
+   }
+
+   states.Init(z1.Rows(), z1.Cols());
+   Rand(tmp);
+
+   Timer tim;
+   tim.Start();
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(z1.Cols(), CUBLOCK),
+               n_blocks(z1.Rows(), CUBLOCK));
+
+   cudaF_binarize_probs(blocks, threads,
+                        states.pCUData(), probs.pCUData(), tmp.pCUData(),
+                        states.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+ //////////////////////////////////////////////////////////////////////////
+ //// double specializations
+ // double specialization of Rand().
+ // (Re)initialises tgt to the dimensions of z1, then launches the
+ // cudaD_rand wrapper with tgt as output and z1..z4 as the remaining
+ // matrix arguments. The elapsed time is added to the device profile
+ // under this function's name.
+ template<>
+ inline void CuRand<double>::Rand(CuMatrix<double>& tgt)
+ {
+   Timer tim;
+   tim.Start();
+
+   tgt.Init(z1.Rows(), z1.Cols());
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(tgt.Cols(), CUBLOCK),
+               n_blocks(tgt.Rows(), CUBLOCK));
+
+   cudaD_rand(blocks, threads,
+              tgt.pCUData(),
+              z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),
+              tgt.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+ // double specialization of GaussRand().
+ // (Re)initialises tgt to the dimensions of z1, then launches the
+ // cudaD_gauss_rand wrapper with tgt as output and z1..z4 as the
+ // remaining matrix arguments. The elapsed time is added to the device
+ // profile under this function's name.
+ template<>
+ inline void CuRand<double>::GaussRand(CuMatrix<double>& tgt)
+ {
+   Timer tim;
+   tim.Start();
+
+   tgt.Init(z1.Rows(), z1.Cols());
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(tgt.Cols(), CUBLOCK),
+               n_blocks(tgt.Rows(), CUBLOCK));
+
+   cudaD_gauss_rand(blocks, threads,
+                    tgt.pCUData(),
+                    z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),
+                    tgt.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+ // double specialization of BinarizeProbs().
+ //
+ //   probs   input matrix; its dimensions must equal those of z1,
+ //           otherwise Error("Non matching dims!!") is raised
+ //   states  output matrix, (re)initialised to the dimensions of z1
+ //
+ // A fresh random matrix is drawn into the member tmp via Rand(), then
+ // the cudaD_binarize_probs wrapper is launched on states, probs and tmp.
+ // NOTE(review): presumably states is a 0/1 sample obtained by comparing
+ // probs with tmp -- confirm in the kernel source.
+ // Only the binarize launch is timed here; Rand() profiles itself.
+ template<>
+ inline void CuRand<double>::BinarizeProbs(const CuMatrix<double>& probs,
+                                           CuMatrix<double>& states)
+ {
+   if(!(probs.Rows() == z1.Rows() && probs.Cols() == z1.Cols())) {
+     Error("Non matching dims!!");
+   }
+
+   states.Init(z1.Rows(), z1.Cols());
+   Rand(tmp);
+
+   Timer tim;
+   tim.Start();
+
+   // CUBLOCK x CUBLOCK threads per block;
+   // grid x is derived from the column count, grid y from the row count
+   dim3 threads(CUBLOCK, CUBLOCK);
+   dim3 blocks(n_blocks(z1.Cols(), CUBLOCK),
+               n_blocks(z1.Rows(), CUBLOCK));
+
+   cudaD_binarize_probs(blocks, threads,
+                        states.pCUData(), probs.pCUData(), tmp.pCUData(),
+                        states.Dim());
+
+   // surface any error recorded by the launch
+   cuSafeCall(cudaGetLastError());
+
+   tim.End();
+   CuDevice::Instantiate().AccuProfile(__func__, tim.Val());
+ }
+
+
+
+}