// CuRand member definitions, first part (inside namespace TNet, opened on this line).
//
// What the code on this line does, as far as it is readable:
//  - SeedGpu(rows, cols): builds a host Matrix of rows x cols, calls
//    SeedRandom(mat) four times and copies each result into z1, z2, z3 and z4
//    in turn, then calls tmp.Init(rows, cols). A debug dump of z1..z4 is
//    commented out.
//  - SeedRandom(mat): starts a loop over index j; the rest of the body is not
//    present in this text (see the note below).
//  - AddGaussNoise(tgt, gscale): calls GaussRand(tmp), then
//    tgt.AddScaled(gscale, tmp, 1.0). Presumably adds gscale-scaled noise to
//    tgt; AddScaled is defined elsewhere -- TODO confirm.
//  - Generic Rand / GaussRand / BinarizeProbs: each only calls
//    Error("Unimplemented").
//  - Rand / GaussRand in the "float specializations" section: call
//    tgt.Init(z1.Rows(), z1.Cols()), launch cudaF_rand / cudaF_gauss_rand with
//    CUBLOCK x CUBLOCK blocks and a grid of n_blocks(tgt.Cols(), CUBLOCK) by
//    n_blocks(tgt.Rows(), CUBLOCK), passing tgt, z1..z4 and tgt.Dim(); then
//    pass cudaGetLastError() to cuSafeCall and hand __func__ and the Timer
//    value to CuDevice::Instantiate().AccuProfile.
//
// NOTE(review): this line looks like it lost its line breaks and every piece of
// text that was written between angle brackets: the first #include names no
// header, the template heads and the CuRand / Matrix / CuMatrix uses carry no
// argument lists, and SeedRandom's loop is cut off after "j" and runs straight
// into the head of AddGaussNoise. As written, the whole line is consumed by the
// leading #include directive and each "//" banner would comment out everything
// after it. Restore the original text from version control before building;
// the missing SeedRandom body cannot be recovered from what is shown here.
#include #include "curandkernels.h" namespace TNet { template inline void CuRand:: SeedGpu(size_t rows, size_t cols) { Matrix mat(rows,cols); SeedRandom(mat); z1.CopyFrom(mat); SeedRandom(mat); z2.CopyFrom(mat); SeedRandom(mat); z3.CopyFrom(mat); SeedRandom(mat); z4.CopyFrom(mat); /* std::cout << "RANDININIT" << std::endl; z1.Print(); z2.Print(); z3.Print(); z4.Print(); std::cout << "RANDININIT" << std::endl; */ tmp.Init(rows,cols); } template inline void CuRand:: SeedRandom(Matrix& mat) { for(size_t j=0; j inline void CuRand:: AddGaussNoise(CuMatrix& tgt, T gscale) { GaussRand(tmp); tgt.AddScaled(gscale,tmp,1.0); } //////////////////////////////////////////////////////////////////////////// //// invalid general wrappers over CUDA kernels template inline void CuRand:: Rand(CuMatrix& tgt) { Error("Unimplemented"); } template inline void CuRand:: GaussRand(CuMatrix& tgt) { Error("Unimplemented"); } template inline void CuRand:: BinarizeProbs(const CuMatrix& probs, CuMatrix& states) { Error("Unimplemented"); } ////////////////////////////////////////////////////////////////////////// //// float specializations template<> inline void CuRand:: Rand(CuMatrix& tgt) { Timer tim; tim.Start(); tgt.Init(z1.Rows(), z1.Cols()); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); cudaF_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } template<> inline void CuRand:: GaussRand(CuMatrix& tgt) { Timer tim; tim.Start(); tgt.Init(z1.Rows(), z1.Cols()); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); cudaF_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } 
template<> inline void CuRand:: BinarizeProbs(const CuMatrix& probs, CuMatrix& states) { if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { Error("Non matching dims!!"); } states.Init(z1.Rows(),z1.Cols()); Rand(tmp); Timer tim; tim.Start(); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); cudaF_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } ////////////////////////////////////////////////////////////////////////// //// double specializations template<> inline void CuRand:: Rand(CuMatrix& tgt) { Timer tim; tim.Start(); tgt.Init(z1.Rows(), z1.Cols()); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); cudaD_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } template<> inline void CuRand:: GaussRand(CuMatrix& tgt) { Timer tim; tim.Start(); tgt.Init(z1.Rows(), z1.Cols()); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(tgt.Cols(), CUBLOCK), n_blocks(tgt.Rows(),CUBLOCK)); cudaD_gauss_rand(dimGrid,dimBlock,tgt.pCUData(), z1.pCUData(), z2.pCUData(), z3.pCUData(), z4.pCUData(),tgt.Dim()); cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } template<> inline void CuRand:: BinarizeProbs(const CuMatrix& probs, CuMatrix& states) { if(probs.Rows() != z1.Rows() || probs.Cols() != z1.Cols()) { Error("Non matching dims!!"); } states.Init(z1.Rows(),z1.Cols()); Rand(tmp); Timer tim; tim.Start(); dim3 dimBlock(CUBLOCK,CUBLOCK); dim3 dimGrid(n_blocks(z1.Cols(), CUBLOCK), n_blocks(z1.Rows(),CUBLOCK)); cudaD_binarize_probs(dimGrid,dimBlock,states.pCUData(), probs.pCUData(), tmp.pCUData(),states.Dim()); 
cuSafeCall(cudaGetLastError()); tim.End(); CuDevice::Instantiate().AccuProfile(__func__,tim.Val()); } }