diff options
author | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800 |
---|---|---|
committer | Joe Zhao <ztuowen@gmail.com> | 2014-04-14 08:14:45 +0800 |
commit | cccccbf6cca94a3eaf813b4468453160e91c332b (patch) | |
tree | 23418cb73a10ae3b0688681a7f0ba9b06424583e /src/CuBaseLib/curandkernels.cu~ | |
download | tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.gz tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.bz2 tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.zip |
First commit
Diffstat (limited to 'src/CuBaseLib/curandkernels.cu~')
-rw-r--r-- | src/CuBaseLib/curandkernels.cu~ | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/src/CuBaseLib/curandkernels.cu~ b/src/CuBaseLib/curandkernels.cu~ new file mode 100644 index 0000000..7e1c8dd --- /dev/null +++ b/src/CuBaseLib/curandkernels.cu~ @@ -0,0 +1,135 @@ + +#include "curandkernels.h" + + + +// +//Hybrid Tauss/LCG random number generator +// +//http://http.developer.nvidia.com/GPUGems3/gpugems3_ch37.html + + +// S1, S2, S3, and M are all constants, and z is part of the +// private per-thread generator state. +__device__ +static unsigned TausStep(unsigned &z, int S1, int S2, int S3, unsigned M) +{ + unsigned b=(((z << S1) ^ z) >> S2); + return z = (((z & M) << S3) ^ b); +} + +// A and C are constants +__device__ +static unsigned LCGStep(unsigned &z, unsigned A, unsigned C) +{ + return z=(A*z+C); +} + +template<typename T> +__device__ +static T HybridTaus(unsigned& z1, unsigned& z2, unsigned& z3, unsigned& z4) +{ + // Combined period is lcm(p1,p2,p3,p4)~ 2^121 + T randval; + do { + randval = 2.3283064365387e-10 * ( // Periods + TausStep(z1, 13, 19, 12, 4294967294UL) ^ // p1=2^31-1 + TausStep(z2, 2, 25, 4, 4294967288UL) ^ // p2=2^30-1 + TausStep(z3, 3, 11, 17, 4294967280UL) ^ // p3=2^28-1 + LCGStep(z4, 1664525, 1013904223UL) // p4=2^32 + ); + } while (!(randval > 0.0 && randval < 1.0)); + return randval; +} + + + + +template<typename T> +__global__ +static void _rand(T* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int j = blockIdx.y * blockDim.y + threadIdx.y; + int index = i + j*d.stride; + if( i < d.cols && j < d.rows ) { + mat[index] = HybridTaus<T>(z1[index],z2[index],z3[index],z4[index]); + } +} + +/* +float2 BoxMuller() +{ + float u0=HybridTaus (), u1=HybridTaus (); + float r=sqrt(-2 log(u0)); + float theta=2*PI*u1; + return make_float2(r*sin(theta),r*cos(theta)); +} +*/ + +template<typename T> +__device__ +static T BoxMuller(unsigned& z1, unsigned& z2, unsigned& z3, unsigned& z4) +{ + const T M_2PI = 6.283185307179586476925286766558; + + T u0 = HybridTaus<T>(z1,z2,z3,z4), u1 = HybridTaus<T>(z1,z2,z3,z4); + T r = sqrt(-2.0 * log(u0)); + T theta = M_2PI * u1; + return r*sin(theta); + +} + + +template<typename T> +__global__ +static void _gauss_rand(T* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int j = blockIdx.y * blockDim.y + threadIdx.y; + int index = i + j*d.stride; + if( i < d.cols && j < d.rows ) { + mat[index] = BoxMuller<T>(z1[index],z2[index],z3[index],z4[index]); + } +} + + +template<typename T> +__global__ +static void _binarize_probs(T* states, const T* probs, const T* rand, MatrixDim d) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + int j = blockIdx.y * blockDim.y + threadIdx.y; + int index = i + j*d.stride; + if( i < d.cols && j < d.rows ) { + states[index] = ((probs[index] > rand[index])? 1.0 : 0.0); + } +} + + + +/************ + * :FLOAT: + */ +void cudaF_rand(dim3 Gr, dim3 Bl, float* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ _rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); } + +void cudaF_gauss_rand(dim3 Gr, dim3 Bl, float* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ _gauss_rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); } + +void cudaF_binarize_probs(dim3 Gr, dim3 Bl, float* states, const float* probs, float* rand, MatrixDim d) +{ _binarize_probs<<<Gr,Bl>>>(states,probs,rand,d); } + + +/************ + * :DOUBLE: + */ +void cudaD_rand(dim3 Gr, dim3 Bl, double* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ _rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); } + +void cudaD_gauss_rand(dim3 Gr, dim3 Bl, double* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d) +{ _gauss_rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); } + +void cudaD_binarize_probs(dim3 Gr, dim3 Bl, double* states, const double* probs, double* rand, MatrixDim d) +{ _binarize_probs<<<Gr,Bl>>>(states,probs,rand,d); } + |