summaryrefslogtreecommitdiff
path: root/src/CuBaseLib/curandkernels.cu
diff options
context:
space:
mode:
authorJoe Zhao <ztuowen@gmail.com>2014-04-14 08:14:45 +0800
committerJoe Zhao <ztuowen@gmail.com>2014-04-14 08:14:45 +0800
commitcccccbf6cca94a3eaf813b4468453160e91c332b (patch)
tree23418cb73a10ae3b0688681a7f0ba9b06424583e /src/CuBaseLib/curandkernels.cu
downloadtnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.gz
tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.tar.bz2
tnet-cccccbf6cca94a3eaf813b4468453160e91c332b.zip
First commit
Diffstat (limited to 'src/CuBaseLib/curandkernels.cu')
-rw-r--r--src/CuBaseLib/curandkernels.cu135
1 files changed, 135 insertions, 0 deletions
diff --git a/src/CuBaseLib/curandkernels.cu b/src/CuBaseLib/curandkernels.cu
new file mode 100644
index 0000000..5e42258
--- /dev/null
+++ b/src/CuBaseLib/curandkernels.cu
@@ -0,0 +1,135 @@
+
+#include "curandkernels.h"
+
+
+
+//
+//Hybrid Tausworthe/LCG random number generator
+//
+//http://http.developer.nvidia.com/GPUGems3/gpugems3_ch37.html
+
+
+// S1, S2, S3, and M are all constants, and z is part of the
+// private per-thread generator state.
+__device__
+static unsigned TausStep(unsigned &z, int S1, int S2, int S3, unsigned M)
+{
+ unsigned b=(((z << S1) ^ z) >> S2);
+ return z = (((z & M) << S3) ^ b);
+}
+
+// A and C are constants
+__device__
+static unsigned LCGStep(unsigned &z, unsigned A, unsigned C)
+{
+ return z=(A*z+C);
+}
+
+template<typename T>
+__device__
+static T HybridTaus(unsigned& z1, unsigned& z2, unsigned& z3, unsigned& z4)
+{
+ // Combined period is lcm(p1,p2,p3,p4)~ 2^121
+ T randval;
+ do {
+ randval = 2.3283064365387e-10 * ( // Periods
+ TausStep(z1, 13, 19, 12, 4294967294UL) ^ // p1=2^31-1
+ TausStep(z2, 2, 25, 4, 4294967288UL) ^ // p2=2^30-1
+ TausStep(z3, 3, 11, 17, 4294967280UL) ^ // p3=2^28-1
+ LCGStep(z4, 1664525, 1013904223UL) // p4=2^32
+ );
+ } while (!(randval > 0.0 && randval < 1.0));
+ return randval;
+}
+
+
+
+
+template<typename T>
+__global__
+static void _rand(T* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{
+ int i = blockIdx.x * blockDim.x + threadIdx.x;
+ int j = blockIdx.y * blockDim.y + threadIdx.y;
+ int index = i + j*d.stride;
+ if( i < d.cols && j < d.rows ) {
+ mat[index] = HybridTaus<T>(z1[index],z2[index],z3[index],z4[index]);
+ }
+}
+
+/*
+float2 BoxMuller()
+{
+ float u0=HybridTaus (), u1=HybridTaus ();
+ float r=sqrt(-2 log(u0));
+ float theta=2*PI*u1;
+ return make_float2(r*sin(theta),r*cos(theta));
+}
+*/
+
+template<typename T>
+__device__
+static T BoxMuller(unsigned& z1, unsigned& z2, unsigned& z3, unsigned& z4)
+{
+ const T M_2PI = 6.283185307179586476925286766558;
+
+ T u0 = HybridTaus<T>(z1,z2,z3,z4), u1 = HybridTaus<T>(z1,z2,z3,z4);
+ T r = sqrt(-2.0 * log(u0));
+ T theta = M_2PI * u1;
+ return r*sin(theta);
+
+}
+
+
+template<typename T>
+__global__
+static void _gauss_rand(T* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{
+ int i = blockIdx.x * blockDim.x + threadIdx.x;
+ int j = blockIdx.y * blockDim.y + threadIdx.y;
+ int index = i + j*d.stride;
+ if( i < d.cols && j < d.rows ) {
+ mat[index] = BoxMuller<T>(z1[index],z2[index],z3[index],z4[index]);
+ }
+}
+
+
+template<typename T>
+__global__
+static void _binarize_probs(T* states, const T* probs, const T* rand, MatrixDim d)
+{
+ int i = blockIdx.x * blockDim.x + threadIdx.x;
+ int j = blockIdx.y * blockDim.y + threadIdx.y;
+ int index = i + j*d.stride;
+ if( i < d.cols && j < d.rows ) {
+ states[index] = ((probs[index] > rand[index])? 1.0 : 0.0);
+ }
+}
+
+
+
+/************
+ * :FLOAT:
+ */
+void cudaF_rand(dim3 Gr, dim3 Bl, float* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{ _rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); }
+
+void cudaF_gauss_rand(dim3 Gr, dim3 Bl, float* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{ _gauss_rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); }
+
+void cudaF_binarize_probs(dim3 Gr, dim3 Bl, float* states, const float* probs, float* rand, MatrixDim d)
+{ _binarize_probs<<<Gr,Bl>>>(states,probs,rand,d); }
+
+
+/************
+ * :DOUBLE:
+ */
+void cudaD_rand(dim3 Gr, dim3 Bl, double* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{ _rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); }
+
+void cudaD_gauss_rand(dim3 Gr, dim3 Bl, double* mat, unsigned* z1, unsigned* z2, unsigned* z3, unsigned* z4, MatrixDim d)
+{ _gauss_rand<<<Gr,Bl>>>(mat,z1,z2,z3,z4,d); }
+
+void cudaD_binarize_probs(dim3 Gr, dim3 Bl, double* states, const double* probs, double* rand, MatrixDim d)
+{ _binarize_probs<<<Gr,Bl>>>(states,probs,rand,d); }
+