summary refs log tree commit diff
path: root/examples/cuda-chill/nbody.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/cuda-chill/nbody.c')
-rw-r--r-- examples/cuda-chill/nbody.c 66
1 files changed, 66 insertions, 0 deletions
diff --git a/examples/cuda-chill/nbody.c b/examples/cuda-chill/nbody.c
new file mode 100644
index 0000000..57899b6
--- /dev/null
+++ b/examples/cuda-chill/nbody.c
@@ -0,0 +1,66 @@
+#define NBODIES 16384 /* trip count of both loops in nbody_cpu */
+#define SOFTENINGSQUARED 0.01f /* added to the squared distance before sqrtf is taken */
+#define DELTATIME 0.001f /* referenced only inside the disabled integration code in nbody_cpu */
+#define DAMPING 1.0f /* referenced only inside the disabled integration code in nbody_cpu */
+/* None of the NBLOCKS and NTHREADS macros below are referenced in this file; presumably read by external tooling (TODO confirm). */
+#define NBLOCKSY 1
+#define NBLOCKSX (NBODIES/NTHREADSX) /* NTHREADSX is defined two lines down; that is fine because macros are resolved at expansion time */
+#define NTHREADSY 1
+#define NTHREADSX 64
+
+#define BLOCKSIZE 128
+/* BLOCKSIZE, SHARED, TIMER and VERIFY are not referenced in this file either; presumably external switches (TODO confirm). */
+#define SHARED 1
+#define TIMER 1
+#define VERIFY 1
+/* sqrtf is declared by hand here instead of being pulled in through <math.h>. */
+extern float sqrtf(float);
+
+void nbody_cpu(float* oldpos,float* oldpos1, float *newpos, float *oldvel, float *newvel, float *force)
+{ /* For every i and j in [0, NBODIES), adds a softened inverse-cube term to force[i*4 .. i*4+2]. */
+ float r0,r1,r2; /* per-component oldpos[j] - oldpos1[i]; every body occupies 4 consecutive floats */
+ float invDist, invDistCube, mass, invMass; /* invMass is referenced only by the disabled code at the end */
+ unsigned int i,j;
+ for(i = 0; i < NBODIES; ++i) {
+ // force[] is only accumulated into by this function: the stores that would
+ // clear force[i*4 .. i*4+3] are disabled, so whatever the caller left in
+ // force[] is carried into the result.
+ // Disabled: force[i*4] = 0; force[i*4+1] = 0; force[i*4+2] = 0; force[i*4+3] = 0;
+ for(j = 0; j < NBODIES; ++j) {
+ r0 = oldpos[j*4]-oldpos1[i*4];
+ r1 = oldpos[j*4+1]-oldpos1[i*4+1];
+ r2 = oldpos[j*4+2]-oldpos1[i*4+2];
+ // 1.0 is a double literal, so the division below is done in double and narrowed to float on assignment.
+ invDist = 1.0/sqrtf(r0 * r0 + r1 * r1 + r2 * r2 + SOFTENINGSQUARED);
+ invDistCube = invDist * invDist * invDist;
+ mass = oldpos1[i*4+3];
+ // NOTE(review): the weight above is read from body i (4th float of oldpos1), not from body j; confirm this is intended.
+ force[i*4] = force[i*4] + r0 * mass * invDistCube;
+ force[i*4+1] = force[i*4+1] + r1 * mass * invDistCube;
+ force[i*4+2] = force[i*4+2] + r2 * mass * invDistCube;
+
+ }
+ }
+ /* newpos, oldvel, newvel and invMass are referenced only inside the disabled integration step that follows. */
+/* for (i = 0; i < NBODIES; ++i) {
+ invMass = oldvel[4*i+3];
+
+ oldvel[4*i] += (force[4*i] * invMass) * DELTATIME * DAMPING;
+ oldvel[4*i+1] += (force[4*i+1] * invMass) * DELTATIME * DAMPING;
+ oldvel[4*i+2] += (force[4*i+2] * invMass) * DELTATIME * DAMPING;
+
+ oldpos[4*i] += oldvel[4*i] * DELTATIME;
+ oldpos[4*i+1] += oldvel[4*i+1] * DELTATIME;
+ oldpos[4*i+2] += oldvel[4*i+2] * DELTATIME;
+
+ newpos[4*i+0] = oldpos[4*i];
+ newpos[4*i+1] = oldpos[4*i+1];
+ newpos[4*i+2] = oldpos[4*i+2];
+ newpos[4*i+3] = oldpos[4*i+3];
+
+ newvel[4*i+0] = oldvel[4*i];
+ newvel[4*i+1] = oldvel[4*i+1];
+ newvel[4*i+2] = oldvel[4*i+2];
+ newvel[4*i+3] = oldvel[4*i+3];
+ }*/
+}