blob: 57899b685cf8007d2b99d009eede36b50555b358 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#define NBODIES 16384
#define SOFTENINGSQUARED 0.01f
#define DELTATIME 0.001f
#define DAMPING 1.0f
#define NBLOCKSY 1
#define NBLOCKSX (NBODIES/NTHREADSX)
#define NTHREADSY 1
#define NTHREADSX 64
#define BLOCKSIZE 128
#define SHARED 1
#define TIMER 1
#define VERIFY 1
extern float sqrtf(float);
void nbody_cpu(float* oldpos,float* oldpos1, float *newpos, float *oldvel, float *newvel, float *force)
{
float r0,r1,r2;
float invDist, invDistCube, mass, invMass;
unsigned int i,j;
for(i = 0; i < NBODIES; ++i) {
//force[i*4 ] = 0;
//force[i*4+1] = 0;
//force[i*4+2] = 0;
//force[i*4+3] = 0;
for(j = 0; j < NBODIES; ++j) {
r0 = oldpos[j*4]-oldpos1[i*4];
r1 = oldpos[j*4+1]-oldpos1[i*4+1];
r2 = oldpos[j*4+2]-oldpos1[i*4+2];
invDist = 1.0/sqrtf(r0 * r0 + r1 * r1 + r2 * r2 + SOFTENINGSQUARED);
invDistCube = invDist * invDist * invDist;
mass = oldpos1[i*4+3];
force[i*4] = force[i*4] + r0 * mass * invDistCube;
force[i*4+1] = force[i*4+1] + r1 * mass * invDistCube;
force[i*4+2] = force[i*4+2] + r2 * mass * invDistCube;
}
}
/* for (i = 0; i < NBODIES; ++i) {
invMass = oldvel[4*i+3];
oldvel[4*i] += (force[4*i] * invMass) * DELTATIME * DAMPING;
oldvel[4*i+1] += (force[4*i+1] * invMass) * DELTATIME * DAMPING;
oldvel[4*i+2] += (force[4*i+2] * invMass) * DELTATIME * DAMPING;
oldpos[4*i] += oldvel[4*i] * DELTATIME;
oldpos[4*i+1] += oldvel[4*i+1] * DELTATIME;
oldpos[4*i+2] += oldvel[4*i+2] * DELTATIME;
newpos[4*i+0] = oldpos[4*i];
newpos[4*i+1] = oldpos[4*i+1];
newpos[4*i+2] = oldpos[4*i+2];
newpos[4*i+3] = oldpos[4*i+3];
newvel[4*i+0] = oldvel[4*i];
newvel[4*i+1] = oldvel[4*i+1];
newvel[4*i+2] = oldvel[4*i+2];
newvel[4*i+3] = oldvel[4*i+3];
}*/
}
|