Merge "Balls perf update"
diff --git a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/BallsRS.java b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/BallsRS.java
index 8cab9b8..d9d182c 100644
--- a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/BallsRS.java
+++ b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/BallsRS.java
@@ -20,8 +20,9 @@
 import android.renderscript.*;
 import android.util.Log;
 
+
 public class BallsRS {
-    public static final int PART_COUNT = 900;
+    public static final int PART_COUNT = 4000;
 
     public BallsRS() {
     }
@@ -30,11 +31,12 @@
     private RenderScriptGL mRS;
     private ScriptC_balls mScript;
     private ScriptC_ball_physics mPhysicsScript;
-    private ProgramFragment mPFLines;
     private ProgramFragment mPFPoints;
-    private ProgramVertex mPV;
     private ScriptField_Point mPoints;
     private ScriptField_VpConsts mVpConsts;
+    private ScriptField_BallGrid mGrid;
+    private ScriptField_Ball mBalls;
+    private Allocation mGridCache;
 
     void updateProjectionMatrices() {
         mVpConsts = new ScriptField_VpConsts(mRS, 1,
@@ -56,8 +58,8 @@
                     "  vec4 pos = vec4(0.0, 0.0, 0.0, 1.0);\n" +
                     "  pos.xy = ATTRIB_position;\n" +
                     "  gl_Position = UNI_MVP * pos;\n" +
-                    "  varColor = vec4(1.0, 1.0, 1.0, 1.0);\n" +
-                    "  gl_PointSize = ATTRIB_size;\n" +
+                    "  varColor = ATTRIB_color;\n" +
+                    "  gl_PointSize = 12.0;\n" +
                     "}\n";
         sb.setShader(t);
         sb.addConstant(mVpConsts.getType());
@@ -84,22 +86,21 @@
         return builder.create();
     }
 
-    public void init(RenderScriptGL rs, Resources res, int width, int height) {
-        mRS = rs;
-        mRes = res;
-
-        ProgramFragmentFixedFunction.Builder pfb = new ProgramFragmentFixedFunction.Builder(rs);
+    private void createPF(int width, int height) {
+        ProgramFragmentFixedFunction.Builder pfb = new ProgramFragmentFixedFunction.Builder(mRS);
         pfb.setPointSpriteTexCoordinateReplacement(true);
         pfb.setTexture(ProgramFragmentFixedFunction.Builder.EnvMode.MODULATE,
                            ProgramFragmentFixedFunction.Builder.Format.RGBA, 0);
         pfb.setVaryingColor(true);
         mPFPoints = pfb.create();
+    }
 
-        pfb = new ProgramFragmentFixedFunction.Builder(rs);
-        pfb.setVaryingColor(true);
-        mPFLines = pfb.create();
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mRS = rs;
+        mRes = res;
 
-        android.util.Log.e("rs", "Load texture");
+        createPF(width, height);
+
         mPFPoints.bindTexture(loadTexture(R.drawable.flares), 0);
 
         mPoints = new ScriptField_Point(mRS, PART_COUNT, Allocation.USAGE_SCRIPT);
@@ -109,16 +110,22 @@
         smb.addIndexSetType(Mesh.Primitive.POINT);
         Mesh smP = smb.create();
 
-        mPhysicsScript = new ScriptC_ball_physics(mRS, mRes, R.raw.ball_physics);
+        mGrid = ScriptField_BallGrid.create2D(mRS, (width + 99) / 100, (height + 99) / 100);
+        mGridCache = Allocation.createSized(mRS, Element.F32_2(mRS), PART_COUNT);
+        mBalls = new ScriptField_Ball(mRS, PART_COUNT, Allocation.USAGE_SCRIPT);
 
-        mScript = new ScriptC_balls(mRS, mRes, R.raw.balls);
+        mPhysicsScript = new ScriptC_ball_physics(mRS);
+        mPhysicsScript.set_gGridCache(mGridCache);
+        mPhysicsScript.set_gBalls(mBalls.getAllocation());
+
+        mScript = new ScriptC_balls(mRS);
         mScript.set_partMesh(smP);
         mScript.set_physics_script(mPhysicsScript);
         mScript.bind_point(mPoints);
-        mScript.bind_balls1(new ScriptField_Ball(mRS, PART_COUNT, Allocation.USAGE_SCRIPT));
-        mScript.bind_balls2(new ScriptField_Ball(mRS, PART_COUNT, Allocation.USAGE_SCRIPT));
+        mScript.bind_balls(mBalls);
+        mScript.set_gGrid(mGrid.getAllocation());
+        mScript.bind_gGridCache(mGridCache);
 
-        mScript.set_gPFLines(mPFLines);
         mScript.set_gPFPoints(mPFPoints);
         createProgramVertex();
 
@@ -126,6 +133,7 @@
 
         mPhysicsScript.set_gMinPos(new Float2(5, 5));
         mPhysicsScript.set_gMaxPos(new Float2(width - 5, height - 5));
+        mPhysicsScript.set_gGrid(mGrid.getAllocation());
 
         mScript.invoke_initParts(width, height);
 
@@ -133,7 +141,7 @@
     }
 
     public void newTouchPosition(float x, float y, float pressure, int id) {
-        mPhysicsScript.invoke_touch(x, y, pressure, id);
+        mPhysicsScript.invoke_touch(x, y, pressure * mRS.getWidth() / 1280, id);
     }
 
     public void setAccel(float x, float y) {
diff --git a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/ball_physics.rs b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/ball_physics.rs
index 8a3db6d..5b5d2e0 100644
--- a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/ball_physics.rs
+++ b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/ball_physics.rs
@@ -10,6 +10,13 @@
 
 static float2 touchPos[10];
 static float touchPressure[10];
+static const float gDT = 1.f / 30.f;
+
+rs_allocation gGrid;
+rs_allocation gGridCache;
+rs_allocation gBalls;
+
+float gScale = 1.f;
 
 void touch(float x, float y, float pressure, int id) {
     if (id >= 10) {
@@ -21,126 +28,128 @@
     touchPressure[id] = pressure;
 }
 
-void root(const Ball_t *ballIn, Ball_t *ballOut, const BallControl_t *ctl, uint32_t x) {
-    float2 fv = {0, 0};
-    float2 pos = ballIn->position;
+void root(Ball_t *ball, uint32_t x) {
+    float2 fv = 0;
+    float pressure = 0;
+    float2 pos = ball->position;
+    int2 gridPos[9];
 
-    int arcID = -1;
-    float arcInvStr = 100000;
+    gridPos[0] = convert_int2((ball->position / 100.f) /*- 0.4999f*/);
+    gridPos[1] = (int2){gridPos[0].x - 1, gridPos[0].y - 1};
+    gridPos[2] = (int2){gridPos[0].x + 0, gridPos[0].y - 1};
+    gridPos[3] = (int2){gridPos[0].x + 1, gridPos[0].y - 1};
+    gridPos[4] = (int2){gridPos[0].x - 1, gridPos[0].y};
+    gridPos[5] = (int2){gridPos[0].x + 1, gridPos[0].y};
+    gridPos[6] = (int2){gridPos[0].x - 1, gridPos[0].y + 1};
+    gridPos[7] = (int2){gridPos[0].x + 0, gridPos[0].y + 1};
+    gridPos[8] = (int2){gridPos[0].x + 1, gridPos[0].y + 1};
 
-    const Ball_t * bPtr = rsGetElementAt(ctl->ain, 0);
-    for (uint32_t xin = 0; xin < ctl->dimX; xin++) {
-        float2 vec = bPtr[xin].position - pos;
-        float2 vec2 = vec * vec;
-        float len2 = vec2.x + vec2.y;
+    for (int gct=0; gct < 9; gct++) {
+        if ((gridPos[gct].x >= rsAllocationGetDimX(gGrid)) ||
+            (gridPos[gct].x < 0) ||
+            (gridPos[gct].y >= rsAllocationGetDimY(gGrid)) ||
+            (gridPos[gct].y < 0)) {
+            continue;
+        }
+        //rsDebug("grid ", gridPos[gct]);
+        const BallGrid_t *bg = (const BallGrid_t *)rsGetElementAt(gGrid, gridPos[gct].x, gridPos[gct].y);
 
-        if (len2 < 10000) {
-            //float minDist = ballIn->size + bPtr[xin].size;
-            float forceScale = ballIn->size * bPtr[xin].size;
-            forceScale *= forceScale;
+        for (int cidx = 0; cidx < bg->count; cidx++) {
+            float2 bcptr = rsGetElementAt_float2(gGridCache, bg->cacheIdx + cidx);
+            float2 vec = bcptr - pos;
+            float2 vec2 = vec * vec;
+            float len2 = vec2.x + vec2.y;
 
-            if (len2 > 16 /* (minDist*minDist)*/)  {
-                // Repulsion
-                float len = sqrt(len2);
-                fv -= (vec / (len * len * len)) * 20000.f * forceScale;
-            } else {
-                if (len2 < 1) {
-                    if (xin == x) {
-                        continue;
-                    }
-                    ballOut->delta = 0.f;
-                    ballOut->position = ballIn->position;
-                    if (xin > x) {
-                        ballOut->position.x += 1.f;
-                    } else {
-                        ballOut->position.x -= 1.f;
-                    }
-                    //ballOut->color.rgb = 1.f;
-                    //ballOut->arcID = -1;
-                    //ballOut->arcStr = 0;
-                    continue;
-                }
-                // Collision
-                float2 axis = normalize(vec);
-                float e1 = dot(axis, ballIn->delta);
-                float e2 = dot(axis, bPtr[xin].delta);
-                float e = (e1 - e2) * 0.45f;
-                if (e1 > 0) {
-                    fv -= axis * e;
-                } else {
-                    fv += axis * e;
-                }
+            if ((len2 < 10000.f) && (len2 > 0.f)) {
+                float t = native_powr(len2, 1.5f) + 16.0f;
+                float2 pfv = (vec / t) * 16000.f;
+                pressure += length(pfv);
+                fv -= pfv;
             }
         }
     }
 
-    fv /= ballIn->size * ballIn->size * ballIn->size;
-    fv -= gGravityVector * 4.f;
-    fv *= ctl->dt;
+    //fv /= ball->size * ball->size * ball->size;
+    fv -= gGravityVector * 4.f * gScale;
+    fv *= gDT;
 
     for (int i=0; i < 10; i++) {
         if (touchPressure[i] > 0.1f) {
-            float2 vec = touchPos[i] - ballIn->position;
+            float2 vec = touchPos[i] - ball->position;
             float2 vec2 = vec * vec;
             float len2 = max(2.f, vec2.x + vec2.y);
-            fv -= (vec / len2) * touchPressure[i] * 300.f;
+            float2 pfv = (vec / len2) * touchPressure[i] * 500.f * gScale;
+            pressure += length(pfv);
+            fv -= pfv;
         }
     }
 
-    ballOut->delta = (ballIn->delta * (1.f - 0.004f)) + fv;
-    ballOut->position = ballIn->position + (ballOut->delta * ctl->dt);
+    ball->delta = (ball->delta * (1.f - 0.008f)) + fv;
+    ball->position = ball->position + (ball->delta * gDT);
 
-    const float wallForce = 400.f;
-    if (ballOut->position.x > (gMaxPos.x - 20.f)) {
-        float d = gMaxPos.x - ballOut->position.x;
+    const float wallForce = 400.f * gScale;
+    if (ball->position.x > (gMaxPos.x - 20.f)) {
+        float d = gMaxPos.x - ball->position.x;
         if (d < 0.f) {
-            if (ballOut->delta.x > 0) {
-                ballOut->delta.x *= -0.7f;
+            if (ball->delta.x > 0) {
+                ball->delta.x *= -0.7f;
             }
-            ballOut->position.x = gMaxPos.x;
+            ball->position.x = gMaxPos.x - 1.f;
         } else {
-            ballOut->delta.x -= min(wallForce / (d * d), 10.f);
+            ball->delta.x -= min(wallForce / (d * d), 10.f);
         }
     }
 
-    if (ballOut->position.x < (gMinPos.x + 20.f)) {
-        float d = ballOut->position.x - gMinPos.x;
+    if (ball->position.x < (gMinPos.x + 20.f)) {
+        float d = ball->position.x - gMinPos.x;
         if (d < 0.f) {
-            if (ballOut->delta.x < 0) {
-                ballOut->delta.x *= -0.7f;
+            if (ball->delta.x < 0) {
+                ball->delta.x *= -0.7f;
             }
-            ballOut->position.x = gMinPos.x + 1.f;
+            ball->position.x = gMinPos.x + 1.f;
         } else {
-            ballOut->delta.x += min(wallForce / (d * d), 10.f);
+            ball->delta.x += min(wallForce / (d * d), 10.f);
         }
     }
 
-    if (ballOut->position.y > (gMaxPos.y - 20.f)) {
-        float d = gMaxPos.y - ballOut->position.y;
+    if (ball->position.y > (gMaxPos.y - 20.f)) {
+        float d = gMaxPos.y - ball->position.y;
         if (d < 0.f) {
-            if (ballOut->delta.y > 0) {
-                ballOut->delta.y *= -0.7f;
+            if (ball->delta.y > 0) {
+                ball->delta.y *= -0.7f;
             }
-            ballOut->position.y = gMaxPos.y;
+            ball->position.y = gMaxPos.y - 1.f;
         } else {
-            ballOut->delta.y -= min(wallForce / (d * d), 10.f);
+            ball->delta.y -= min(wallForce / (d * d), 10.f);
         }
     }
 
-    if (ballOut->position.y < (gMinPos.y + 20.f)) {
-        float d = ballOut->position.y - gMinPos.y;
+    if (ball->position.y < (gMinPos.y + 20.f)) {
+        float d = ball->position.y - gMinPos.y;
         if (d < 0.f) {
-            if (ballOut->delta.y < 0) {
-                ballOut->delta.y *= -0.7f;
+            if (ball->delta.y < 0) {
+                ball->delta.y *= -0.7f;
             }
-            ballOut->position.y = gMinPos.y + 1.f;
+            ball->position.y = gMinPos.y + 1.f;
         } else {
-            ballOut->delta.y += min(wallForce / (d * d * d), 10.f);
+            ball->delta.y += min(wallForce / (d * d * d), 10.f);
         }
     }
 
-    ballOut->size = ballIn->size;
+    // low pressure ~500, high ~2500
+    pressure = max(pressure - 400.f, 0.f);
+    ball->pressure = pressure;
 
-    //rsDebug("physics pos out", ballOut->position);
+    //rsDebug("p ", pressure);
+
+    float4 color = 1.f;
+    color.r = pow(pressure, 0.25f) / 12.f;
+    color.b = 1.f - color.r;
+    color.g = sin(pressure / 1500.f * 3.14f);
+    color.rgb = max(color.rgb, (float3)0);
+    color.rgb = normalize(color.rgb);
+    ball->color = rsPackColorTo8888(color);
+
+    //rsDebug("physics pos out", ball->position);
 }
 
diff --git a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rs b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rs
index dcdd586..9be9f38 100644
--- a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rs
+++ b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rs
@@ -8,12 +8,15 @@
 #pragma stateStore(parent)
 
 rs_program_fragment gPFPoints;
-rs_program_fragment gPFLines;
 rs_mesh partMesh;
 
+rs_allocation gGrid;
+BallGrid_t *unused1;
+float2 *gGridCache;
+
 typedef struct __attribute__((packed, aligned(4))) Point {
     float2 position;
-    float size;
+    uchar4 color;
 } Point_t;
 Point_t *point;
 
@@ -24,58 +27,78 @@
 
 rs_script physics_script;
 
-Ball_t *balls1;
-Ball_t *balls2;
-
-static int frame = 0;
 
 void initParts(int w, int h)
 {
-    uint32_t dimX = rsAllocationGetDimX(rsGetAllocation(balls1));
+    uint32_t dimX = rsAllocationGetDimX(rsGetAllocation(balls));
 
     for (uint32_t ct=0; ct < dimX; ct++) {
-        balls1[ct].position.x = rsRand(0.f, (float)w);
-        balls1[ct].position.y = rsRand(0.f, (float)h);
-        balls1[ct].delta.x = 0.f;
-        balls1[ct].delta.y = 0.f;
-        balls1[ct].size = 1.f;
-
-        float r = rsRand(100.f);
-        if (r > 90.f) {
-            balls1[ct].size += pow(10.f, rsRand(0.f, 2.f)) * 0.07f;
-        }
+        balls[ct].position.x = rsRand(0.f, (float)w);
+        balls[ct].position.y = rsRand(0.f, (float)h);
+        balls[ct].delta.x = 0.f;
+        balls[ct].delta.y = 0.f;
     }
 }
 
-
-
 int root() {
     rsgClearColor(0.f, 0.f, 0.f, 1.f);
 
-    BallControl_t bc;
-    Ball_t *bout;
+    int2 gridDims = (int2){ rsAllocationGetDimX(gGrid),
+                            rsAllocationGetDimY(gGrid) };
 
-    if (frame & 1) {
-        bc.ain = rsGetAllocation(balls2);
-        bc.aout = rsGetAllocation(balls1);
-        bout = balls1;
-    } else {
-        bc.ain = rsGetAllocation(balls1);
-        bc.aout = rsGetAllocation(balls2);
-        bout = balls2;
+    rs_allocation ain = rsGetAllocation(balls);
+    int32_t dimX = rsAllocationGetDimX(ain);
+
+    // Binning
+    // Clear the particle list
+    for (uint32_t ct=0; ct < dimX; ct++) {
+        balls[ct].next = -1;
     }
 
-    bc.dimX = rsAllocationGetDimX(bc.ain);
-    bc.dt = 1.f / 30.f;
-
-    rsForEach(physics_script, bc.ain, bc.aout, &bc, sizeof(bc));
-
-    for (uint32_t ct=0; ct < bc.dimX; ct++) {
-        point[ct].position = bout[ct].position;
-        point[ct].size = 6.f /*+ bout[ct].color.g * 6.f*/ * bout[ct].size;
+    // Clear the grid
+    for (uint32_t y=0; y < gridDims.y; y++) {
+        for (uint32_t x=0; x < gridDims.x; x++) {
+            BallGrid_t *bg = (BallGrid_t *)rsGetElementAt(gGrid, x, y);
+            bg->count = 0;
+            bg->idx = -1;
+        }
     }
 
-    frame++;
+    // Create the particle list per grid
+    for (uint32_t ct=0; ct < dimX; ct++) {
+        int2 p = convert_int2(balls[ct].position / 100.f);
+        p.x = rsClamp(p.x, 0, (int)(gridDims.x-1));
+        p.y = rsClamp(p.y, 0, (int)(gridDims.y-1));
+        BallGrid_t *bg = (BallGrid_t *)rsGetElementAt(gGrid, p.x, p.y);
+        bg->count ++;
+        balls[ct].next = bg->idx;
+        bg->idx = ct;
+    }
+
+    // Create the sorted grid cache
+    uint32_t gridIdx = 0;
+    for (uint32_t y=0; y < gridDims.y; y++) {
+        for (uint32_t x=0; x < gridDims.x; x++) {
+            BallGrid_t *bg = (BallGrid_t *)rsGetElementAt(gGrid, x, y);
+            bg->cacheIdx = gridIdx;
+
+            int idx = bg->idx;
+            while (idx >= 0) {
+                const Ball_t * bPtr = &balls[idx];
+                gGridCache[gridIdx++] = bPtr->position;
+                idx = bPtr->next;
+            }
+        }
+    }
+
+
+    rsForEach(physics_script, ain, ain);
+
+    for (uint32_t ct=0; ct < dimX; ct++) {
+        point[ct].position = balls[ct].position;
+        point[ct].color = balls[ct].color;
+    }
+
     rsgBindProgramFragment(gPFPoints);
     rsgDrawMesh(partMesh);
     return 1;
diff --git a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rsh b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rsh
index fc886f9..ebe23f8 100644
--- a/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rsh
+++ b/tests/RenderScriptTests/Balls/src/com/example/android/rs/balls/balls.rsh
@@ -2,17 +2,18 @@
 typedef struct __attribute__((packed, aligned(4))) Ball {
     float2 delta;
     float2 position;
-    //float3 color;
-    float size;
+    uchar4 color;
+    float pressure;
+    //float size;
+    int32_t next;
     //int arcID;
     //float arcStr;
 } Ball_t;
 Ball_t *balls;
 
 
-typedef struct BallControl {
-    uint32_t dimX;
-    rs_allocation ain;
-    rs_allocation aout;
-    float dt;
-} BallControl_t;
+typedef struct BallGrid {
+    int32_t idx;
+    int32_t count;
+    int32_t cacheIdx;
+} BallGrid_t;