Fix a race condition during Context init/teardown.

This change makes Context initialization wait until all worker threads are
available before leaving initContext(). This prevents a case where ~Context()
is called before all of the created threads have started or finished their
work. The race was discovered by running some very short-lived tests in CTS.

BUG=4064203

Change-Id: I1dd6f43fa7942c53881008e857df864e2395202e
diff --git a/rsContext.cpp b/rsContext.cpp
index 4a5620d..f9e29f1 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -592,11 +592,11 @@
 void Context::launchThreads(WorkerCallback_t cbk, void *data) {
     mWorkers.mLaunchData = data;
     mWorkers.mLaunchCallback = cbk;
-    mWorkers.mRunningCount = (int)mWorkers.mCount;
+    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
         mWorkers.mLaunchSignals[ct].set();
     }
-    while (mWorkers.mRunningCount) {
+    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
         mWorkers.mCompleteSignal.wait();
     }
 }
@@ -707,8 +707,8 @@
     }
 
     mWorkers.mCompleteSignal.init();
-    mWorkers.mRunningCount = 0;
-    mWorkers.mLaunchCount = 0;
+    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
+    android_atomic_release_store(0, &mWorkers.mLaunchCount);
     for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
         status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
         if (status) {
@@ -717,6 +717,9 @@
             break;
         }
     }
+    while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
+        usleep(100);
+    }
     pthread_attr_destroy(&threadAttr);
     return true;
 }
@@ -736,14 +739,14 @@
     // Cleanup compute threads.
     mWorkers.mLaunchData = NULL;
     mWorkers.mLaunchCallback = NULL;
-    mWorkers.mRunningCount = (int)mWorkers.mCount;
+    android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
         mWorkers.mLaunchSignals[ct].set();
     }
     for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
         status = pthread_join(mWorkers.mThreadId[ct], &res);
     }
-    rsAssert(!mWorkers.mRunningCount);
+    rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0);
 
     // Global structure cleanup.
     pthread_mutex_lock(&gInitMutex);