Fix deadlock in camera destruction after client app's crash

* why deadlock happened: when an app (CTS camera test) crashes while using
  camera, its binder is closed and reference counter is decreased. If camera
  is inside callback, sp<Client> inside callback will hold the Client instance,
  and Client instance is destroyed when the callback ends as sp<Client> to hold
  it no longer exists. The destructor of Client instance tries to clean up
  camera H/W which tries to stop threads created by camera HAL including the
  thread context where the callback is running. This causes deadlock where the
  callback thread itself is waiting for itself to terminate.
  Note that the deadlock will not happen if camera callback is not active. In
  that case, closing of binder will force the destruction of Client instance,
  and the destruction happens in binder thread.

* Fix: Forces Client descruction in binder thread
- remove sp<Client> from callbacks to prevent destruction in callback context
- add client lock to allow callback to use raw pointer safely. This prevents
  the destructor from deleting the instance while callback is using it.
- add status change inside destructor with client lock to safely destroy Client

Bug: 6214383
Change-Id: Ic6d6396d4d95ce9e72a16ec2480ae65c100fe806
diff --git a/services/camera/libcameraservice/CameraService.cpp b/services/camera/libcameraservice/CameraService.cpp
index 22836e3..dc3f083 100644
--- a/services/camera/libcameraservice/CameraService.cpp
+++ b/services/camera/libcameraservice/CameraService.cpp
@@ -299,9 +299,14 @@
     LOG1("CameraService::removeClient X (pid %d)", callingPid);
 }
 
-sp<CameraService::Client> CameraService::getClientById(int cameraId) {
+CameraService::Client* CameraService::getClientByIdUnsafe(int cameraId) {
     if (cameraId < 0 || cameraId >= mNumberOfCameras) return NULL;
-    return mClient[cameraId].promote();
+    return mClient[cameraId].unsafe_get();
+}
+
+Mutex* CameraService::getClientLockById(int cameraId) {
+    if (cameraId < 0 || cameraId >= mNumberOfCameras) return NULL;
+    return &mClientLock[cameraId];
 }
 
 status_t CameraService::onTransact(
@@ -408,6 +413,7 @@
     mMsgEnabled = 0;
     mSurface = 0;
     mPreviewWindow = 0;
+    mDestructionStarted = false;
     mHardware->setCallbacks(notifyCallback,
                             dataCallback,
                             dataCallbackTimestamp,
@@ -428,6 +434,12 @@
 
 // tear down the client
 CameraService::Client::~Client() {
+    // this lock should never be NULL
+    Mutex* lock = mCameraService->getClientLockById(mCameraId);
+    lock->lock();
+    mDestructionStarted = true;
+    // client will not be accessed from callback. should unlock to prevent dead-lock in disconnect
+    lock->unlock();
     int callingPid = getCallingPid();
     LOG1("Client::~Client E (pid %d, this %p)", callingPid, this);
 
@@ -994,16 +1006,22 @@
 
 // ----------------------------------------------------------------------------
 
-// Converts from a raw pointer to the client to a strong pointer during a
-// hardware callback. This requires the callbacks only happen when the client
-// is still alive.
-sp<CameraService::Client> CameraService::Client::getClientFromCookie(void* user) {
-    sp<Client> client = gCameraService->getClientById((int) user);
+Mutex* CameraService::Client::getClientLockFromCookie(void* user) {
+    return gCameraService->getClientLockById((int) user);
+}
+
+// Provide client pointer for callbacks. Client lock returned from getClientLockFromCookie should
+// be acquired for this to be safe
+CameraService::Client* CameraService::Client::getClientFromCookie(void* user) {
+    Client* client = gCameraService->getClientByIdUnsafe((int) user);
 
     // This could happen if the Client is in the process of shutting down (the
     // last strong reference is gone, but the destructor hasn't finished
     // stopping the hardware).
-    if (client == 0) return NULL;
+    if (client == NULL) return NULL;
+
+    // destruction already started, so should not be accessed
+    if (client->mDestructionStarted) return NULL;
 
     // The checks below are not necessary and are for debugging only.
     if (client->mCameraService.get() != gCameraService) {
@@ -1046,8 +1064,13 @@
         int32_t ext2, void* user) {
     LOG2("notifyCallback(%d)", msgType);
 
-    sp<Client> client = getClientFromCookie(user);
-    if (client == 0) return;
+    Mutex* lock = getClientLockFromCookie(user);
+    if (lock == NULL) return;
+    Mutex::Autolock alock(*lock);
+
+    Client* client = getClientFromCookie(user);
+    if (client == NULL) return;
+
     if (!client->lockIfMessageWanted(msgType)) return;
 
     switch (msgType) {
@@ -1065,10 +1088,14 @@
         const sp<IMemory>& dataPtr, camera_frame_metadata_t *metadata, void* user) {
     LOG2("dataCallback(%d)", msgType);
 
-    sp<Client> client = getClientFromCookie(user);
-    if (client == 0) return;
-    if (!client->lockIfMessageWanted(msgType)) return;
+    Mutex* lock = getClientLockFromCookie(user);
+    if (lock == NULL) return;
+    Mutex::Autolock alock(*lock);
 
+    Client* client = getClientFromCookie(user);
+    if (client == NULL) return;
+
+    if (!client->lockIfMessageWanted(msgType)) return;
     if (dataPtr == 0 && metadata == NULL) {
         ALOGE("Null data returned in data callback");
         client->handleGenericNotify(CAMERA_MSG_ERROR, UNKNOWN_ERROR, 0);
@@ -1098,8 +1125,13 @@
         int32_t msgType, const sp<IMemory>& dataPtr, void* user) {
     LOG2("dataCallbackTimestamp(%d)", msgType);
 
-    sp<Client> client = getClientFromCookie(user);
-    if (client == 0) return;
+    Mutex* lock = getClientLockFromCookie(user);
+    if (lock == NULL) return;
+    Mutex::Autolock alock(*lock);
+
+    Client* client = getClientFromCookie(user);
+    if (client == NULL) return;
+
     if (!client->lockIfMessageWanted(msgType)) return;
 
     if (dataPtr == 0) {