Decoder: add support for KEEP_THREADS_ACTIVE

The current design of the AVC decoder creates new threads
to process each frame. This CL adds support for reusing
the threads across frames, guarded by KEEP_THREADS_ACTIVE.

Test: avcdec -c dec.cfg
Bug: 191124963

Change-Id: I087699c4c7ce6aa59a1d8b078ae196a50dcf889c
diff --git a/Android.bp b/Android.bp
index 602a94b..eab1fc5 100644
--- a/Android.bp
+++ b/Android.bp
@@ -31,6 +31,8 @@
         "-Werror",
         "-Wno-unused-variable",
         "-Wno-null-pointer-arithmetic",
+        // KEEP_THREADS_ACTIVE is experimental
+        "-UKEEP_THREADS_ACTIVE",
     ],
 
     export_include_dirs: [
diff --git a/common/ithread.c b/common/ithread.c
index d19bdec..e7f63ad 100644
--- a/common/ithread.c
+++ b/common/ithread.c
@@ -209,3 +209,30 @@
     return 1;
 
 }
+
+#ifdef KEEP_THREADS_ACTIVE
+WORD32 ithread_get_cond_struct_size(void)
+{
+    return (sizeof(pthread_cond_t));
+}
+
+WORD32 ithread_cond_init(void *cond)
+{
+    return pthread_cond_init((pthread_cond_t *)cond, NULL);
+}
+
+WORD32 ithread_cond_destroy(void *cond)
+{
+    return pthread_cond_destroy((pthread_cond_t *)cond);
+}
+
+WORD32 ithread_cond_wait(void *cond, void *mutex)
+{
+    return pthread_cond_wait((pthread_cond_t *)cond, (pthread_mutex_t *)mutex);
+}
+
+WORD32 ithread_cond_signal(void *cond)
+{
+    return pthread_cond_signal((pthread_cond_t *)cond);
+}
+#endif
diff --git a/common/ithread.h b/common/ithread.h
index 3e5aa9c..c2843f8 100644
--- a/common/ithread.h
+++ b/common/ithread.h
@@ -98,4 +98,16 @@
 
 void    ithread_set_name(CHAR *pc_thread_name);
 
+#ifdef KEEP_THREADS_ACTIVE
+WORD32  ithread_get_cond_struct_size(void);
+
+WORD32  ithread_cond_init(void *cond);
+
+WORD32  ithread_cond_destroy(void *cond);
+
+WORD32  ithread_cond_wait(void *cond, void *mutex);
+
+WORD32  ithread_cond_signal(void *cond);
+#endif
+
 #endif /* _ITHREAD_H_ */
diff --git a/decoder/ih264d_api.c b/decoder/ih264d_api.c
index 2508d7b..923c519 100644
--- a/decoder/ih264d_api.c
+++ b/decoder/ih264d_api.c
@@ -1328,6 +1328,57 @@
     pf_aligned_free = ps_dec->pf_aligned_free;
     pv_mem_ctxt = ps_dec->pv_mem_ctxt;
 
+#ifdef KEEP_THREADS_ACTIVE
+    /* Wait for threads */
+    ps_dec->i4_break_threads = 1;
+    if(ps_dec->u4_dec_thread_created)
+    {
+        ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
+
+        ps_dec->ai4_process_start[0] = PROC_START;
+
+        ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
+
+        ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
+
+        ithread_join(ps_dec->pv_dec_thread_handle, NULL);
+
+        ps_dec->u4_dec_thread_created = 0;
+    }
+
+    if(ps_dec->u4_bs_deblk_thread_created)
+    {
+        ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
+
+        ps_dec->ai4_process_start[1] = PROC_START;
+
+        ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
+
+        ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
+
+        ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL);
+
+        ps_dec->u4_bs_deblk_thread_created = 0;
+    }
+
+    // destroy mutex and condition variable for both the threads
+    // 1. ih264d_decode_picture_thread
+    // 2. ih264d_recon_deblk_thread
+    {
+        UWORD32 i;
+        for(i = 0; i < 2; i++)
+        {
+            ithread_cond_destroy(ps_dec->apv_proc_start_condition[i]);
+            ithread_cond_destroy(ps_dec->apv_proc_done_condition[i]);
+
+            ithread_mutex_destroy(ps_dec->apv_proc_start_mutex[i]);
+            ithread_mutex_destroy(ps_dec->apv_proc_done_mutex[i]);
+        }
+    }
+    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->apv_proc_start_mutex[0]);
+    PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->apv_proc_start_condition[0]);
+#endif
+
     PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_sps);
     PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->ps_pps);
     PS_DEC_ALIGNED_FREE(ps_dec, ps_dec->pv_dec_thread_handle);
@@ -1469,6 +1520,59 @@
     memset(pv_buf, 0, size);
     ps_dec->pv_bs_deblk_thread_handle = pv_buf;
 
+#ifdef KEEP_THREADS_ACTIVE
+    {
+        UWORD32 i;
+        /* Request memory to hold mutex (start/done) for both threads */
+        size = ithread_get_mutex_lock_size() << 2;
+        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 8, size);
+        RETURN_IF((NULL == pv_buf), IV_FAIL);
+        memset(pv_buf, 0, size);
+
+        // init start/done mutex for both the threads
+        // 1. ih264d_decode_picture_thread (index 0)
+        // 2. ih264d_recon_deblk_thread (index 1)
+        for(i = 0; i < 2; i++)
+        {
+            WORD32 ret;
+            WORD32 mutex_size = ithread_get_mutex_lock_size();
+
+            ps_dec->apv_proc_start_mutex[i] =
+                            (UWORD8 *)pv_buf + (2 * i * mutex_size);
+            ps_dec->apv_proc_done_mutex[i] =
+                            (UWORD8 *)pv_buf + ((2 * i + 1) * mutex_size);
+
+            ret = ithread_mutex_init(ps_dec->apv_proc_start_mutex[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+
+            ret = ithread_mutex_init(ps_dec->apv_proc_done_mutex[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+        }
+
+        size = ithread_get_cond_struct_size() << 2;
+        pv_buf = pf_aligned_alloc(pv_mem_ctxt, 8, size);
+        RETURN_IF((NULL == pv_buf), IV_FAIL);
+        memset(pv_buf, 0, size);
+
+        // init condition variable for both the threads
+        for(i = 0; i < 2; i++)
+        {
+            WORD32 ret;
+            WORD32 cond_size = ithread_get_cond_struct_size();
+            ps_dec->apv_proc_start_condition[i] =
+                            (UWORD8 *)pv_buf + (2 * i * cond_size);
+            ps_dec->apv_proc_done_condition[i] =
+                            (UWORD8 *)pv_buf + ((2 * i + 1) * cond_size);
+
+            ret = ithread_cond_init(ps_dec->apv_proc_start_condition[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+
+            ret = ithread_cond_init(ps_dec->apv_proc_done_condition[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+        }
+    }
+#endif
+
     size = sizeof(dpb_manager_t);
     pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pv_buf), IV_FAIL);
@@ -2018,6 +2122,26 @@
     }
     ps_dec->u1_pic_decode_done = 0;
 
+#ifdef KEEP_THREADS_ACTIVE
+    {
+        UWORD32 i;
+        ps_dec->i4_break_threads = 0;
+        for (i = 0; i < 2; i++)
+        {
+            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+
+            ps_dec->ai4_process_start[i] = PROC_INIT;
+
+            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[i]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+        }
+    }
+#else
+    ps_dec->u4_dec_thread_created = 0;
+    ps_dec->u4_bs_deblk_thread_created = 0;
+#endif
+
     ps_dec_op->u4_num_bytes_consumed = 0;
     ps_dec_op->i4_reorder_depth = -1;
     ps_dec_op->i4_display_index = DEFAULT_POC;
@@ -2123,7 +2247,7 @@
 
             if(ps_pic_buf == NULL)
             {
-                UWORD32 i, display_queued = 0;
+                UWORD32 display_queued = 0;
 
                 /* check if any buffer was given for display which is not returned yet */
                 for(i = 0; i < (MAX_DISP_BUFS_NEW); i++)
@@ -2253,9 +2377,6 @@
     ps_dec->u4_first_slice_in_pic = 1;
     ps_dec->u1_slice_header_done = 0;
     ps_dec->u1_dangling_field = 0;
-
-    ps_dec->u4_dec_thread_created = 0;
-    ps_dec->u4_bs_deblk_thread_created = 0;
     ps_dec->u4_cur_bs_mb_num = 0;
     ps_dec->u4_start_recon_deblk  = 0;
     ps_dec->u4_sps_cnt_in_process = 0;
@@ -2612,10 +2733,12 @@
     }
 
     /* close deblock thread if it is not closed yet*/
+#ifndef KEEP_THREADS_ACTIVE
     if(ps_dec->u4_num_cores == 3)
     {
         ih264d_signal_bs_deblk_thread(ps_dec);
     }
+#endif
 
 
     {
diff --git a/decoder/ih264d_parse_pslice.c b/decoder/ih264d_parse_pslice.c
index 6f8df9e..77ea4b3 100644
--- a/decoder/ih264d_parse_pslice.c
+++ b/decoder/ih264d_parse_pslice.c
@@ -1566,6 +1566,17 @@
 
                     ps_dec->u4_dec_thread_created = 1;
                 }
+#ifdef KEEP_THREADS_ACTIVE
+                ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+
+                ps_dec->ai4_process_start[0] = PROC_START;
+                ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+
+                ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+#endif
 
                 if((ps_dec->u4_num_cores == 3) &&
                                 ((ps_dec->u4_app_disable_deblk_frm == 0) || ps_dec->i1_recon_in_thread3_flag)
@@ -1577,6 +1588,20 @@
                                    (void *)ps_dec);
                     ps_dec->u4_bs_deblk_thread_created = 1;
                 }
+#ifdef KEEP_THREADS_ACTIVE
+                if (ps_dec->u4_bs_deblk_thread_created)
+                {
+                    ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
+                    RETURN_IF((ret != IV_SUCCESS), ret);
+
+                    ps_dec->ai4_process_start[1] = PROC_START;
+                    ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
+                    RETURN_IF((ret != IV_SUCCESS), ret);
+
+                    ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
+                    RETURN_IF((ret != IV_SUCCESS), ret);
+                }
+#endif
             }
         }
     }
diff --git a/decoder/ih264d_parse_slice.c b/decoder/ih264d_parse_slice.c
index 266c69b..a1764fd 100644
--- a/decoder/ih264d_parse_slice.c
+++ b/decoder/ih264d_parse_slice.c
@@ -1599,6 +1599,17 @@
 
                 ps_dec->u4_dec_thread_created = 1;
             }
+#ifdef KEEP_THREADS_ACTIVE
+            ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+
+            ps_dec->ai4_process_start[0] = PROC_START;
+            ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[0]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+
+            ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
+            RETURN_IF((ret != IV_SUCCESS), ret);
+#endif
 
             if((ps_dec->u4_num_cores == 3) &&
                             ((ps_dec->u4_app_disable_deblk_frm == 0) || ps_dec->i1_recon_in_thread3_flag)
@@ -1610,6 +1621,20 @@
                                (void *)ps_dec);
                 ps_dec->u4_bs_deblk_thread_created = 1;
             }
+#ifdef KEEP_THREADS_ACTIVE
+            if (ps_dec->u4_bs_deblk_thread_created)
+            {
+                ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+
+                ps_dec->ai4_process_start[1] = PROC_START;
+                ret = ithread_cond_signal(ps_dec->apv_proc_start_condition[1]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+
+                ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
+                RETURN_IF((ret != IV_SUCCESS), ret);
+            }
+#endif
         }
 
     }
diff --git a/decoder/ih264d_structs.h b/decoder/ih264d_structs.h
index fdfbada..e6a962c 100644
--- a/decoder/ih264d_structs.h
+++ b/decoder/ih264d_structs.h
@@ -123,6 +123,16 @@
     COEFF_ABS_LEVEL_CAT_5_OFFSET = 0
 } cabac_blk_cat_offset_t;
 
+#ifdef KEEP_THREADS_ACTIVE
+typedef enum
+{
+    PROC_INIT,
+    PROC_START,
+    PROC_IN_PROGRESS,
+    PROC_DONE,
+} proc_state_t;
+#endif
+
 /** Structure for the MV bank */
 typedef struct _mv_pred_t
 {
@@ -1277,6 +1287,44 @@
     UWORD8 u1_separate_parse;
     UWORD32 u4_dec_thread_created;
     void *pv_dec_thread_handle;
+
+#ifdef KEEP_THREADS_ACTIVE
+    /**
+     * Condition variable to signal process start - One for each thread
+     */
+    void *apv_proc_start_condition[2];
+
+    /**
+     * Mutex used to keep the functions thread-safe - One for each thread
+     */
+    void *apv_proc_start_mutex[2];
+
+    /**
+     * Condition variable to signal process done - One for each thread
+     */
+    void *apv_proc_done_condition[2];
+
+    /**
+     * Mutex used to keep the functions thread-safe - One for each thread
+     */
+    void *apv_proc_done_mutex[2];
+
+    /**
+     * Process state start - One for each thread
+     */
+    proc_state_t ai4_process_start[2];
+
+    /**
+     * Process state end - One for each thread
+     */
+    proc_state_t ai4_process_done[2];
+
+    /**
+     * Flag to signal processing thread to exit
+     */
+    WORD32 i4_break_threads;
+#endif
+
     volatile UWORD8 *pu1_dec_mb_map;
     volatile UWORD8 *pu1_recon_mb_map;
     volatile UWORD16 *pu2_slice_num_map;
diff --git a/decoder/ih264d_thread_compute_bs.c b/decoder/ih264d_thread_compute_bs.c
index 951cef4..e647a60 100644
--- a/decoder/ih264d_thread_compute_bs.c
+++ b/decoder/ih264d_thread_compute_bs.c
@@ -697,41 +697,73 @@
 
     while(1)
     {
+#ifdef KEEP_THREADS_ACTIVE
+        WORD32 ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
+        if(OK != ret)
+            break;
 
-        DEBUG_THREADS_PRINTF(" Entering compute bs slice\n");
-        ih264d_recon_deblk_slice(ps_dec, ps_tfr_cxt);
-
-        DEBUG_THREADS_PRINTF(" Exit  compute bs slice \n");
-
-        if(ps_dec->cur_recon_mb_num > ps_dec->ps_cur_sps->u2_max_mb_addr)
+        while(ps_dec->ai4_process_start[1] != PROC_START)
         {
-                break;
+            ithread_cond_wait(ps_dec->apv_proc_start_condition[1],
+                              ps_dec->apv_proc_start_mutex[1]);
         }
-        else
+        ps_dec->ai4_process_start[1] = PROC_IN_PROGRESS;
+
+        ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
+        if(OK != ret || ps_dec->i4_break_threads == 1)
+            break;
+#endif
+
+        while(1)
         {
-            ps_dec->ps_computebs_cur_slice++;
-            ps_dec->u2_cur_slice_num_bs++;
+
+            DEBUG_THREADS_PRINTF(" Entering compute bs slice\n");
+            ih264d_recon_deblk_slice(ps_dec, ps_tfr_cxt);
+
+            DEBUG_THREADS_PRINTF(" Exit  compute bs slice \n");
+
+            if(ps_dec->cur_recon_mb_num > ps_dec->ps_cur_sps->u2_max_mb_addr)
+            {
+                    break;
+            }
+            else
+            {
+                ps_dec->ps_computebs_cur_slice++;
+                ps_dec->u2_cur_slice_num_bs++;
+            }
+            DEBUG_THREADS_PRINTF("CBS thread:Got next slice/end of frame signal \n ");
+
         }
-        DEBUG_THREADS_PRINTF("CBS thread:Got next slice/end of frame signal \n ");
 
+        if(ps_dec->u4_output_present &&
+            (3 == ps_dec->u4_num_cores) &&
+            (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
+        {
+            ps_dec->u4_fmt_conv_num_rows =
+                            (ps_dec->s_disp_frame_info.u4_y_ht
+                                            - ps_dec->u4_fmt_conv_cur_row);
+            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+                                ps_dec->u4_fmt_conv_cur_row,
+                                ps_dec->u4_fmt_conv_num_rows);
+            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+
+        }
+
+#ifdef KEEP_THREADS_ACTIVE
+        ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[1]);
+        if(OK != ret)
+            break;
+
+        ps_dec->ai4_process_done[1] = PROC_DONE;
+        ithread_cond_signal(ps_dec->apv_proc_done_condition[1]);
+
+        ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[1]);
+        if(OK != ret)
+            break;
+#else
+        break;
+#endif
     }
-
-    if(ps_dec->u4_output_present &&
-       (3 == ps_dec->u4_num_cores) &&
-       (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
-    {
-        ps_dec->u4_fmt_conv_num_rows =
-                        (ps_dec->s_disp_frame_info.u4_y_ht
-                                        - ps_dec->u4_fmt_conv_cur_row);
-        ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
-                              ps_dec->u4_fmt_conv_cur_row,
-                              ps_dec->u4_fmt_conv_num_rows);
-        ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
-
-    }
-
-
-
 }
 
 
diff --git a/decoder/ih264d_thread_parse_decode.c b/decoder/ih264d_thread_parse_decode.c
index 131c808..47571b3 100644
--- a/decoder/ih264d_thread_parse_decode.c
+++ b/decoder/ih264d_thread_parse_decode.c
@@ -593,39 +593,74 @@
 void ih264d_decode_picture_thread(dec_struct_t *ps_dec )
 {
     ithread_set_name("ih264d_decode_picture_thread");
+
     while(1)
     {
-        /*Complete all writes before processing next slice*/
-
-        DEBUG_THREADS_PRINTF(" Entering decode slice\n");
-
-        ih264d_decode_slice_thread(ps_dec);
-        DEBUG_THREADS_PRINTF(" Exit  ih264d_decode_slice_thread \n");
-
-
-        if(ps_dec->cur_dec_mb_num
-                        > ps_dec->ps_cur_sps->u2_max_mb_addr)
-        {
-            /*Last slice in frame*/
+#ifdef KEEP_THREADS_ACTIVE
+        WORD32 ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
+        if(OK != ret)
             break;
-        }
-        else
+
+        while(ps_dec->ai4_process_start[0] != PROC_START)
         {
-            ps_dec->ps_decode_cur_slice++;
-            ps_dec->u2_cur_slice_num_dec_thread++;
+            ithread_cond_wait(ps_dec->apv_proc_start_condition[0],
+                              ps_dec->apv_proc_start_mutex[0]);
+        }
+        ps_dec->ai4_process_start[0] = PROC_IN_PROGRESS;
+
+        ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
+        if(OK != ret || ps_dec->i4_break_threads == 1)
+            break;
+#endif
+        while(1)
+        {
+            /*Complete all writes before processing next slice*/
+
+            DEBUG_THREADS_PRINTF(" Entering decode slice\n");
+
+            ih264d_decode_slice_thread(ps_dec);
+            DEBUG_THREADS_PRINTF(" Exit  ih264d_decode_slice_thread \n");
+
+
+            if(ps_dec->cur_dec_mb_num
+                            > ps_dec->ps_cur_sps->u2_max_mb_addr)
+            {
+                /*Last slice in frame*/
+                break;
+            }
+            else
+            {
+                ps_dec->ps_decode_cur_slice++;
+                ps_dec->u2_cur_slice_num_dec_thread++;
+            }
+
+        }
+        if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
+            (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
+        {
+            ps_dec->u4_fmt_conv_num_rows =
+                            (ps_dec->s_disp_frame_info.u4_y_ht
+                                            - ps_dec->u4_fmt_conv_cur_row);
+            ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
+                                ps_dec->u4_fmt_conv_cur_row,
+                                ps_dec->u4_fmt_conv_num_rows);
+            ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
         }
 
-    }
-    if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) &&
-       (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht))
-    {
-        ps_dec->u4_fmt_conv_num_rows =
-                        (ps_dec->s_disp_frame_info.u4_y_ht
-                                        - ps_dec->u4_fmt_conv_cur_row);
-        ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op),
-                              ps_dec->u4_fmt_conv_cur_row,
-                              ps_dec->u4_fmt_conv_num_rows);
-        ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows;
+#ifdef KEEP_THREADS_ACTIVE
+        ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]);
+        if(OK != ret)
+            break;
+
+        ps_dec->ai4_process_done[0] = PROC_DONE;
+        ithread_cond_signal(ps_dec->apv_proc_done_condition[0]);
+
+        ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]);
+        if(OK != ret)
+            break;
+#else
+        break;
+#endif
     }
 }
 
@@ -633,16 +668,58 @@
 {
     if(ps_dec->u4_dec_thread_created == 1)
     {
+#ifdef KEEP_THREADS_ACTIVE
+        proc_state_t i4_process_state;
+        ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]);
+        i4_process_state = ps_dec->ai4_process_start[0];
+        ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]);
+
+        // only wait if the thread has started decoding
+        if(i4_process_state != PROC_INIT)
+        {
+            ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]);
+
+            while(ps_dec->ai4_process_done[0] != PROC_DONE)
+            {
+                ithread_cond_wait(ps_dec->apv_proc_done_condition[0],
+                                    ps_dec->apv_proc_done_mutex[0]);
+            }
+            ps_dec->ai4_process_done[0] = PROC_INIT;
+            ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]);
+        }
+#else
         ithread_join(ps_dec->pv_dec_thread_handle, NULL);
         ps_dec->u4_dec_thread_created = 0;
+#endif
     }
 }
 void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec)
 {
     if(ps_dec->u4_bs_deblk_thread_created)
     {
+#ifdef KEEP_THREADS_ACTIVE
+        proc_state_t i4_process_state;
+        ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]);
+        i4_process_state = ps_dec->ai4_process_start[1];
+        ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]);
+
+        // only wait if the thread has started deblocking
+        if(i4_process_state != PROC_INIT)
+        {
+            ithread_mutex_lock(ps_dec->apv_proc_done_mutex[1]);
+
+            while(ps_dec->ai4_process_done[1] != PROC_DONE)
+            {
+                ithread_cond_wait(ps_dec->apv_proc_done_condition[1],
+                                    ps_dec->apv_proc_done_mutex[1]);
+            }
+            ps_dec->ai4_process_done[1] = PROC_INIT;
+            ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[1]);
+        }
+#else
         ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL);
         ps_dec->u4_bs_deblk_thread_created = 0;
+#endif
     }
 
 }