Merge 5149f1844543625f789fd7619fa2e2de154e2836 on remote branch

Change-Id: I7b83de2ae456f4557e1966332322af3deeecf984
diff --git a/Android.bp b/Android.bp
index e5ef587..87203a0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -29,6 +29,8 @@
         "-O3",
         "-DANDROID",
         "-Werror",
+        // KEEP_THREADS_ACTIVE is experimental, so it is explicitly left undefined here
+        "-UKEEP_THREADS_ACTIVE",
     ],
 
     export_include_dirs: [
diff --git a/common/ithread.c b/common/ithread.c
index ebd9e72..2a31623 100644
--- a/common/ithread.c
+++ b/common/ithread.c
@@ -189,3 +189,30 @@
     return 1;
 
 }
+
+#ifdef KEEP_THREADS_ACTIVE /* condition-variable wrappers over pthreads; built only when KEEP_THREADS_ACTIVE is defined */
+WORD32 ithread_get_cond_struct_size(void) /* returns sizeof(pthread_cond_t), i.e. the bytes needed to hold one condition variable */
+{
+    return (sizeof(pthread_cond_t));
+}
+
+WORD32 ithread_cond_init(void *cond) /* treats cond as a pthread_cond_t and initialises it with a NULL attribute argument; returns pthread_cond_init()'s result */
+{
+    return pthread_cond_init((pthread_cond_t *)cond, NULL);
+}
+
+WORD32 ithread_cond_destroy(void *cond) /* treats cond as a pthread_cond_t and destroys it; returns pthread_cond_destroy()'s result */
+{
+    return pthread_cond_destroy((pthread_cond_t *)cond);
+}
+
+WORD32 ithread_cond_wait(void *cond, void *mutex) /* waits on cond via pthread_cond_wait(); mutex is passed through as a pthread_mutex_t; returns its result */
+{
+    return pthread_cond_wait((pthread_cond_t *)cond, (pthread_mutex_t *)mutex);
+}
+
+WORD32 ithread_cond_signal(void *cond) /* signals cond via pthread_cond_signal(); returns its result */
+{
+    return pthread_cond_signal((pthread_cond_t *)cond);
+}
+#endif
diff --git a/common/ithread.h b/common/ithread.h
index eb75d20..dd17b0b 100644
--- a/common/ithread.h
+++ b/common/ithread.h
@@ -77,4 +77,17 @@
 WORD32  ithread_sem_destroy(void *sem);
 
 WORD32 ithread_set_affinity(WORD32 core_id);
+
+#ifdef KEEP_THREADS_ACTIVE
+WORD32 ithread_get_cond_struct_size(void);
+
+WORD32 ithread_cond_init(void *cond);
+
+WORD32 ithread_cond_destroy(void *cond);
+
+WORD32 ithread_cond_wait(void *cond, void *mutex);
+
+WORD32 ithread_cond_signal(void *cond);
+#endif
+
 #endif /* __ITHREAD_H__ */
diff --git a/decoder/impeg2d_api.h b/decoder/impeg2d_api.h
index f239637..9cf41be 100644
--- a/decoder/impeg2d_api.h
+++ b/decoder/impeg2d_api.h
@@ -49,11 +49,17 @@
 
 
 
+#ifdef KEEP_THREADS_ACTIVE
+#define THREADS_ACTIVE_MEM_RECORDS (2 * MAX_THREADS)
+#else
+#define THREADS_ACTIVE_MEM_RECORDS 0
+#endif
 
-#define NUM_MEM_RECORDS                 4 * MAX_THREADS + NUM_INT_FRAME_BUFFERS + 5 + 2 + 1
+#define NUM_MEM_RECORDS                 (4 * MAX_THREADS + NUM_INT_FRAME_BUFFERS + \
+                                         5 + 2 + 1 + THREADS_ACTIVE_MEM_RECORDS)
 
 
-#define SETBIT(a,i)   ((a) |= (1 << i))
+#define SETBIT(a,i)   ((a) |= (1 << (i)))
 
 
 /*********************/
diff --git a/decoder/impeg2d_api_main.c b/decoder/impeg2d_api_main.c
index 4e6dfef..626e493 100644
--- a/decoder/impeg2d_api_main.c
+++ b/decoder/impeg2d_api_main.c
@@ -559,6 +559,28 @@
         ps_mem_rec++;
         u1_no_rec++;
 
+#ifdef KEEP_THREADS_ACTIVE
+        /* To store start/done mutex */
+        ps_mem_rec->u4_mem_alignment = 8 /* 8 byte alignment*/;
+        ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        /* Request memory to hold mutex (start/done) */
+        WORD32 size = 2 * ithread_get_mutex_lock_size();
+        ps_mem_rec->u4_mem_size = size;
+
+        ps_mem_rec++;
+        u1_no_rec++;
+
+        /* To store start/done condition variables */
+        ps_mem_rec->u4_mem_alignment = 8 /* 8 byte alignment*/;
+        ps_mem_rec->e_mem_type = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        /* Request memory to hold condition variables */
+        size = 2 * ithread_get_cond_struct_size();
+        ps_mem_rec->u4_mem_size = size;
+
+        ps_mem_rec++;
+        u1_no_rec++;
+#endif
+
         /*************************************************************************/
         /*      Fill the memory requirement for Motion Compensation Buffers      */
         /*************************************************************************/
@@ -1704,6 +1726,12 @@
 
     for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
     {
+#ifdef KEEP_THREADS_ACTIVE
+    WORD32 ret;
+    UWORD8 *pv_buf;
+    WORD32 mutex_size = ithread_get_mutex_lock_size();
+    WORD32 cond_size = ithread_get_cond_struct_size();
+#endif
     /*************************************************************************/
     /*                      For MPEG2 Decoder Context                        */
     /*************************************************************************/
@@ -1723,6 +1751,45 @@
      u4_num_mem_rec++;
      ps_mem_rec++;
 
+#ifdef KEEP_THREADS_ACTIVE
+    pv_buf = ps_mem_rec->pv_base;
+    if (ps_mem_rec->u4_mem_size < 2 * mutex_size)
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_DEC_PER_MEM_INSUFFICIENT;
+        return(IV_FAIL);
+    }
+
+    ps_dec_state->pv_proc_start_mutex = (UWORD8 *)pv_buf;
+    ps_dec_state->pv_proc_done_mutex = (UWORD8 *)pv_buf + mutex_size;
+
+    ret = ithread_mutex_init(ps_dec_state->pv_proc_start_mutex);
+    RETURN_IF((ret != (IMPEG2D_ERROR_CODES_T)IV_SUCCESS), ret);
+
+    ret = ithread_mutex_init(ps_dec_state->pv_proc_done_mutex);
+    RETURN_IF((ret != (IMPEG2D_ERROR_CODES_T)IV_SUCCESS), ret);
+
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+    pv_buf = ps_mem_rec->pv_base;
+    if (ps_mem_rec->u4_mem_size < 2 * cond_size)
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_DEC_PER_MEM_INSUFFICIENT;
+        return(IV_FAIL);
+    }
+    ps_dec_state->pv_proc_start_condition = (UWORD8 *)pv_buf;
+    ps_dec_state->pv_proc_done_condition = (UWORD8 *)pv_buf + cond_size;
+
+    ret = ithread_cond_init(ps_dec_state->pv_proc_start_condition);
+    RETURN_IF((ret != (IMPEG2D_ERROR_CODES_T)IV_SUCCESS), ret);
+
+    ret = ithread_cond_init(ps_dec_state->pv_proc_done_condition);
+    RETURN_IF((ret != (IMPEG2D_ERROR_CODES_T)IV_SUCCESS), ret);
+
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+#endif
+
     /*************************************************************************/
     /*                      For Motion Compensation Buffers                  */
     /*************************************************************************/
@@ -1947,13 +2014,6 @@
     u4_num_mem_rec++;
     ps_mem_rec++;
 
-    if(u4_num_mem_rec > ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec)
-    {
-        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_NUM_MEM_REC_NOT_SUFFICIENT;
-        return(IV_FAIL);
-
-    }
-
     ps_dec_state->u1_flushfrm = 0;
     ps_dec_state->u1_flushcnt = 0;
     ps_dec_state->pv_jobq = impeg2_jobq_init(ps_dec_state->pv_jobq_buf, ps_dec_state->i4_jobq_buf_size);
@@ -1978,6 +2038,11 @@
     ps_mem_rec++;
     ps_dec_state->u4_num_mem_records = u4_num_mem_rec;
 
+    if(u4_num_mem_rec != ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec)
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_NUM_MEM_REC_NOT_SUFFICIENT;
+        return(IV_FAIL);
+    }
 
     ps_dec_state->u4_num_frames_decoded    = 0;
     ps_dec_state->aps_ref_pics[0] = NULL;
@@ -2020,6 +2085,10 @@
     dec_state_multi_core_t *ps_dec_state_multi_core;
     iv_mem_rec_t *ps_mem_rec;
     iv_mem_rec_t *ps_temp_rec;
+#ifdef KEEP_THREADS_ACTIVE
+    IMPEG2D_ERROR_CODES_T ret;
+    dec_state_t *ps_dec_thd;
+#endif
 
 
 
@@ -2045,6 +2114,48 @@
     ps_retr_mem_rec_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code       = IV_SUCCESS;
     ps_retr_mem_rec_op->s_ivd_retrieve_mem_rec_op_t.u4_num_mem_rec_filled   = ps_dec_state->u4_num_mem_records;
 
+#ifdef KEEP_THREADS_ACTIVE
+    for(u4_i = 0; u4_i < MAX_THREADS; u4_i++)
+    {
+        ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[u4_i];
+        if(ps_dec_state_multi_core->au4_thread_launched[u4_i])
+        {
+            ret = ithread_mutex_lock(ps_dec_thd->pv_proc_start_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret) return(IV_FAIL);
+
+            // set the start flag and the break flag so that a thread waiting on the
+            // start condition in the decode routine wakes up and exits its loop
+            ps_dec_thd->ai4_process_start = 1;
+            ps_dec_state_multi_core->i4_break_threads = 1;
+
+            ret = ithread_cond_signal(ps_dec_thd->pv_proc_start_condition);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret) return(IV_FAIL);
+
+            ret = ithread_mutex_unlock(ps_dec_thd->pv_proc_start_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret) return(IV_FAIL);
+
+            ithread_join(ps_dec_thd->pv_codec_thread_handle, NULL);
+            ps_dec_state_multi_core->au4_thread_launched[u4_i] = 0;
+        }
+
+        ret = ithread_cond_destroy(ps_dec_thd->pv_proc_start_condition);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret)
+            return(IV_FAIL);
+
+        ret = ithread_cond_destroy(ps_dec_thd->pv_proc_done_condition);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret)
+            return(IV_FAIL);
+
+        ret = ithread_mutex_destroy(ps_dec_thd->pv_proc_start_mutex);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret)
+            return(IV_FAIL);
+
+        ret = ithread_mutex_destroy(ps_dec_thd->pv_proc_done_mutex);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != ret)
+            return(IV_FAIL);
+    }
+#endif
+
     impeg2_jobq_deinit(ps_dec_state->pv_jobq);
     IMPEG2D_PRINT_STATISTICS();
 
diff --git a/decoder/impeg2d_dec_hdr.c b/decoder/impeg2d_dec_hdr.c
index 3c974a5..26a043a 100644
--- a/decoder/impeg2d_dec_hdr.c
+++ b/decoder/impeg2d_dec_hdr.c
@@ -945,21 +945,47 @@
     UWORD32 u4_bits_read;
     WORD32 i4_dequeue_job;
     IMPEG2D_ERROR_CODES_T e_error;
+#ifdef KEEP_THREADS_ACTIVE
+    UWORD32 id = ps_dec->currThreadId;
+    dec_state_multi_core_t *ps_dec_state_multi_core = ps_dec->ps_dec_state_multi_core;
+#endif
 
-    i4_cur_row = ps_dec->u2_mb_y + 1;
-
-    i4_continue_decode = 1;
-
-    i4_dequeue_job = 1;
-    do
+    while (1)
     {
-        if(i4_cur_row > ps_dec->u2_num_vert_mb)
+#ifdef KEEP_THREADS_ACTIVE
+        if(id != 0)
         {
-            i4_continue_decode = 0;
-            break;
-        }
+            e_error = ithread_mutex_lock(ps_dec->pv_proc_start_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != e_error)
+                break;
 
+            while(!ps_dec->ai4_process_start)
+            {
+                ithread_cond_wait(ps_dec->pv_proc_start_condition,
+                                  ps_dec->pv_proc_start_mutex);
+            }
+            ps_dec->ai4_process_start = 0;
+            e_error = ithread_mutex_unlock(ps_dec->pv_proc_start_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != e_error)
+                break;
+            // break off at the end of decoding all the frames
+            if(ps_dec_state_multi_core->i4_break_threads)
+                break;
+        }
+#endif
+        i4_cur_row = ps_dec->u2_mb_y + 1;
+
+        i4_continue_decode = 1;
+
+        i4_dequeue_job = 1;
+        do
         {
+            if(i4_cur_row > ps_dec->u2_num_vert_mb)
+            {
+                i4_continue_decode = 0;
+                break;
+            }
+
             if((ps_dec->i4_num_cores> 1) && (i4_dequeue_job))
             {
                 job_t s_job;
@@ -1022,113 +1048,134 @@
                     return;
                 }
             }
-        }
 
-        /* Detecting next slice start code */
-        while(1)
-        {
-            // skip (dec->u4_num_cores-1) rows
-            u4_bits_read = impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,START_CODE_LEN);
-            temp = u4_bits_read & 0xFF;
-            i4_continue_decode = (((u4_bits_read >> 8) == 0x01) && (temp) && (temp <= 0xAF));
-
-            if (1 == ps_dec->i4_num_cores && 0 == ps_dec->u2_num_mbs_left)
+            /* Detecting next slice start code */
+            while(1)
             {
-                i4_continue_decode = 0;
-#ifdef __ANDROID__
-                android_errorWriteLog(0x534e4554, "26070014");
-#endif
-            }
+                // skip (dec->u4_num_cores-1) rows
+                u4_bits_read = impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,START_CODE_LEN);
+                temp = u4_bits_read & 0xFF;
+                i4_continue_decode = (((u4_bits_read >> 8) == 0x01) && (temp) && (temp <= 0xAF));
 
-            if(i4_continue_decode)
-            {
-                if (0 != ps_dec->u2_num_mbs_left)
+                if (1 == ps_dec->i4_num_cores && 0 == ps_dec->u2_num_mbs_left)
                 {
-                    /* If the slice is from the same row, then continue decoding without dequeue */
-                    if((temp - 1) == i4_cur_row)
+                    i4_continue_decode = 0;
+#ifdef __ANDROID__
+                    android_errorWriteLog(0x534e4554, "26070014");
+#endif
+                }
+
+                if(i4_continue_decode)
+                {
+                    if (0 != ps_dec->u2_num_mbs_left)
                     {
-                        i4_dequeue_job = 0;
-                    }
-                    else
-                    {
-                        if(temp < ps_dec->i4_end_mb_y)
+                        /* If the slice is from the same row, then continue decoding without dequeue */
+                        if((temp - 1) == i4_cur_row)
                         {
-                            i4_cur_row = ps_dec->u2_mb_y;
+                            i4_dequeue_job = 0;
                         }
                         else
                         {
-                            i4_dequeue_job = 1;
+                            if(temp < ps_dec->i4_end_mb_y)
+                            {
+                                i4_cur_row = ps_dec->u2_mb_y;
+                            }
+                            else
+                            {
+                                i4_dequeue_job = 1;
+                            }
                         }
                     }
+                    else
+                    {
+                        i4_dequeue_job = 1;
+                    }
+                    break;
                 }
                 else
-                {
-                    i4_dequeue_job = 1;
-                }
-                break;
+                    break;
             }
-            else
-                break;
-        }
 
-    }while(i4_continue_decode);
-    if(ps_dec->i4_num_cores > 1)
-    {
-        while(1)
+        }while(i4_continue_decode);
+        if(ps_dec->i4_num_cores > 1)
         {
-            job_t s_job;
-            IV_API_CALL_STATUS_T e_ret;
-
-            e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1);
-            if(e_ret != IV_SUCCESS)
-                break;
-            if(CMD_FMTCONV == s_job.i4_cmd)
+            while(1)
             {
-                WORD32 start_row;
-                WORD32 num_rows;
-                start_row = s_job.i2_start_mb_y << 4;
-                num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size);
-                num_rows -= start_row;
+                job_t s_job;
+                IV_API_CALL_STATUS_T e_ret;
+
+                e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1);
+                if(e_ret != IV_SUCCESS)
+                    break;
+                if(CMD_FMTCONV == s_job.i4_cmd)
+                {
+                    WORD32 start_row;
+                    WORD32 num_rows;
+                    start_row = s_job.i2_start_mb_y << 4;
+                    num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size);
+                    num_rows -= start_row;
+                    if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame))
+                    {
+                        impeg2d_deinterlace(ps_dec,
+                                            ps_dec->ps_disp_pic,
+                                            ps_dec->ps_disp_frm_buf,
+                                            start_row,
+                                            num_rows);
+
+                    }
+                    else
+                    {
+                        impeg2d_format_convert(ps_dec,
+                                               ps_dec->ps_disp_pic,
+                                               ps_dec->ps_disp_frm_buf,
+                                               start_row,
+                                               num_rows);
+                    }
+                }
+            }
+        }
+        else
+        {
+            if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat)))
+            {
                 if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame))
                 {
                     impeg2d_deinterlace(ps_dec,
                                         ps_dec->ps_disp_pic,
                                         ps_dec->ps_disp_frm_buf,
-                                        start_row,
-                                        num_rows);
+                                        0,
+                                        ps_dec->u2_vertical_size);
 
                 }
                 else
                 {
-                    impeg2d_format_convert(ps_dec,
-                                           ps_dec->ps_disp_pic,
-                                           ps_dec->ps_disp_frm_buf,
-                                           start_row,
-                                           num_rows);
+                    impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic,
+                                            ps_dec->ps_disp_frm_buf,
+                                            0, ps_dec->u2_vertical_size);
                 }
             }
         }
-    }
-    else
-    {
-        if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat)))
+#ifdef KEEP_THREADS_ACTIVE
+        if(id != 0)
         {
-            if(ps_dec->u4_deinterlace && (0 == ps_dec->u2_progressive_frame))
-            {
-                impeg2d_deinterlace(ps_dec,
-                                    ps_dec->ps_disp_pic,
-                                    ps_dec->ps_disp_frm_buf,
-                                    0,
-                                    ps_dec->u2_vertical_size);
+            e_error = ithread_mutex_lock(ps_dec->pv_proc_done_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != e_error)
+                break;
 
-            }
-            else
-            {
-                impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic,
-                                        ps_dec->ps_disp_frm_buf,
-                                        0, ps_dec->u2_vertical_size);
-            }
+            ps_dec->ai4_process_done = 1;
+            ithread_cond_signal(ps_dec->pv_proc_done_condition);
+
+            e_error = ithread_mutex_unlock(ps_dec->pv_proc_done_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != e_error)
+                break;
         }
+        else
+        {
+            break;
+        }
+#else
+        break;
+#endif
     }
 }
 
@@ -1427,13 +1474,17 @@
     ps_dec_state_multi_core = ps_dec->ps_dec_state_multi_core;
     impeg2d_get_slice_pos(ps_dec_state_multi_core);
 
+#ifdef KEEP_THREADS_ACTIVE
+    ps_dec->currThreadId = 0;
+#endif
+
     i4_min_mb_y = 1;
-    for(i=0; i < ps_dec->i4_num_cores - 1; i++)
+    for(i=1; i < ps_dec->i4_num_cores; i++)
     {
         // initialize decoder context for thread
         // launch dec->u4_num_cores-1 threads
 
-        ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1];
+        ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i];
 
         ps_dec_thd->ps_disp_pic = ps_dec->ps_disp_pic;
         ps_dec_thd->ps_disp_frm_buf = ps_dec->ps_disp_frm_buf;
@@ -1441,28 +1492,57 @@
         i4_status = impeg2d_init_thread_dec_ctxt(ps_dec, ps_dec_thd, i4_min_mb_y);
         //impeg2d_dec_pic_data_thread(ps_dec_thd);
 
-        if(i4_status == 0)
+        if(i4_status == 0 && !ps_dec_state_multi_core->au4_thread_launched[i])
         {
+#ifdef KEEP_THREADS_ACTIVE
+            ps_dec_thd->currThreadId = i;
+#endif
             ithread_create(ps_dec_thd->pv_codec_thread_handle, NULL, (void *)impeg2d_dec_pic_data_thread, ps_dec_thd);
-            ps_dec_state_multi_core->au4_thread_launched[i + 1] = 1;
+            ps_dec_state_multi_core->au4_thread_launched[i] = 1;
             i4_min_mb_y = ps_dec_thd->u2_mb_y + 1;
         }
+#ifndef KEEP_THREADS_ACTIVE
         else
         {
-            ps_dec_state_multi_core->au4_thread_launched[i + 1] = 0;
+            ps_dec_state_multi_core->au4_thread_launched[i] = 0;
             break;
         }
+#else
+        i4_status = ithread_mutex_lock(ps_dec_thd->pv_proc_start_mutex);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != i4_status) return;
+
+        ps_dec_thd->ai4_process_start = 1;
+        ithread_cond_signal(ps_dec_thd->pv_proc_start_condition);
+
+        i4_status = ithread_mutex_unlock(ps_dec_thd->pv_proc_start_mutex);
+        if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != i4_status) return;
+#endif
     }
 
     impeg2d_dec_pic_data_thread(ps_dec);
 
     // wait for threads to complete
-    for(i=0; i < (ps_dec->i4_num_cores - 1); i++)
+    for(i=1; i < ps_dec->i4_num_cores; i++)
     {
-        if(ps_dec_state_multi_core->au4_thread_launched[i + 1] == 1)
+        if(ps_dec_state_multi_core->au4_thread_launched[i])
         {
-            ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1];
+            ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i];
+#ifdef KEEP_THREADS_ACTIVE
+            i4_status = ithread_mutex_lock(ps_dec_thd->pv_proc_done_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != i4_status) return;
+
+            while(!ps_dec_thd->ai4_process_done)
+            {
+                ithread_cond_wait(ps_dec_thd->pv_proc_done_condition,
+                                  ps_dec_thd->pv_proc_done_mutex);
+            }
+            ps_dec_thd->ai4_process_done = 0;
+            i4_status = ithread_mutex_unlock(ps_dec_thd->pv_proc_done_mutex);
+            if((IMPEG2D_ERROR_CODES_T)IV_SUCCESS != i4_status) return;
+#else
             ithread_join(ps_dec_thd->pv_codec_thread_handle, NULL);
+            ps_dec_state_multi_core->au4_thread_launched[i] = 0;
+#endif
         }
     }
 
diff --git a/decoder/impeg2d_structs.h b/decoder/impeg2d_structs.h
index 57182c1..6defcf6 100644
--- a/decoder/impeg2d_structs.h
+++ b/decoder/impeg2d_structs.h
@@ -316,6 +316,38 @@
 
     void            *pv_codec_thread_handle;
     void            *ps_dec_state_multi_core;
+#ifdef KEEP_THREADS_ACTIVE
+    UWORD32         currThreadId;
+    /**
+     * Condition variable to signal process start
+     */
+    void *pv_proc_start_condition;
+
+    /**
+     * Mutex protecting ai4_process_start; used together with pv_proc_start_condition
+     */
+    void *pv_proc_start_mutex;
+
+    /**
+     * Condition variable to signal process done
+     */
+    void *pv_proc_done_condition;
+
+    /**
+     * Mutex protecting ai4_process_done; used together with pv_proc_done_condition
+     */
+    void *pv_proc_done_mutex;
+
+    /**
+     * Start flag for this thread: set to 1 to request processing, reset to 0 by the thread once it wakes
+     */
+    WORD32 ai4_process_start;
+
+    /**
+     * Done flag for this thread: set to 1 by the thread when its processing ends, reset to 0 by the waiter
+     */
+    WORD32 ai4_process_done;
+#endif
     UWORD32         u4_inp_ts;
     pic_buf_t       *ps_cur_pic;
     pic_buf_t       *ps_disp_pic;
@@ -414,7 +446,8 @@
 
 typedef struct _dec_state_multi_core
 {
-    // contains the decoder state of decoder for each thread
+    // contains the decoder state for each thread: the main thread plus up to (MAX_THREADS-1) worker threads
+    // ps_dec_state[0] and au4_thread_launched[0] are used for main thread
     dec_state_t *ps_dec_state[MAX_THREADS];
     UWORD32     au4_thread_launched[MAX_THREADS];
     // number of rows: first thread will populate the row offsets and update
@@ -422,6 +455,12 @@
     // and start decoding
     UWORD32     au4_row_offset[MAX_MB_ROWS];
     volatile    UWORD32 u4_row_offset_cnt;
+#ifdef KEEP_THREADS_ACTIVE
+    /**
+     * Flag to signal processing thread to exit
+     */
+    WORD32 i4_break_threads;
+#endif
 }dec_state_multi_core_t;