DO NOT MERGE - Merge RQ3A.210605.005

Bug: 190855093
Merged-In: Id5383fd19f523a6eeb95f2b15ab0141273bd02e7
Change-Id: Ia697d0aa372863e57f8f9be8b56a45af9ec62348
diff --git a/common/arm/ihevc_resi_trans_neon.c b/common/arm/ihevc_resi_trans_neon.c
index 280b8e9..bf9c058 100644
--- a/common/arm/ihevc_resi_trans_neon.c
+++ b/common/arm/ihevc_resi_trans_neon.c
@@ -66,10 +66,9 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag)
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane)
 {
-    WORD32 chroma_flag = dst_strd_chr_flag & 1;
-    WORD32 dst_strd = dst_strd_chr_flag >> 16;
     UWORD32 sad;
     uint8x16_t inp_buf, pred_buf;
     int16x8_t diff_1, diff_2;
@@ -86,15 +85,15 @@
     uint64x2_t c;
 
     (void)pi4_temp;
-    if(chroma_flag == 0)
+    if(e_chroma_plane == NULL_PLANE)
     {
         inp_buf = load_unaligned_u8q(pu1_src, src_strd);
         pred_buf = load_unaligned_u8q(pu1_pred, pred_strd);
     }
     else
     {
-        inp_buf = load_unaligned_u8qi(pu1_src, src_strd);
-        pred_buf = load_unaligned_u8qi(pu1_pred, pred_strd);
+        inp_buf = load_unaligned_u8qi(pu1_src + e_chroma_plane, src_strd);
+        pred_buf = load_unaligned_u8qi(pu1_pred + e_chroma_plane, pred_strd);
     }
 
     abs = vabdl_u8(vget_low_u8(inp_buf), vget_low_u8(pred_buf));
@@ -198,9 +197,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
- *  0 - luma transform, 1 - chroma transform. Not used for 4x4ttyppe1
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  block sad
  *
@@ -216,9 +217,9 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag)
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane)
 {
-    WORD32 dst_strd;
     UWORD32 sad;
     int16x4_t src0_4x16b;
     int16x4_t src1_4x16b;
@@ -242,7 +243,7 @@
     uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(pred_u8));
     uint32x4_t b;
     uint64x2_t c;
-
+    UNUSED(e_chroma_plane);
     abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(pred_u8));
     b = vpaddlq_u16(abs);
     c = vpaddlq_u32(b);
@@ -251,7 +252,6 @@
             0);
 
     (void)pi4_temp;
-    dst_strd = dst_strd_chr_flag >> 16;
 
     /*************************    4x4 16bit Transpose  ***********************/
     src0_4x16b = vget_low_s16(src_reg0);
@@ -379,8 +379,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -396,7 +399,8 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag)
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane)
 {
     int16x8_t diff_16[8];
     int16x8_t abs = vdupq_n_s16(0);
@@ -404,13 +408,11 @@
     int64x2_t tmp_b;
     int32x2_t sad_v;
     int32x4x2_t a0, a1, a2, a3, a4, a5, a6, a7;
-    int chroma_flag = dst_strd_chr_flag & 1;
-    int dst_strd = dst_strd_chr_flag >> 16;
     UWORD32 sad;
 
     (void)pi4_temp;
-#define RESIDUE(k, is_chroma)                                                                      \
-    if(!is_chroma)                                                                                 \
+#define RESIDUE(k)                                                                                 \
+    if(NULL_PLANE == e_chroma_plane)                                                               \
     {                                                                                              \
         const uint8x8_t s##k = vld1_u8(pu1_src);                                                   \
         const uint8x8_t p##k = vld1_u8(pu1_pred);                                                  \
@@ -421,8 +423,8 @@
     }                                                                                              \
     else                                                                                           \
     {                                                                                              \
-        const uint8x8_t s##k = vld2_u8(pu1_src).val[0];                                            \
-        const uint8x8_t p##k = vld2_u8(pu1_pred).val[0];                                           \
+        const uint8x8_t s##k = vld2_u8(pu1_src).val[e_chroma_plane];                               \
+        const uint8x8_t p##k = vld2_u8(pu1_pred).val[e_chroma_plane];                              \
         diff_16[k] = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k));                                  \
         pu1_src += src_strd;                                                                       \
         pu1_pred += pred_strd;                                                                     \
@@ -430,14 +432,14 @@
     }
 
     // stage 1
-    RESIDUE(0, chroma_flag);
-    RESIDUE(1, chroma_flag);
-    RESIDUE(2, chroma_flag);
-    RESIDUE(3, chroma_flag);
-    RESIDUE(4, chroma_flag);
-    RESIDUE(5, chroma_flag);
-    RESIDUE(6, chroma_flag);
-    RESIDUE(7, chroma_flag);
+    RESIDUE(0);
+    RESIDUE(1);
+    RESIDUE(2);
+    RESIDUE(3);
+    RESIDUE(4);
+    RESIDUE(5);
+    RESIDUE(6);
+    RESIDUE(7);
 
     tmp_a = vpaddlq_s16(abs);
     tmp_b = vpaddlq_s32(tmp_a);
@@ -792,11 +794,12 @@
     return sad;
 }
 
-static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chroma)
+static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b,
+                        CHROMA_PLANE_ID_T e_chroma_plane)
 {
     int i;
 
-    if(is_chroma == 0)
+    if(e_chroma_plane == NULL_PLANE)
     {
         for (i = 0; i < 16; i++)
         {
@@ -808,7 +811,7 @@
     {
         for (i = 0; i < 16; i++)
         {
-            b[i] = vld2_u8(a).val[0];
+            b[i] = vld2_u8(a).val[e_chroma_plane];
             a += stride;
         }
     }
@@ -1261,8 +1264,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -1278,12 +1284,11 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag)
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane)
 {
     UWORD32 u4_blk_sad = 0;
     WORD32 chroma_flag;
-    WORD32 dst_strd;
-
     uint8x8_t temp0[16], temp1[16];
     int16x8_t temp2[16], temp3[16];
     int32x4_t tmp_a, tmp_b;
@@ -1292,21 +1297,19 @@
     int32x4x2_t out0[16], out1[16], temp4[16], temp5[16];
 
     (void)pi4_temp;
-    chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
-
+    chroma_flag = e_chroma_plane != NULL_PLANE;
     /* Residue + Forward Transform 1st stage */
     // Left half.
-    load(pu1_src, src_strd, temp0, chroma_flag);
-    load(pu1_pred, pred_strd, temp1, chroma_flag);
+    load(pu1_src, src_strd, temp0, e_chroma_plane);
+    load(pu1_pred, pred_strd, temp1, e_chroma_plane);
 
     tmp_a = diff(temp0, temp1, temp2);
     cross_input_16(temp2, temp3);
     dct_body_16_32(temp3, out0);
 
     // Right half.
-    load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, chroma_flag);
-    load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, chroma_flag);
+    load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, e_chroma_plane);
+    load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, e_chroma_plane);
 
     tmp_b = diff(temp0, temp1, temp2);
     cross_input_16(temp2, temp3);
diff --git a/common/arm/ihevc_resi_trans_neon_32x32.c b/common/arm/ihevc_resi_trans_neon_32x32.c
index 5270f80..67f742c 100644
--- a/common/arm/ihevc_resi_trans_neon_32x32.c
+++ b/common/arm/ihevc_resi_trans_neon_32x32.c
@@ -86,8 +86,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -98,18 +101,17 @@
  */
 UWORD32 ihevc_resi_trans_32x32_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
     WORD32 *pi4_temp, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag)
+    WORD32 dst_strd, CHROMA_PLANE_ID_T e_chroma_plane)
 {
     int16x8_t diff_16[4][2];
     WORD32 i;
     int32x2_t sad;
     int64x2_t tmp_a;
     UWORD32 u4_blk_sad = 0;
-    WORD32 dst_strd = dst_strd_chr_flag >> 16;
     WORD32 *pi4_temp_orig = pi4_temp;
     int16x8_t abs = vdupq_n_s16(0);
     int32x4_t sum_val = vdupq_n_s32(0);
-
+    UNUSED(e_chroma_plane);
 
     // Stage 1
     for(i = 0; i < 16; i++)
diff --git a/common/ihevc_defs.h b/common/ihevc_defs.h
index 58761eb..faa3704 100644
--- a/common/ihevc_defs.h
+++ b/common/ihevc_defs.h
@@ -133,6 +133,13 @@
     CHROMA_FMT_IDC_YUV444_PLANES = 4,
 };
 
+typedef enum
+{
+    NULL_PLANE = -1,
+    U_PLANE = 0,
+    V_PLANE = 1
+} CHROMA_PLANE_ID_T;
+
 /* Pred Modes */
 /* Do not change enum values */
 enum
diff --git a/common/ihevc_resi_trans.c b/common/ihevc_resi_trans.c
index e1537cc..d1c2470 100644
--- a/common/ihevc_resi_trans.c
+++ b/common/ihevc_resi_trans.c
@@ -81,8 +81,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  *
  * @returns  Void
@@ -99,7 +102,8 @@
                                  WORD16 *pi2_dst,
                                  WORD32 src_strd,
                                  WORD32 pred_strd,
-                                    WORD32 dst_strd_chr_flag)
+                                 WORD32 dst_strd,
+                                 CHROMA_PLANE_ID_T e_chroma_plane)
 {
     WORD32 i, c[4];
     WORD32 add, shift;
@@ -107,11 +111,7 @@
     WORD32 *pi4_tmp_orig;
     WORD16 *pi2_dst_orig;
     UWORD32  u4_blk_sad = 0;
- //   WORD32 chroma_flag;
-    WORD32 dst_strd;
-
- //   chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
+    UNUSED(e_chroma_plane);
 
     pi2_dst_orig = pi2_dst;
     pi4_tmp_orig = pi4_temp;
@@ -216,8 +216,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -233,7 +236,8 @@
                           WORD16 *pi2_dst,
                           WORD32 src_strd,
                           WORD32 pred_strd,
-                          WORD32 dst_strd_chr_flag)
+                          WORD32 dst_strd,
+                          CHROMA_PLANE_ID_T e_chroma_plane)
 {
     WORD32 i;
     WORD32 e[2], o[2];
@@ -242,11 +246,14 @@
     WORD32 *pi4_tmp_orig;
     WORD16 *pi2_dst_orig;
     UWORD32 u4_blk_sad=0;
-    WORD32 chroma_flag;
-    WORD32 dst_strd;
+    WORD32 chroma_flag = 0;
 
-    chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
+    if (e_chroma_plane != NULL_PLANE)
+    {
+        chroma_flag = 1;
+        pu1_src += e_chroma_plane;
+        pu1_pred += e_chroma_plane;
+    }
 
     pi2_dst_orig = pi2_dst;
     pi4_tmp_orig = pi4_temp;
@@ -427,8 +434,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -444,7 +454,8 @@
                           WORD16 *pi2_dst,
                           WORD32 src_strd,
                           WORD32 pred_strd,
-                          WORD32 dst_strd_chr_flag)
+                          WORD32 dst_strd,
+                          CHROMA_PLANE_ID_T e_chroma_plane)
 {
     WORD32 i, k;
     WORD32 e[4], o[4];
@@ -455,11 +466,14 @@
 //    WORD16 *pi2_tmp;
     WORD16 *pi2_dst_orig;
     UWORD32 u4_blk_sad=0;
-    WORD32 chroma_flag;
-    WORD32 dst_strd;
+    WORD32 chroma_flag = 0;
 
-    chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
+    if (e_chroma_plane != NULL_PLANE)
+    {
+        chroma_flag = 1;
+        pu1_src += e_chroma_plane;
+        pu1_pred += e_chroma_plane;
+    }
 
     pi2_dst_orig = pi2_dst;
     pi4_tmp_orig = pi4_temp;
@@ -724,8 +738,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -741,7 +758,8 @@
                             WORD16 *pi2_dst,
                             WORD32 src_strd,
                             WORD32 pred_strd,
-                            WORD32 dst_strd_chr_flag)
+                            WORD32 dst_strd,
+                            CHROMA_PLANE_ID_T e_chroma_plane)
 {
     WORD32 i, k;
     WORD32 e[8], o[8];
@@ -752,11 +770,14 @@
     WORD32 *pi4_tmp_orig;
     WORD16 *pi2_dst_orig;
     UWORD32 u4_blk_sad = 0;
-    WORD32 chroma_flag;
-    WORD32 dst_strd;
+    WORD32 chroma_flag = 0;
 
-    chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
+    if (e_chroma_plane != NULL_PLANE)
+    {
+        chroma_flag = 1;
+        pu1_src += e_chroma_plane;
+        pu1_pred += e_chroma_plane;
+    }
 
     pi2_dst_orig = pi2_dst;
     pi4_tmp_orig = pi4_temp;
@@ -1056,8 +1077,11 @@
  * @param[in] pred_strd
  *  Prediction Stride
  *
- * @param[in] dst_strd_chr_flag
- *  Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] e_chroma_plane
+ *  Enum signalling chroma plane
  *
  * @returns  Void
  *
@@ -1073,7 +1097,8 @@
                             WORD16 *pi2_dst,
                             WORD32 src_strd,
                             WORD32 pred_strd,
-                            WORD32 dst_strd_chr_flag)
+                            WORD32 dst_strd,
+                            CHROMA_PLANE_ID_T e_chroma_plane)
 {
     WORD32 i, k;
     WORD32 e[16], o[16];
@@ -1085,11 +1110,7 @@
     WORD32 *pi4_tmp_orig;
     WORD16 *pi2_dst_orig;
     UWORD32 u4_blk_sad = 0 ;
-    WORD32 chroma_flag;
-    WORD32 dst_strd;
-
-    chroma_flag = dst_strd_chr_flag & 1;
-    dst_strd = dst_strd_chr_flag >> 16;
+    UNUSED(e_chroma_plane);
 
     pi2_dst_orig = pi2_dst;
     pi4_tmp_orig = pi4_temp;
diff --git a/common/ihevc_resi_trans.h b/common/ihevc_resi_trans.h
index 3ca184b..9c40fa1 100644
--- a/common/ihevc_resi_trans.h
+++ b/common/ihevc_resi_trans.h
@@ -42,7 +42,8 @@
                                     WORD16 *pi2_dst,
                                     WORD32 src_strd,
                                     WORD32 pred_strd,
-                                    WORD32 dst_strd_chr_flag);
+                                    WORD32 dst_strd,
+                                    CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef UWORD32 ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src,
                                     UWORD16 *pu2_pred,
@@ -50,7 +51,8 @@
                                     WORD16 *pi2_dst,
                                     WORD32 src_strd,
                                     WORD32 pred_strd,
-                                    WORD32 dst_strd_chr_flag,
+                                    WORD32 dst_strd,
+                                    CHROMA_PLANE_ID_T e_chroma_plane,
                                     UWORD8 bit_depth);
 
 typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src,
@@ -59,7 +61,8 @@
                              WORD16 *pi2_dst,
                              WORD32 src_strd,
                              WORD32 pred_strd,
-                             WORD32 dst_strd_chr_flag);
+                             WORD32 dst_strd,
+                             CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef UWORD32 ihevc_hbd_resi_trans_4x4_ft
     (
@@ -79,7 +82,8 @@
                              WORD16 *pi2_dst,
                              WORD32 src_strd,
                              WORD32 pred_strd,
-                             WORD32 dst_strd_chr_flag);
+                             WORD32 dst_strd,
+                             CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef UWORD32 ihevc_hbd_resi_trans_8x8_ft
     (
@@ -100,7 +104,8 @@
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
-                               WORD32 dst_strd_chr_flag);
+                               WORD32 dst_strd,
+                               CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src,
                                UWORD16 *pu2_pred,
@@ -108,7 +113,8 @@
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
-                               WORD32 dst_strd_chr_flag,
+                               WORD32 dst_strd,
+                               CHROMA_PLANE_ID_T e_chroma_plane,
                                UWORD8 bit_depth);
 
 typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src,
@@ -117,7 +123,8 @@
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
-                               WORD32 dst_strd_chr_flag);
+                               WORD32 dst_strd,
+                               CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src,
                                UWORD16 *pu2_pred,
@@ -125,7 +132,8 @@
                                WORD16 *pi2_dst,
                                WORD32 src_strd,
                                WORD32 pred_strd,
-                               WORD32 dst_strd_chr_flag,
+                               WORD32 dst_strd,
+                               CHROMA_PLANE_ID_T e_chroma_plane,
                                UWORD8 bit_depth);
 
 
diff --git a/encoder/arm/ihevce_ssd_calculator_neon.c b/encoder/arm/ihevce_ssd_calculator_neon.c
index d62bcfc..9cb659c 100644
--- a/encoder/arm/ihevce_ssd_calculator_neon.c
+++ b/encoder/arm/ihevce_ssd_calculator_neon.c
@@ -54,21 +54,22 @@
 /* Function Definitions                                                      */
 /*****************************************************************************/
 static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon(
-    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 is_chroma)
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd,
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd_low, ssd_high;
     uint8x16_t src, pred, abs;
     uint16x8_t sqabs_low, sqabs_high;
 
-    if(!is_chroma)
+    if(chroma_plane == NULL_PLANE)
     {
         src = load_unaligned_u8q(pu1_src, src_strd);
         pred = load_unaligned_u8q(pu1_pred, pred_strd);
     }
     else
     {
-        src = load_unaligned_u8qi(pu1_src, src_strd);
-        pred = load_unaligned_u8qi(pu1_pred, pred_strd);
+        src = load_unaligned_u8qi(pu1_src + chroma_plane, src_strd);
+        pred = load_unaligned_u8qi(pu1_pred + chroma_plane, pred_strd);
     }
     abs = vabdq_u8(src, pred);
     sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs));
@@ -80,21 +81,22 @@
 }
 
 static INLINE uint32x4_t
-    ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+    ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd_val;
     uint8x8_t src, pred, abs;
     uint16x8_t sqabs;
 
-    if(!is_chroma)
+    if(chroma_plane == NULL_PLANE)
     {
         src = vld1_u8(pu1_src);
         pred = vld1_u8(pu1_pred);
     }
     else
     {
-        src = vld2_u8(pu1_src).val[0];
-        pred = vld2_u8(pu1_pred).val[0];
+        src = vld2_u8(pu1_src).val[chroma_plane];
+        pred = vld2_u8(pu1_pred).val[chroma_plane];
     }
     abs = vabd_u8(src, pred);
     sqabs = vmull_u8(abs, abs);
@@ -104,21 +106,22 @@
 }
 
 static INLINE uint32x4_t
-    ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+    ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd_low, ssd_high;
     uint8x16_t src, pred, abs;
     uint16x8_t sqabs_low, sqabs_high;
 
-    if(!is_chroma)
+    if(chroma_plane == NULL_PLANE)
     {
         src = vld1q_u8(pu1_src);
         pred = vld1q_u8(pu1_pred);
     }
     else
     {
-        src = vld2q_u8(pu1_src).val[0];
-        pred = vld2q_u8(pu1_pred).val[0];
+        src = vld2q_u8(pu1_src).val[chroma_plane];
+        pred = vld2q_u8(pu1_pred).val[chroma_plane];
     }
     abs = vabdq_u8(src, pred);
     sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs));
@@ -130,13 +133,14 @@
 }
 
 static INLINE uint32x4_t
-    ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+    ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3;
     uint8x16_t src_0, pred_0, src_1, pred_1, abs_0, abs_1;
     uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3;
 
-    if(!is_chroma)
+    if(chroma_plane == NULL_PLANE)
     {
         src_0 = vld1q_u8(pu1_src);
         pred_0 = vld1q_u8(pu1_pred);
@@ -145,10 +149,10 @@
     }
     else
     {
-        src_0 = vld2q_u8(pu1_src).val[0];
-        pred_0 = vld2q_u8(pu1_pred).val[0];
-        src_1 = vld2q_u8(pu1_src + 32).val[0];
-        pred_1 = vld2q_u8(pu1_pred + 32).val[0];
+        src_0 = vld2q_u8(pu1_src).val[chroma_plane];
+        pred_0 = vld2q_u8(pu1_pred).val[chroma_plane];
+        src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];
+        pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane];
     }
     abs_0 = vabdq_u8(src_0, pred_0);
     abs_1 = vabdq_u8(src_1, pred_1);
@@ -167,7 +171,8 @@
 }
 
 static INLINE uint32x4_t
-    ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+    ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3;
     uint32x4_t ssd_4, ssd_5, ssd_6, ssd_7;
@@ -177,7 +182,7 @@
     uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3;
     uint16x8_t sqabs_4, sqabs_5, sqabs_6, sqabs_7;
 
-    if(!is_chroma)
+    if(chroma_plane == NULL_PLANE)
     {
         src_0 = vld1q_u8(pu1_src);
         pred_0 = vld1q_u8(pu1_pred);
@@ -190,14 +195,14 @@
     }
     else
     {
-        src_0 = vld2q_u8(pu1_src).val[0];
-        pred_0 = vld2q_u8(pu1_pred).val[0];
-        src_1 = vld2q_u8(pu1_src + 32).val[0];
-        pred_1 = vld2q_u8(pu1_pred + 32).val[0];
-        src_2 = vld2q_u8(pu1_src + 64).val[0];
-        pred_2 = vld2q_u8(pu1_pred + 64).val[0];
-        src_3 = vld2q_u8(pu1_src + 96).val[0];
-        pred_3 = vld2q_u8(pu1_pred + 96).val[0];
+        src_0 = vld2q_u8(pu1_src).val[chroma_plane];
+        pred_0 = vld2q_u8(pu1_pred).val[chroma_plane];
+        src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];
+        pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane];
+        src_2 = vld2q_u8(pu1_src + 64).val[chroma_plane];
+        pred_2 = vld2q_u8(pu1_pred + 64).val[chroma_plane];
+        src_3 = vld2q_u8(pu1_src + 96).val[chroma_plane];
+        pred_3 = vld2q_u8(pu1_pred + 96).val[chroma_plane];
     }
     abs_0 = vabdq_u8(src_0, pred_0);
     abs_1 = vabdq_u8(src_1, pred_1);
@@ -236,7 +241,7 @@
     UWORD32 ref_stride,
     UWORD32 wd,
     UWORD32 ht,
-    WORD32 is_chroma)
+    CHROMA_PLANE_ID_T chroma_plane)
 {
     uint32x4_t ssd = vdupq_n_u32(0);
     uint32x2_t sum;
@@ -248,13 +253,13 @@
         for(row = ht; row > 0; row--)
         {
             if(wd == 8)
-                ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+                ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
             else if(wd == 16)
-                ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+                ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
             else if(wd == 32)
-                ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+                ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
             else if(wd == 64)
-                ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+                ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
             else if(wd % 8 == 0)
             {
                 UWORD32 col;
@@ -262,7 +267,7 @@
 
                 for(col = 0; col < wd; col += 8)
                 {
-                    ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, is_chroma));
+                    ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, chroma_plane));
                     ref = ref + 8;
                     inp = inp + 8;
                 }
@@ -275,7 +280,7 @@
     else if(wd == 4)
     {
         assert(ht == 4);
-        ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, is_chroma);
+        ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, chroma_plane);
     }
 
     sum = vadd_u32(vget_low_u32(ssd), vget_high_u32(ssd));
@@ -283,13 +288,17 @@
 }
 
 LWORD64 ihevce_ssd_calculator_neon(
-    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
+    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
 {
-    return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 0);
+    return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht,
+                                            chroma_plane);
 }
 
 LWORD64 ihevce_chroma_interleave_ssd_calculator_neon(
-    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
+    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
 {
-    return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 1);
+    return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht,
+                                            chroma_plane);
 }
diff --git a/encoder/ihevce_cmn_utils_instr_set_router.h b/encoder/ihevce_cmn_utils_instr_set_router.h
index be7554f..f385372 100644
--- a/encoder/ihevce_cmn_utils_instr_set_router.h
+++ b/encoder/ihevce_cmn_utils_instr_set_router.h
@@ -40,6 +40,7 @@
 #define __IHEVCE_CMN_UTILS_INSTR_SET_ROUTER_H_
 
 #include "ihevc_typedefs.h"
+#include "ihevc_defs.h"
 #include "ihevce_defs.h"
 
 /*****************************************************************************/
@@ -47,7 +48,8 @@
 /*****************************************************************************/
 typedef UWORD32 FT_CALC_HAD_SATD_8BIT(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD16 *, WORD32);
 
-typedef LWORD64 FT_SSD_CALCULATOR(UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32);
+typedef LWORD64 FT_SSD_CALCULATOR(
+    UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32, CHROMA_PLANE_ID_T);
 
 typedef LWORD64 FT_SSD_AND_SAD_CALCULATOR(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, UWORD32 *);
 
diff --git a/encoder/ihevce_common_utils.c b/encoder/ihevce_common_utils.c
index dd99132..e7fb036 100644
--- a/encoder/ihevce_common_utils.c
+++ b/encoder/ihevce_common_utils.c
@@ -858,11 +858,12 @@
 *******************************************************************************
 */
 LWORD64 ihevce_ssd_calculator(
-    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
+    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
 {
     UWORD32 i, j;
     LWORD64 ssd = 0;
-
+    UNUSED(chroma_plane);
     for(i = 0; i < ht; i++)
     {
         for(j = 0; j < wd; j++)
@@ -910,10 +911,13 @@
 *******************************************************************************
 */
 LWORD64 ihevce_chroma_interleave_ssd_calculator(
-    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
+    UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
 {
     UWORD32 i, j;
     LWORD64 ssd = 0;
+    pu1_inp += chroma_plane;
+    pu1_ref += chroma_plane;
 
     /* run a loop and find the ssd by doing diff followed by square */
     for(i = 0; i < ht; i++)
diff --git a/encoder/ihevce_defs.h b/encoder/ihevce_defs.h
index ffc35e3..23871ea 100644
--- a/encoder/ihevce_defs.h
+++ b/encoder/ihevce_defs.h
@@ -967,13 +967,6 @@
 
 } REF_LISTS_t;
 
-typedef enum
-{
-    NULL_PLANE = -1,
-    U_PLANE = 0,
-    V_PLANE = 1
-} CHROMA_PLANE_ID_T;
-
 typedef enum SSD_TYPE_T
 {
     NULL_TYPE = -1,
diff --git a/encoder/ihevce_enc_loop_structs.h b/encoder/ihevce_enc_loop_structs.h
index 74417c7..0768871 100644
--- a/encoder/ihevce_enc_loop_structs.h
+++ b/encoder/ihevce_enc_loop_structs.h
@@ -215,7 +215,8 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag);
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane);
 
 /** \breif function pointer prototype for quantization and inv Quant for ssd
 calc. for all transform sizes */
diff --git a/encoder/ihevce_enc_loop_utils.c b/encoder/ihevce_enc_loop_utils.c
index 9aa7bc8..4665cd0 100644
--- a/encoder/ihevce_enc_loop_utils.c
+++ b/encoder/ihevce_enc_loop_utils.c
@@ -2188,7 +2188,8 @@
             pi2_trans_values,
             src_strd,
             pred_strd,
-            ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */
+            trans_size,
+            NULL_PLANE);
 
         cbf = ps_ctxt->apf_quant_iquant_ssd
                   [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
@@ -2297,7 +2298,7 @@
             zero_cbf_cost =
 
                 ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
-                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size);
+                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
         }
 
         /************************************************************************/
@@ -7601,7 +7602,8 @@
                                     pred_strd,
                                     chrm_src_stride,
                                     trans_size,
-                                    trans_size);
+                                    trans_size,
+                                    U_PLANE);
 
                             if(u1_compute_spatial_ssd)
                             {
@@ -7861,12 +7863,13 @@
                             curr_cr_cod_cost = trans_ssd_v =
 
                                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
-                                    pu1_cur_pred + 1,
-                                    pu1_cur_src + 1,
+                                    pu1_cur_pred,
+                                    pu1_cur_src,
                                     pred_strd,
                                     chrm_src_stride,
                                     trans_size,
-                                    trans_size);
+                                    trans_size,
+                                    V_PLANE);
 
                             if(u1_compute_spatial_ssd)
                             {
@@ -10487,7 +10490,8 @@
             i4_zero_row);
 
         return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
-            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size);
+            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
+            e_chroma_plane);
     }
     else
     {
@@ -10507,12 +10511,13 @@
             e_chroma_plane);
 
         return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
-            pu1_recon + (e_chroma_plane == V_PLANE),
-            pu1_src + (e_chroma_plane == V_PLANE),
+            pu1_recon,
+            pu1_src,
             i4_recon_stride,
             i4_src_strd,
             u1_trans_size,
-            u1_trans_size);
+            u1_trans_size,
+            e_chroma_plane);
     }
 }
 
@@ -10628,12 +10633,13 @@
     if(u1_is_skip)
     {
         pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
-            pu1_pred + e_chroma_plane,
-            pu1_src + e_chroma_plane,
+            pu1_pred,
+            pu1_src,
             pred_strd,
             src_strd,
             trans_size,
-            trans_size);
+            trans_size,
+            e_chroma_plane);
 
         if(e_ssd_type == SPATIAL_DOMAIN_SSD)
         {
@@ -10735,13 +10741,14 @@
 
     /* ---------- call residue and transform block ------- */
     u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
-        pu1_src + (e_chroma_plane == V_PLANE),
-        pu1_pred + (e_chroma_plane == V_PLANE),
+        pu1_src,
+        pu1_pred,
         pi4_trans_scratch,
         pi2_trans_values,
         src_strd,
         pred_strd,
-        ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
+        trans_size,
+        e_chroma_plane);
     (void)u4_blk_sad;
     /* -------- calculate SSD calculation in Transform Domain ------ */
 
@@ -10855,12 +10862,13 @@
             zero_cbf_cost_u =
 
                 ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
-                    pu1_pred + (e_chroma_plane == V_PLANE),
-                    pu1_src + (e_chroma_plane == V_PLANE),
+                    pu1_pred,
+                    pu1_src,
                     pred_strd,
                     src_strd,
                     trans_size,
-                    trans_size);
+                    trans_size,
+                    e_chroma_plane);
         }
 
         /************************************************************************/
diff --git a/encoder/ihevce_enc_structs.h b/encoder/ihevce_enc_structs.h
index 2c442b9..ac39673 100644
--- a/encoder/ihevce_enc_structs.h
+++ b/encoder/ihevce_enc_structs.h
@@ -221,7 +221,8 @@
     WORD16 *pi2_dst,
     WORD32 src_strd,
     WORD32 pred_strd,
-    WORD32 dst_strd_chr_flag);
+    WORD32 dst_strd,
+    CHROMA_PLANE_ID_T e_chroma_plane);
 
 typedef WORD32 (*pf_quant)(
     WORD16 *pi2_coeffs,
diff --git a/encoder/ihevce_recur_bracketing.c b/encoder/ihevce_recur_bracketing.c
index 2967b37..15d09b4 100644
--- a/encoder/ihevce_recur_bracketing.c
+++ b/encoder/ihevce_recur_bracketing.c
@@ -451,7 +451,7 @@
         if(u1_use_satd)
         {
             ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
-                pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0);
+                pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE);
 
             sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
         }
diff --git a/encoder/ihevce_sao.c b/encoder/ihevce_sao.c
index 4e7ea0a..d658535 100644
--- a/encoder/ihevce_sao.c
+++ b/encoder/ihevce_sao.c
@@ -779,7 +779,7 @@
             distortion =
                 ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
                         s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
-                        s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht);
+                        s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
             // clang-format on
 
             ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
@@ -1145,7 +1145,8 @@
                     ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
                             s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
                             s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
-                            ctb_ht);
+                            ctb_ht,
+                            NULL_PLANE);
             }  // clang-format on
 
             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
@@ -1155,7 +1156,8 @@
                             s_sao_ctxt.pu1_cur_chroma_recon_buf,
                             chroma_src_stride,
                             s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
-                            (ctb_ht >> !u1_is_422));
+                            (ctb_ht >> !u1_is_422),
+                            NULL_PLANE);
             }  // clang-format on
 
             /*chroma distortion is added after correction because of lambda difference*/