i965: Add FS backend for builtin gl_SampleMask[]

V2:
   - Update comments
   - Add a special backend instruction to compute sample_mask.
   - Add a new variable uses_omask in brw_wm_prog_data.

V3:
   - Make changes to support simd16 mode.
   - Delete redundant AND instruction and handle the register
     stride in FS backend instruction.
Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2553098..d67c4e4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1068,6 +1068,44 @@
    brw_pop_insn_state(p);
 }
 
+/* Moves register mask into the UW register dst. The source region is set to
+ * vstride=16, width=8, hstride=2 when mask arrives as <8;8,1>, or stays at
+ * vstride=0, width=1, hstride=0 when mask is passed as a uniform.
+ */
+void
+fs_generator::generate_set_omask(fs_inst *inst,
+                                 struct brw_reg dst,
+                                 struct brw_reg mask)
+{
+   bool stride_8_8_1 =
+    (mask.vstride == BRW_VERTICAL_STRIDE_8 &&
+     mask.width == BRW_WIDTH_8 &&
+     mask.hstride == BRW_HORIZONTAL_STRIDE_1);
+
+   bool stride_0_1_0 =
+    (mask.vstride == BRW_VERTICAL_STRIDE_0 &&
+     mask.width == BRW_WIDTH_1 &&
+     mask.hstride == BRW_HORIZONTAL_STRIDE_0);
+
+   assert(stride_8_8_1 || stride_0_1_0); /* only these regions are handled */
+   assert(dst.type == BRW_REGISTER_TYPE_UW);
+   /* A dispatch width of 16 widens the destination with vec16(). */
+   if (dispatch_width == 16)
+      dst = vec16(dst);
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   /* Source is rebuilt from just mask.file/mask.nr, retyped to dst.type. */
+   if (stride_8_8_1) {
+      /* NOTE(review): the literal 0 looks like the subnr, so any
+       * sub-register offset in mask would be dropped -- confirm. */
+      brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0),
+                                    dst.type), 16, 8, 2));
+   } else if (stride_0_1_0) {
+      brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0),
+                                    dst.type), 0, 1, 0));
+   }
+   brw_pop_insn_state(p);
+}
+
 /* Sets vstride=1, width=4, hstride=0 of register src1 during
  * the ADD instruction.
  */
@@ -1666,6 +1704,10 @@
          generate_set_simd4x2_offset(inst, dst, src[0]);
          break;
 
+      case FS_OPCODE_SET_OMASK:
+         generate_set_omask(inst, dst, src[0]);
+         break;
+
       case FS_OPCODE_SET_SAMPLE_ID:
          generate_set_sample_id(inst, dst, src[0], src[1]);
          break;