Added support for half-precision (fp16) variants to the vld1_lane tests.
diff --git a/ref_vld1_lane.c b/ref_vld1_lane.c
index cad2123..903cc19 100644
--- a/ref_vld1_lane.c
+++ b/ref_vld1_lane.c
@@ -47,7 +47,15 @@
   /* With ARM RVCT, we need to declare variables before any executable
      statement */
   DECL_VARIABLE_ALL_VARIANTS(vector);
+#if __ARM_NEON_FP16_INTRINSICS
+  DECL_VARIABLE(vector, float, 16, 4);
+  DECL_VARIABLE(vector, float, 16, 8);
+#endif
   DECL_VARIABLE_ALL_VARIANTS(vector_src);
+#if __ARM_NEON_FP16_INTRINSICS
+  DECL_VARIABLE(vector_src, float, 16, 4);
+  DECL_VARIABLE(vector_src, float, 16, 8);
+#endif
 
   ARRAY(buffer_src, int, 8, 8);
   ARRAY(buffer_src, int, 16, 4);
@@ -60,6 +68,9 @@
   ARRAY(buffer_src, poly, 8, 8);
   ARRAY(buffer_src, poly, 16, 4);
   ARRAY(buffer_src, float, 32, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+  ARRAY(buffer_src, float, 16, 4);
+#endif
 
   ARRAY(buffer_src, int, 8, 16);
   ARRAY(buffer_src, int, 16, 8);
@@ -72,6 +83,9 @@
   ARRAY(buffer_src, poly, 8, 16);
   ARRAY(buffer_src, poly, 16, 8);
   ARRAY(buffer_src, float, 32, 4);
+#if __ARM_NEON_FP16_INTRINSICS
+  ARRAY(buffer_src, float, 16, 8);
+#endif
 
   clean_results ();
 
@@ -87,6 +101,9 @@
   TEST_VLD1_LANE(, poly, p, 8, 8, 7);
   TEST_VLD1_LANE(, poly, p, 16, 4, 3);
   TEST_VLD1_LANE(, float, f, 32, 2, 1);
+#if __ARM_NEON_FP16_INTRINSICS
+  TEST_VLD1_LANE(, float, f, 16, 4, 2);
+#endif
 
   TEST_VLD1_LANE(q, int, s, 8, 16, 15);
   TEST_VLD1_LANE(q, int, s, 16, 8, 5);
@@ -99,6 +116,9 @@
   TEST_VLD1_LANE(q, poly, p, 8, 16, 12);
   TEST_VLD1_LANE(q, poly, p, 16, 8, 6);
   TEST_VLD1_LANE(q, float, f, 32, 4, 2);
+#if __ARM_NEON_FP16_INTRINSICS
+  TEST_VLD1_LANE(q, float, f, 16, 8, 5);
+#endif
 
 #ifndef __CC_ARM
   /* Check runtime assertions. With RVCT, the check is performed at