declare fpu usage to the assembler in arm hard-float asm files

Some armhf gcc toolchains (built with --with-float=hard but without
--with-fpu=vfp*) do not pass -mfpu=vfp to the assembler. Binutils then
rejects the UAL mnemonics for VFP instructions unless the asm source
contains a .fpu vfp directive.
diff --git a/src/fenv/armhf/fenv.s b/src/fenv/armhf/fenv.s
index 387234b..c1ffd2e 100644
--- a/src/fenv/armhf/fenv.s
+++ b/src/fenv/armhf/fenv.s
@@ -1,3 +1,5 @@
+.fpu vfp
+
 .global fegetround
 .type fegetround,%function
 fegetround:
diff --git a/src/math/armhf/fabs.s b/src/math/armhf/fabs.s
index 2bdebff..8a705e1 100644
--- a/src/math/armhf/fabs.s
+++ b/src/math/armhf/fabs.s
@@ -1,3 +1,4 @@
+.fpu vfp
 .text
 .global fabs
 .type   fabs,%function
diff --git a/src/math/armhf/fabsf.s b/src/math/armhf/fabsf.s
index 35c720f..2c7beb6 100644
--- a/src/math/armhf/fabsf.s
+++ b/src/math/armhf/fabsf.s
@@ -1,3 +1,4 @@
+.fpu vfp
 .text
 .global fabsf
 .type   fabsf,%function
diff --git a/src/math/armhf/sqrt.s b/src/math/armhf/sqrt.s
index 99fe64b..90f74a9 100644
--- a/src/math/armhf/sqrt.s
+++ b/src/math/armhf/sqrt.s
@@ -1,3 +1,4 @@
+.fpu vfp
 .text
 .global sqrt
 .type   sqrt,%function
diff --git a/src/math/armhf/sqrtf.s b/src/math/armhf/sqrtf.s
index 9ea519f..91d8ad6 100644
--- a/src/math/armhf/sqrtf.s
+++ b/src/math/armhf/sqrtf.s
@@ -1,3 +1,4 @@
+.fpu vfp
 .text
 .global sqrtf
 .type   sqrtf,%function