ath9k_hw: optimize all descriptor access functions

Because all of the descriptor data structures are marked as __packed, GCC
assumes the worst case wrt. alignment and generates unaligned load/store
instructions on MIPS for access to all fields.
Since descriptors always have to be 4-byte-aligned, we can just mark the
data structures with __aligned(4), which allows GCC to generate much more
efficient code.
Verified through disassembly and OProfile comparisons.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h
index fdc2507..22907e2 100644
--- a/drivers/net/wireless/ath/ath9k/mac.h
+++ b/drivers/net/wireless/ath/ath9k/mac.h
@@ -237,7 +237,7 @@
 	u32 ds_ctl1;
 	u32 ds_hw[20];
 	void *ds_vdata;
-} __packed;
+} __packed __aligned(4);
 
 #define ATH9K_TXDESC_CLRDMASK		0x0001
 #define ATH9K_TXDESC_NOACK		0x0002
@@ -307,7 +307,7 @@
 			u32 status8;
 		} rx;
 	} u;
-} __packed;
+} __packed __aligned(4);
 
 #define AR5416DESC(_ds)         ((struct ar5416_desc *)(_ds))
 #define AR5416DESC_CONST(_ds)   ((const struct ar5416_desc *)(_ds))