fbdev: copyarea function taught to fully support swapped pixel order in byte

This corrects the case where the difference between the source and
destination X coordinates is not a multiple of the number of pixels in a
byte.  This is probably a rare case, but it should be supported for
completeness.

Reorganizing the FB_READL and FB_WRITEL calls decreases the code size of a
normal build without swapping support, and the size with support enabled
is reasonable too.

[adaplas]
Add missing fb_rev_pixels_in_long() prototype.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Signed-off-by: Antonino Daplas <adaplas@gmail.com>
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/video/fb_draw.h b/drivers/video/fb_draw.h
index 816843f..cdafbe1 100644
--- a/drivers/video/fb_draw.h
+++ b/drivers/video/fb_draw.h
@@ -72,6 +72,40 @@
 #endif
 
 #ifdef CONFIG_FB_CFB_REV_PIXELS_IN_BYTE
+#if BITS_PER_LONG == 64
+#define REV_PIXELS_MASK1 0x5555555555555555ul
+#define REV_PIXELS_MASK2 0x3333333333333333ul
+#define REV_PIXELS_MASK4 0x0f0f0f0f0f0f0f0ful
+#else
+#define REV_PIXELS_MASK1 0x55555555ul
+#define REV_PIXELS_MASK2 0x33333333ul
+#define REV_PIXELS_MASK4 0x0f0f0f0ful
+#endif
+
+/*
+ * Reorder the pixels held inside each byte of @val and return the result.
+ *
+ * @bswapmask selects the exchange steps: bit 0 swaps neighbouring bits,
+ * bit 1 swaps neighbouring 2-bit groups and bit 2 (value 4) swaps the two
+ * nibbles of every byte, matching the shift width and REV_PIXELS_MASKn
+ * pattern used by each step.  A mask with none of these bits set leaves
+ * @val untouched.
+ *
+ * NOTE(review): this assumes comp(a, b, mask) takes the bits of a where
+ * mask is set and the bits of b elsewhere; comp() is defined outside this
+ * hunk - confirm.
+ */
+static inline unsigned long fb_rev_pixels_in_long(unsigned long val,
+						  u32 bswapmask)
+{
+	if (bswapmask & 1)
+		val = comp(val >> 1, val << 1, REV_PIXELS_MASK1);
+	if (bswapmask & 2)
+		val = comp(val >> 2, val << 2, REV_PIXELS_MASK2);
+	if (bswapmask & 4)
+		val = comp(val >> 4, val << 4, REV_PIXELS_MASK4);
+	return val;
+}
 
 static inline u32 fb_shifted_pixels_mask_u32(u32 index, u32 bswapmask)
 {
@@ -131,6 +151,12 @@
 
 #else /* CONFIG_FB_CFB_REV_PIXELS_IN_BYTE */
 
+/* Stub for builds without pixel-order swapping: val passes through as is. */
+static inline unsigned long fb_rev_pixels_in_long(unsigned long val, u32 bswapmask)
+{
+	return val;
+}
+
 #define fb_shifted_pixels_mask_u32(i, b) FB_SHIFT_HIGH(~(u32)0, (i))
 #define fb_shifted_pixels_mask_long(i, b) FB_SHIFT_HIGH(~0UL, (i))
 #define fb_compute_bswapmask(...) 0