fbdev: add drawing functions for framebuffers in system RAM

The generic drawing functions (cfbimgblt, cfbcopyarea, cfbfillrect) assume
that the framebuffer is in IO memory.  However, we have 3 drivers (hecubafb,
arcfb, and vfb) where the framebuffer is allocated from system RAM (via
vmalloc). Using _raw_read/write and family for these drivers (as used in
the cfb* functions) is illegal, especially on other platforms.

Create 3 new drawing functions, based almost entirely on the originals,
except that the framebuffer memory is assumed to be in system RAM.
These are named sysimgblt, syscopyarea, and sysfillrect.

Signed-off-by: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/video/sysfillrect.c b/drivers/video/sysfillrect.c
new file mode 100644
index 0000000..10de707
--- /dev/null
+++ b/drivers/video/sysfillrect.c
@@ -0,0 +1,400 @@
+/*
+ *  Generic fillrect for frame buffers in system RAM with packed pixels of
+ *  any depth.
+ *
+ *  Based almost entirely from cfbfillrect.c (which is based almost entirely
+ *  on Geert Uytterhoeven's fillrect routine)
+ *
+ *      Copyright (C)  2007 Antonino Daplas <adaplas@pol.net>
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+#include <asm/types.h>
+
+    /*
+     *  Bitwise select between two values: every result bit is taken from a
+     *  where mask is set and from b elsewhere, i.e. (a & mask) | (b & ~mask)
+     */
+
+static inline unsigned long
+comp(unsigned long a, unsigned long b, unsigned long mask)
+{
+	return b ^ (mask & (b ^ a));
+}
+
+    /*
+     *  Create a pattern with the given pixel's color, repeated across a whole
+     *  word; when bpp does not divide the word size the top copy is partial
+     */
+#if BITS_PER_LONG == 64
+static inline unsigned long
+pixel_to_pat( u32 bpp, u32 pixel)
+{
+	switch (bpp) {
+	case 1:
+		return 0xfffffffffffffffful*pixel;
+	case 2:
+		return 0x5555555555555555ul*pixel;
+	case 4:
+		return 0x1111111111111111ul*pixel;
+	case 8:
+		return 0x0101010101010101ul*pixel;
+	case 12:
+		return 0x1001001001001001ul*pixel;	/* 5 pixels + low 4 bits */
+	case 16:
+		return 0x0001000100010001ul*pixel;
+	case 24:
+		return 0x0001000001000001ul*pixel;	/* 2 pixels + low 16 bits */
+	case 32:
+		return 0x0000000100000001ul*pixel;
+	default:
+		panic("pixel_to_pat(): unsupported pixelformat\n");
+	}
+}
+#else
+static inline unsigned long
+pixel_to_pat( u32 bpp, u32 pixel)
+{
+	switch (bpp) {
+	case 1:
+		return 0xfffffffful*pixel;
+	case 2:
+		return 0x55555555ul*pixel;
+	case 4:
+		return 0x11111111ul*pixel;
+	case 8:
+		return 0x01010101ul*pixel;
+	case 12:
+		return 0x01001001ul*pixel;	/* 2 pixels + low 8 bits */
+	case 16:
+		return 0x00010001ul*pixel;
+	case 24:
+		return 0x01000001ul*pixel;	/* 1 pixel + low 8 bits */
+	case 32:
+		return 0x00000001ul*pixel;
+	default:
+		panic("pixel_to_pat(): unsupported pixelformat\n");
+	}
+}
+#endif
+
+    /*
+     *  Aligned pattern fill using 32/64-bit memory accesses
+     *  Stores pat over the n bits that start dst_idx bits into *dst; the same
+     *  pat is used for every word and bits is the word size.  Nothing is
+     *  written when n is 0.
+     */
+static void
+bitfill_aligned(unsigned long *dst, int dst_idx, unsigned long pat,
+		unsigned n, int bits)
+{
+	unsigned long head, tail;	/* masks for the first and last word */
+	unsigned i;
+
+	if (n == 0)
+		return;
+
+	head = FB_SHIFT_HIGH(~0UL, dst_idx);
+	tail = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+
+	if (dst_idx+n > bits) {
+		/* The run spans several destination words */
+
+		/* Leading bits; a full-word mask is left to the main chunk */
+		if (head != ~0UL) {
+			*dst = comp(pat, *dst, head);
+			n -= bits - dst_idx;
+			dst++;
+		}
+
+		/* Whole words, eight per pass while that many remain */
+		for (n /= bits; n >= 8; n -= 8) {
+			for (i = 0; i < 8; i++)
+				dst[i] = pat;
+			dst += 8;
+		}
+
+		/* The remaining (fewer than eight) whole words */
+		for (i = 0; i < n; i++)
+			dst[i] = pat;
+		dst += n;
+
+		/* Trailing bits; skipped when the tail mask is empty */
+		if (tail)
+			*dst = comp(pat, *dst, tail);
+	} else {
+		/* The run fits in one word: combine both masks */
+		if (tail)
+			head &= tail;
+		*dst = comp(pat, *dst, head);
+	}
+}
+
+
+    /*
+     *  Unaligned generic pattern fill using 32/64-bit memory accesses
+     *  The pattern must have been expanded to a full 32/64-bit value
+     *  Left/right are the appropriate shifts to convert to the pattern to be
+     *  used for the next 32/64-bit word.  pat is stored over the n bits that
+     *  start dst_idx bits into *dst; bits is the word size.
+     */
+static void
+bitfill_unaligned(unsigned long *dst, int dst_idx, unsigned long pat,
+		  int left, int right, unsigned n, int bits)
+{
+	unsigned long first, last;	/* masks for the first and last word */
+
+	if (!n)
+		return;
+
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+
+	if (dst_idx+n <= bits) {
+		/* Single word */
+		if (last)
+			first &= last;
+		*dst = comp(pat, *dst, first);
+	} else {
+		/* Multiple destination words */
+		/* Leading bits */
+		if (first) {
+			*dst = comp(pat, *dst, first);
+			dst++;
+			pat = pat << left | pat >> right;
+			n -= bits - dst_idx;
+		}
+
+		/* Main chunk */
+		n /= bits;
+		while (n >= 4) {
+			*dst++ = pat;
+			pat = pat << left | pat >> right;
+			*dst++ = pat;
+			pat = pat << left | pat >> right;
+			*dst++ = pat;
+			pat = pat << left | pat >> right;
+			*dst++ = pat;
+			pat = pat << left | pat >> right;
+			n -= 4;
+		}
+		while (n--) {
+			*dst++ = pat;
+			pat = pat << left | pat >> right;
+		}
+
+		/* Trailing bits: only the bits selected by last are filled */
+		if (last)
+			*dst = comp(pat, *dst, last);
+	}
+}
+
+    /*
+     *  Aligned pattern invert using 32/64-bit memory accesses
+     *  XORs pat into the n bits that start dst_idx bits into *dst; bits is
+     *  the word size.  Nothing is written when n is 0.
+     */
+static void
+bitfill_aligned_rev(unsigned long *dst, int dst_idx, unsigned long pat,
+		    unsigned n, int bits)
+{
+	unsigned long head, tail;	/* masks for the first and last word */
+	unsigned i;
+
+	if (n == 0)
+		return;
+
+	head = FB_SHIFT_HIGH(~0UL, dst_idx);
+	tail = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+
+	if (dst_idx+n > bits) {
+		/* The run spans several destination words */
+
+		/* Leading bits of the first word */
+		if (head) {
+			*dst = comp(*dst ^ pat, *dst, head);
+			n -= bits - dst_idx;
+			dst++;
+		}
+
+		/* Whole words, eight per pass while that many remain */
+		for (n /= bits; n >= 8; n -= 8) {
+			for (i = 0; i < 8; i++)
+				dst[i] ^= pat;
+			dst += 8;
+		}
+
+		/* The remaining (fewer than eight) whole words */
+		for (i = 0; i < n; i++)
+			dst[i] ^= pat;
+		dst += n;
+
+		/* Trailing bits; skipped when the tail mask is empty */
+		if (tail)
+			*dst = comp(*dst ^ pat, *dst, tail);
+	} else {
+		/* The run fits in one word: combine both masks */
+		if (tail)
+			head &= tail;
+		*dst = comp(*dst ^ pat, *dst, head);
+	}
+}
+
+
+    /*
+     *  Unaligned generic pattern invert using 32/64-bit memory accesses
+     *  The pattern must have been expanded to a full 32/64-bit value
+     *  Left/right are the appropriate shifts to convert to the pattern to be
+     *  used for the next 32/64-bit word.  pat is XORed into the n bits that
+     *  start dst_idx bits into *dst; bits is the word size.
+     */
+static void
+bitfill_unaligned_rev(unsigned long *dst, int dst_idx, unsigned long pat,
+			int left, int right, unsigned n, int bits)
+{
+	unsigned long first, last;	/* masks for the first and last word */
+
+	if (!n)
+		return;
+
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+
+	if (dst_idx+n <= bits) {
+		/* Single word */
+		if (last)
+			first &= last;
+		*dst = comp(*dst ^ pat, *dst, first);
+	} else {
+		/* Multiple destination words */
+
+		/* Leading bits */
+		if (first != 0UL) {
+			*dst = comp(*dst ^ pat, *dst, first);
+			dst++;
+			pat = pat << left | pat >> right;
+			n -= bits - dst_idx;
+		}
+
+		/* Main chunk */
+		n /= bits;
+		while (n >= 4) {
+			*dst++ ^= pat;
+			pat = pat << left | pat >> right;
+			*dst++ ^= pat;
+			pat = pat << left | pat >> right;
+			*dst++ ^= pat;
+			pat = pat << left | pat >> right;
+			*dst++ ^= pat;
+			pat = pat << left | pat >> right;
+			n -= 4;
+		}
+		while (n--) {
+			*dst++ ^= pat;	/* advance: each word is inverted once */
+			pat = pat << left | pat >> right;
+		}
+
+		/* Trailing bits */
+		if (last)
+			*dst = comp(*dst ^ pat, *dst, last);
+	}
+}
+
+/*
+ *  Fill rect in p's system-RAM framebuffer with rect->color, row by row, using
+ *  rect->rop (ROP_COPY or ROP_XOR).  No-op unless p is FBINFO_STATE_RUNNING.
+ */
+void sys_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
+{
+	unsigned long pat, fg, width = rect->width, height = rect->height;
+	int bits = BITS_PER_LONG, bytes = bits >> 3;
+	u32 bpp = p->var.bits_per_pixel;
+	unsigned long *dst;
+	int dst_idx, left;
+
+	if (p->state != FBINFO_STATE_RUNNING)
+		return;
+
+	if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+	    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
+		fg = ((u32 *) (p->pseudo_palette))[rect->color];
+	else
+		fg = rect->color;
+	pat = pixel_to_pat( bpp, fg);
+
+	dst = (unsigned long *)((unsigned long)p->screen_base & ~(bytes-1));
+	dst_idx = ((unsigned long)p->screen_base & (bytes - 1))*8;
+	dst_idx += rect->dy*p->fix.line_length*8+rect->dx*bpp;
+	/* FIXME For now we support 1-32 bpp only */
+	left = bits % bpp;
+	if (p->fbops->fb_sync)
+		p->fbops->fb_sync(p);
+	if (!left) {
+		void (*fill_op32)(unsigned long *dst, int dst_idx,
+		                  unsigned long pat, unsigned n, int bits);
+
+		switch (rect->rop) {
+		case ROP_XOR:
+			fill_op32 = bitfill_aligned_rev;
+			break;
+		case ROP_COPY:
+			fill_op32 = bitfill_aligned;
+			break;
+		default:
+			printk( KERN_ERR "sys_fillrect(): unknown rop, "
+				"defaulting to ROP_COPY\n");
+			fill_op32 = bitfill_aligned;
+			break;
+		}
+		while (height--) {
+			dst += dst_idx >> (ffs(bits) - 1);	/* whole words */
+			dst_idx &= (bits - 1);		/* bits left over */
+			fill_op32(dst, dst_idx, pat, width*bpp, bits);
+			dst_idx += p->fix.line_length*8;
+		}
+	} else {
+		int right, r;
+		int rot = (left-dst_idx) % bpp;
+		void (*fill_op)(unsigned long *dst, int dst_idx,
+		                unsigned long pat, int left, int right,
+		                unsigned n, int bits);
+
+		/* NOTE(review): bpp is u32, so rot is reduced as unsigned; confirm */
+		/* rotate pattern to correct start position */
+		pat = pat << rot | pat >> (bpp-rot);
+		right = bpp-left;
+		switch (rect->rop) {
+		case ROP_XOR:
+			fill_op = bitfill_unaligned_rev;
+			break;
+		case ROP_COPY:
+			fill_op = bitfill_unaligned;
+			break;
+		default:
+			printk(KERN_ERR "sys_fillrect(): unknown rop, "
+				"defaulting to ROP_COPY\n");
+			fill_op = bitfill_unaligned;
+			break;
+		}
+		while (height--) {
+			dst += dst_idx >> (ffs(bits) - 1);	/* whole words */
+			dst_idx &= (bits - 1);		/* bits left over */
+			fill_op(dst, dst_idx, pat, left, right,
+				width*bpp, bits);
+			r = (p->fix.line_length*8) % bpp;
+			pat = pat << (bpp-r) | pat >> r;
+			dst_idx += p->fix.line_length*8;
+		}
+	}
+}
+
+EXPORT_SYMBOL(sys_fillrect);	/* sys_fillrect is this file's exported entry point */
+
+MODULE_AUTHOR("Antonino Daplas <adaplas@pol.net>");
+MODULE_DESCRIPTION("Generic fill rectangle (sys-to-sys)");
+MODULE_LICENSE("GPL");