fbdev: add drawing functions for framebuffers in system RAM

The generic drawing functions (cfbimgblt, cfbcopyarea, cfbfillrect) assume
that the framebuffer is in IO memory.  However, we have 3 drivers (hecubafb,
arcfb, and vfb) where the framebuffer is allocated from system RAM (via
vmalloc). Using _raw_read/write and family for these drivers (as used in
the cfb* functions) is illegal, especially on other platforms.

Create 3 new drawing functions, based almost entirely on the originals,
except that the framebuffer memory is assumed to be in system RAM.
These are named as sysimgblt, syscopyarea, and sysfillrect.

Signed-off-by: Antonino Daplas <adaplas@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/video/sysimgblt.c b/drivers/video/sysimgblt.c
new file mode 100644
index 0000000..bd7e7e9
--- /dev/null
+++ b/drivers/video/sysimgblt.c
@@ -0,0 +1,291 @@
+/*
+ *  Generic 1-bit or 8-bit source to 1-32 bit destination expansion
+ *  for frame buffer located in system RAM with packed pixels of any depth.
+ *
+ *  Based almost entirely on cfbimgblt.c
+ *
+ *      Copyright (C)  April 2007 Antonino Daplas <adaplas@pol.net>
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+#include <asm/types.h>
+
+#define DEBUG	/* defined unconditionally, so the printk form below is chosen */
+
+#ifdef DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt,__FUNCTION__,## args)
+#else
+#define DPRINTK(fmt, args...)	/* expands to nothing when DEBUG is not defined */
+#endif
+
+static const u32 cfb_tab8[] = {	/* 8 bpp: index is 4 source bits, 0xff byte per set bit */
+#if defined(__BIG_ENDIAN)	/* highest index bit fills the top byte */
+    0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
+    0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
+    0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
+    0xffff0000,0xffff00ff,0xffffff00,0xffffffff
+#elif defined(__LITTLE_ENDIAN)	/* highest index bit fills the bottom byte */
+    0x00000000,0xff000000,0x00ff0000,0xffff0000,
+    0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
+    0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
+    0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static const u32 cfb_tab16[] = {	/* 16 bpp: index is 2 source bits, 0xffff half per set bit */
+#if defined(__BIG_ENDIAN)	/* highest index bit fills the top half */
+    0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
+#elif defined(__LITTLE_ENDIAN)	/* highest index bit fills the bottom half */
+    0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static const u32 cfb_tab32[] = {	/* 32 bpp: index is 1 source bit */
+	0x00000000, 0xffffffff	/* clear bit -> all zeros, set bit -> all ones */
+};
+
+/*
+ * color_imageblit - draw an image that stores one byte per pixel (the penguin).
+ * @dst1 is the u32-aligned address of the first destination word,
+ * @start_index the bit offset of the first pixel within that word and
+ * @pitch_index the bits that offset gains per row (line_length % 4 * 8).
+ */
+static void color_imageblit(const struct fb_image *image, struct fb_info *p,
+			    void *dst1, u32 start_index, u32 pitch_index)
+{
+	u32 *dst, color = 0, val, shift;
+	int i, n, bpp = p->var.bits_per_pixel;
+	u32 null_bits = 32 - bpp;
+	u32 *palette = (u32 *) p->pseudo_palette;
+	const u8 *src = image->data;
+	/* byte address of the row's first pixel; unlike dst1 it is unaligned */
+	u8 *dst2 = (u8 *)dst1 + (start_index >> 3);
+
+	for (i = image->height; i--; ) {
+		n = image->width;
+		dst = dst1;
+		shift = val = 0;
+		if (start_index) {
+			/* keep the bits that precede the image in this word */
+			val = *dst & ~(FB_SHIFT_HIGH(~(u32)0, start_index));
+			shift = start_index;
+		}
+		while (n--) {
+			if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+			    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
+				color = palette[*src];
+			else
+				color = *src;
+			color <<= FB_LEFT_POS(bpp);
+			val |= FB_SHIFT_HIGH(color, shift);
+			if (shift >= null_bits) {
+				*dst++ = val;	/* word complete */
+				val = (shift == null_bits) ? 0 :
+					FB_SHIFT_LOW(color, 32 - shift);
+			}
+			shift += bpp;
+			shift &= (32 - 1);
+			src++;
+		}
+		if (shift) {
+			/* merge the partial last word with the bits after it */
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+			*dst &= end_mask;
+			*dst |= val;
+		}
+		dst1 += p->fix.line_length;
+		if (pitch_index) {
+			/* rows are not u32 aligned: step in bytes, re-align */
+			dst2 += p->fix.line_length;
+			dst1 = (u8 *)((long)dst2 & ~(sizeof(u32) - 1));
+			start_index = (start_index + pitch_index) & (32 - 1);
+		}
+	}
+}
+
+/*
+ * slow_imageblit - expand a 1-bit-per-pixel bitmap at any depth or alignment.
+ * Set source bits are drawn in @fgcolor, clear bits in @bgcolor.  @dst1 is
+ * the u32-aligned address of the first destination word, @start_index the
+ * bit offset of the first pixel within that word and @pitch_index the bits
+ * that offset gains per row (line_length % 4 * 8).
+ */
+static void slow_imageblit(const struct fb_image *image, struct fb_info *p,
+				  void *dst1, u32 fgcolor, u32 bgcolor,
+				  u32 start_index, u32 pitch_index)
+{
+	u32 shift, color = 0, bpp = p->var.bits_per_pixel;
+	u32 *dst, val, pitch = p->fix.line_length;
+	u32 null_bits = 32 - bpp;
+	u32 spitch = (image->width+7)/8;	/* source bytes per row */
+	const u8 *src = image->data, *s;
+	u32 i, j, l;
+	/* byte address of the row's first pixel; unlike dst1 it is unaligned */
+	u8 *dst2 = (u8 *)dst1 + (start_index >> 3);
+
+	fgcolor <<= FB_LEFT_POS(bpp);
+	bgcolor <<= FB_LEFT_POS(bpp);
+
+	for (i = image->height; i--; ) {
+		shift = val = 0;
+		l = 8;
+		j = image->width;
+		dst = dst1;
+		s = src;
+		/* write leading bits */
+		if (start_index) {
+			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
+			val = *dst & start_mask;
+			shift = start_index;
+		}
+		while (j--) {
+			l--;	/* source bits are consumed MSB first */
+			color = (*s & (1 << l)) ? fgcolor : bgcolor;
+			val |= FB_SHIFT_HIGH(color, shift);
+			/* Did the bitshift spill bits to the next long? */
+			if (shift >= null_bits) {
+				*dst++ = val;
+				val = (shift == null_bits) ? 0 :
+					FB_SHIFT_LOW(color,32 - shift);
+			}
+			shift += bpp;
+			shift &= (32 - 1);
+			if (!l) { l = 8; s++; }
+		}
+		/* write trailing bits */
+		if (shift) {
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+			*dst &= end_mask;
+			*dst |= val;
+		}
+		dst1 += pitch;
+		src += spitch;
+		if (pitch_index) {
+			/* rows are not u32 aligned: step in bytes, re-align */
+			dst2 += pitch;
+			dst1 = (u8 *)((long)dst2 & ~(sizeof(u32) - 1));
+			start_index = (start_index + pitch_index) & (32 - 1);
+		}
+	}
+}
+
+/*
+ * fast_imageblit - optimized monochrome color expansion
+ *
+ * Only valid when bits_per_pixel is 8, 16 or 32, image->width is a
+ * multiple of the pixels held by one dword (ppw), fix->line_length is a
+ * multiple of 4 and every scanline begins and ends dword aligned.  Each
+ * group of ppw source bits picks a mask that selects fg or bg per pixel.
+ */
+static void fast_imageblit(const struct fb_image *image, struct fb_info *p,
+				  void *dst1, u32 fgcolor, u32 bgcolor)
+{
+	const u32 bpp = p->var.bits_per_pixel, ppw = 32/bpp;
+	const u32 src_pitch = (image->width + 7)/8;
+	const u32 bit_mask = (1 << ppw) - 1;
+	u32 fg_word = fgcolor, bg_word = bgcolor;
+	u32 diff, mask, shift;
+	const char *row = image->data, *src;
+	const u32 *tab;
+	u32 *dst;
+	int rows, words, i;
+
+	/* mask table matching the pixel size */
+	if (bpp == 8)
+		tab = cfb_tab8;
+	else if (bpp == 16)
+		tab = cfb_tab16;
+	else
+		tab = cfb_tab32;
+
+	/* replicate each color into every pixel slot of a dword */
+	i = ppw - 1;
+	while (i--) {
+		fg_word = (fg_word << bpp) | fgcolor;
+		bg_word = (bg_word << bpp) | bgcolor;
+	}
+	/* bits in which the two color words differ */
+	diff = fg_word ^ bg_word;
+	words = image->width/ppw;
+
+	for (rows = image->height; rows--; ) {
+		dst = dst1;
+		src = row;
+		shift = 8;
+
+		i = words;
+		while (i--) {
+			/* take the next ppw bits, most significant first */
+			shift -= ppw;
+			mask = tab[(*src >> shift) & bit_mask];
+			*dst++ = bg_word ^ (diff & mask);
+			if (shift == 0) {
+				shift = 8;
+				src++;
+			}
+		}
+
+		/* next destination scanline and next source row */
+		dst1 += p->fix.line_length;
+		row += src_pitch;
+	}
+}
+
+/*
+ * sys_imageblit - draw @image into a framebuffer located in system RAM.
+ * Depth-1 images are expanded with fg_color/bg_color, other depths go to
+ * color_imageblit().  Returns early unless state is FBINFO_STATE_RUNNING.
+ */
+void sys_imageblit(struct fb_info *p, const struct fb_image *image)
+{
+	const u32 bpl = sizeof(u32), bpp = p->var.bits_per_pixel;
+	const u32 w = image->width, x = image->dx, y = image->dy;
+	u32 fg, bg, first_bit, bit_in_word, row_skew;
+	void *dst1;
+
+	if (p->state != FBINFO_STATE_RUNNING)
+		return;
+
+	/* bit address of the first pixel, split into word and bit offset */
+	first_bit = (y * p->fix.line_length * 8) + (x * bpp);
+	bit_in_word = first_bit & (32 - 1);
+	row_skew = (p->fix.line_length & (bpl - 1)) * 8;
+	dst1 = (void __force *)p->screen_base + ((first_bit / 8) & ~(bpl - 1));
+
+	if (p->fbops->fb_sync)
+		p->fbops->fb_sync(p);
+
+	if (image->depth != 1) {
+		color_imageblit(image, p, dst1, bit_in_word, row_skew);
+		return;
+	}
+
+	if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+	    p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+		fg = ((u32*)(p->pseudo_palette))[image->fg_color];
+		bg = ((u32*)(p->pseudo_palette))[image->bg_color];
+	} else {
+		fg = image->fg_color;
+		bg = image->bg_color;
+	}
+	if (32 % bpp == 0 && !bit_in_word && !row_skew &&
+	    (w & (32/bpp-1)) == 0 && bpp >= 8 && bpp <= 32)
+		fast_imageblit(image, p, dst1, fg, bg);
+	else
+		slow_imageblit(image, p, dst1, fg, bg, bit_in_word, row_skew);
+}
+EXPORT_SYMBOL(sys_imageblit);
+
+MODULE_AUTHOR("Antonino Daplas <adaplas@pol.net>");
+MODULE_DESCRIPTION("1-bit/8-bit to 1-32 bit color expansion (sys-to-sys)");
+MODULE_LICENSE("GPL");	/* matches the GPL notice in the header of this file */
+