[PATCH] avr32 architecture

This adds support for the Atmel AVR32 architecture as well as the AT32AP7000
CPU and the AT32STK1000 development board.

AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for
cost-sensitive embedded applications, with particular emphasis on low power
consumption and high code density.  The AVR32 architecture is not binary
compatible with earlier 8-bit AVR architectures.

The AVR32 architecture, including the instruction set, is described by the
AVR32 Architecture Manual, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf

The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture.  It
features a 7-stage pipeline, 16KB instruction and data caches and a full
Memory Management Unit.  It also comes with a large set of integrated
peripherals, many of which are shared with the AT91 ARM-based controllers from
Atmel.

The full data sheet is available from

http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf

while the CPU core implementation, including the caches and MMU, is documented
in the AVR32 AP Technical Reference, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf

Information about the AT32STK1000 development board can be found at

http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918

including a BSP CD image with an earlier version of this patch, development
tools (binaries and source/patches), and a root filesystem image suitable for
booting from an SD card.

Alternatively, there's a preliminary "getting started" guide available at
http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links
to the sources and patches you will need in order to set up a cross-compiling
environment for avr32-linux.

This patch, as well as the other patches included with the BSP and the
toolchain patches, is actively supported by Atmel Corporation.

[dmccr@us.ibm.com: Fix more pxx_page macro locations]
[bunk@stusta.de: fix `make defconfig']
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave McCracken <dmccr@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
new file mode 100644
index 0000000..2b4856f
--- /dev/null
+++ b/arch/avr32/lib/findbit.S
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+	.text
+	/*
+	 * unsigned long find_first_zero_bit(const unsigned long *addr,
+	 *				     unsigned long size)
+	 */
+ENTRY(find_first_zero_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_zero_bit(const unsigned long *addr,
+	 *				    unsigned long size,
+	 *				    unsigned long offset)
+	 */
+ENTRY(find_next_zero_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH	/* r10 = offset % 32; COH clears the upper halfword */
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	com	r8
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/* Common return path for when a bit is actually found. */
+.L_found:
+	brev	r8
+	clz	r10, r8
+	rsub	r9, r11
+	add	r10, r9
+
+	/* XXX: If we don't have to return exactly "size" when the bit
+	   is not found, we may drop this "min" thing */
+	min	r12, r11, r10
+	retal	r12
+
+	/*
+	 * unsigned long find_first_bit(const unsigned long *addr,
+	 *				unsigned long size)
+	 */
+ENTRY(find_first_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_bit(const unsigned long *addr,
+	 *			       unsigned long size,
+	 *			       unsigned long offset)
+	 */
+ENTRY(find_next_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+ENTRY(generic_find_next_zero_le_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ldswp.w	r8, r12[0]	/* byte-swapped load: the bitmap is little-endian */
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ldswp.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
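
For reference, the routines above follow the usual Linux bitmap conventions: bit N of
the map is bit N % 32 of the 32-bit word at index N / 32, and each function returns the
index of the first matching bit at or after the given offset, or "size" if no such bit
exists below "size".  The naive C sketch below illustrates the find_next_zero_bit()
semantics only; it is not part of this patch, and the names ref_find_next_zero_bit and
BITS_PER_WORD are made up for the example.

	#include <limits.h>

	#define BITS_PER_WORD	(CHAR_BIT * sizeof(unsigned long))

	/* Illustration only: return the index of the first zero bit at or
	 * after 'offset', or 'size' if every bit in [offset, size) is set. */
	static unsigned long ref_find_next_zero_bit(const unsigned long *addr,
						    unsigned long size,
						    unsigned long offset)
	{
		for (; offset < size; offset++) {
			unsigned long word = addr[offset / BITS_PER_WORD];

			if (!(word & (1UL << (offset % BITS_PER_WORD))))
				return offset;
		}
		return size;
	}

The assembly versions do the same work a word at a time: find_next_zero_bit()
complements each 32-bit word (find_next_bit() tests it directly) and, on a hit, uses
brev + clz to turn the least significant matching bit into a bit index, clamping the
result to size.  generic_find_next_zero_le_bit() is the same except that it loads each
word with ldswp.w, so little-endian bitmaps (such as ext2's on-disk bitmaps) are
searched correctly on this big-endian CPU.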