[PATCH] avr32 architecture

This adds support for the Atmel AVR32 architecture as well as the AT32AP7000
CPU and the AT32STK1000 development board.

AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for
cost-sensitive embedded applications, with particular emphasis on low power
consumption and high code density.  The AVR32 architecture is not binary
compatible with earlier 8-bit AVR architectures.

The AVR32 architecture, including the instruction set, is described by the
AVR32 Architecture Manual, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf

The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture.  It
features a 7-stage pipeline, 16KB instruction and data caches and a full
Memory Management Unit.  It also comes with a large set of integrated
peripherals, many of which are shared with the AT91 ARM-based controllers from
Atmel.

Full data sheet is available from

http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf

while the CPU core implementation including caches and MMU is documented by
the AVR32 AP Technical Reference, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf

Information about the AT32STK1000 development board can be found at

http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918

including a BSP CD image with an earlier version of this patch, development
tools (binaries and source/patches) and a root filesystem image suitable for
booting from SD card.

Alternatively, there's a preliminary "getting started" guide available at
http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links
to the sources and patches you will need in order to set up a cross-compiling
environment for avr32-linux.

This patch, as well as the other patches included with the BSP and the
toolchain patches, is actively supported by Atmel Corporation.

[dmccr@us.ibm.com: Fix more pxx_page macro locations]
[bunk@stusta.de: fix `make defconfig']
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave McCracken <dmccr@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/avr32/lib/Makefile b/arch/avr32/lib/Makefile
new file mode 100644
index 0000000..09ac43e
--- /dev/null
+++ b/arch/avr32/lib/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for AVR32-specific library files
+#
+
+lib-y	:= copy_user.o clear_user.o
+lib-y	+= strncpy_from_user.o strnlen_user.o
+lib-y	+= delay.o memset.o memcpy.o findbit.o
+lib-y	+= csum_partial.o csum_partial_copy_generic.o
+lib-y	+= io-readsw.o io-readsl.o io-writesw.o io-writesl.o
+lib-y	+= __avr32_lsl64.o __avr32_lsr64.o __avr32_asr64.o
diff --git a/arch/avr32/lib/__avr32_asr64.S b/arch/avr32/lib/__avr32_asr64.S
new file mode 100644
index 0000000..368b6bc
--- /dev/null
+++ b/arch/avr32/lib/__avr32_asr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_asr64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_asr64
+	.type	__avr32_asr64,@function
+__avr32_asr64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsl	r8, r11, r9
+	lsr	r10, r10, r12
+	asr	r11, r11, r12
+	or	r10, r8
+	retal	r12
+
+1:	neg	r9
+	asr	r10, r11, r9
+	asr	r11, 31
+	retal	r12
diff --git a/arch/avr32/lib/__avr32_lsl64.S b/arch/avr32/lib/__avr32_lsl64.S
new file mode 100644
index 0000000..f1dbc2b
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsl64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_lsl64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_lsl64
+	.type	__avr32_lsl64,@function
+__avr32_lsl64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsr	r8, r10, r9
+	lsl	r10, r10, r12
+	lsl	r11, r11, r12
+	or	r11, r8
+	retal	r12
+
+1:	neg	r9
+	lsl	r11, r10, r9
+	mov	r10, 0
+	retal	r12
diff --git a/arch/avr32/lib/__avr32_lsr64.S b/arch/avr32/lib/__avr32_lsr64.S
new file mode 100644
index 0000000..e65bb7f
--- /dev/null
+++ b/arch/avr32/lib/__avr32_lsr64.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * DWtype __avr32_lsr64(DWtype u, word_type b)
+	 */
+	.text
+	.global	__avr32_lsr64
+	.type	__avr32_lsr64,@function
+__avr32_lsr64:
+	cp.w	r12, 0
+	reteq	r12
+
+	rsub	r9, r12, 32
+	brle	1f
+
+	lsl	r8, r11, r9
+	lsr	r11, r11, r12
+	lsr	r10, r10, r12
+	or	r10, r8
+	retal	r12
+
+1:	neg	r9
+	lsr	r10, r11, r9
+	mov	r11, 0
+	retal	r12
diff --git a/arch/avr32/lib/clear_user.S b/arch/avr32/lib/clear_user.S
new file mode 100644
index 0000000..d8991b6
--- /dev/null
+++ b/arch/avr32/lib/clear_user.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	.text
+	.align	1
+	.global	clear_user
+	.type	clear_user, "function"
+clear_user:
+	branch_if_kernel r8, __clear_user
+	ret_if_privileged r8, r12, r11, r11
+
+	.global	__clear_user
+	.type	__clear_user, "function"
+__clear_user:
+	mov	r9, r12
+	mov	r8, 0
+	andl	r9, 3, COH
+	brne	5f
+
+1:	sub	r11, 4
+	brlt	2f
+
+10:	st.w	r12++, r8
+	sub	r11, 4
+	brge	10b
+
+2:	sub	r11, -4
+	reteq	0
+
+	/* Unaligned count or address */
+	bld	r11, 1
+	brcc	12f
+11:	st.h	r12++, r8
+	sub	r11, 2
+	reteq	0
+12:	st.b	r12++, r8
+	retal	0
+
+	/* Unaligned address */
+5:	cp.w	r11, 4
+	brlt	2b
+
+	lsl	r9, 2
+	add	pc, pc, r9
+13:	st.b	r12++, r8
+	sub	r11, 1
+14:	st.b	r12++, r8
+	sub	r11, 1
+15:	st.b	r12++, r8
+	sub	r11, 1
+	rjmp	1b
+
+	.size	clear_user, . - clear_user
+	.size	__clear_user, . - __clear_user
+
+	.section .fixup, "ax"
+	.align	1
+18:	sub	r11, -4
+19:	retal	r11
+
+	.section __ex_table, "a"
+	.align	2
+	.long	10b, 18b
+	.long	11b, 19b
+	.long	12b, 19b
+	.long	13b, 19b
+	.long	14b, 19b
+	.long	15b, 19b
diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
new file mode 100644
index 0000000..ea59c04
--- /dev/null
+++ b/arch/avr32/lib/copy_user.S
@@ -0,0 +1,119 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	/*
+	 * __kernel_size_t
+	 * __copy_user(void *to, const void *from, __kernel_size_t n)
+	 *
+	 * Returns the number of bytes not copied. Might be off by
+	 * max 3 bytes if we get a fault in the main loop.
+	 *
+	 * The address-space checking functions simply fall through to
+	 * the non-checking version.
+	 */
+	.text
+	.align	1
+	.global	copy_from_user
+	.type	copy_from_user, @function
+copy_from_user:
+	branch_if_kernel r8, __copy_user
+	ret_if_privileged r8, r11, r10, r10
+	rjmp	__copy_user
+	.size	copy_from_user, . - copy_from_user
+
+	.global	copy_to_user
+	.type	copy_to_user, @function
+copy_to_user:
+	branch_if_kernel r8, __copy_user
+	ret_if_privileged r8, r12, r10, r10
+	.size	copy_to_user, . - copy_to_user
+
+	.global	__copy_user
+	.type	__copy_user, @function
+__copy_user:
+	mov	r9, r11
+	andl	r9, 3, COH
+	brne	6f
+
+	/* At this point, from is word-aligned */
+1:	sub	r10, 4
+	brlt	3f
+
+2:
+10:	ld.w	r8, r11++
+11:	st.w	r12++, r8
+	sub	r10, 4
+	brge	2b
+
+3:	sub	r10, -4
+	reteq	0
+
+	/*
+	 * Handle unaligned count. Need to be careful with r10 here so
+	 * that we return the correct value even if we get a fault
+	 */
+4:
+20:	ld.ub	r8, r11++
+21:	st.b	r12++, r8
+	sub	r10, 1
+	reteq	0
+22:	ld.ub	r8, r11++
+23:	st.b	r12++, r8
+	sub	r10, 1
+	reteq	0
+24:	ld.ub	r8, r11++
+25:	st.b	r12++, r8
+	retal	0
+
+	/* Handle unaligned from-pointer */
+6:	cp.w	r10, 4
+	brlt	4b
+	rsub	r9, r9, 4
+
+30:	ld.ub	r8, r11++
+31:	st.b	r12++, r8
+	sub	r10, 1
+	sub	r9, 1
+	breq	1b
+32:	ld.ub	r8, r11++
+33:	st.b	r12++, r8
+	sub	r10, 1
+	sub	r9, 1
+	breq	1b
+34:	ld.ub	r8, r11++
+35:	st.b	r12++, r8
+	sub	r10, 1
+	rjmp	1b
+	.size	__copy_user, . - __copy_user
+
+	.section .fixup,"ax"
+	.align	1
+19:	sub	r10, -4
+29:	retal	r10
+
+	.section __ex_table,"a"
+	.align	2
+	.long	10b, 19b
+	.long	11b, 19b
+	.long	20b, 29b
+	.long	21b, 29b
+	.long	22b, 29b
+	.long	23b, 29b
+	.long	24b, 29b
+	.long	25b, 29b
+	.long	30b, 29b
+	.long	31b, 29b
+	.long	32b, 29b
+	.long	33b, 29b
+	.long	34b, 29b
+	.long	35b, 29b
diff --git a/arch/avr32/lib/csum_partial.S b/arch/avr32/lib/csum_partial.S
new file mode 100644
index 0000000..6a262b5
--- /dev/null
+++ b/arch/avr32/lib/csum_partial.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * unsigned int csum_partial(const unsigned char *buff,
+	 *			     int len, unsigned int sum)
+	 */
+	.text
+	.global	csum_partial
+	.type	csum_partial,"function"
+	.align	1
+csum_partial:
+	/* checksum complete words, aligned or not */
+3:	sub	r11, 4
+	brlt	5f
+4:	ld.w	r9, r12++
+	add	r10, r9
+	acr	r10
+	sub	r11, 4
+	brge	4b
+
+	/* return if we had a whole number of words */
+5:	sub	r11, -4
+	reteq	r10
+
+	/* checksum any remaining bytes at the end */
+	mov	r9, 0
+	mov	r8, 0
+	cp	r11, 2
+	brlt	6f
+	ld.uh	r9, r12++
+	sub	r11, 2
+	breq	7f
+	lsl	r9, 16
+6:	ld.ub	r8, r12++
+	lsl	r8, 8
+7:	or	r9, r8
+	add	r10, r9
+	acr	r10
+
+	retal	r10
+	.size	csum_partial, . - csum_partial
diff --git a/arch/avr32/lib/csum_partial_copy_generic.S b/arch/avr32/lib/csum_partial_copy_generic.S
new file mode 100644
index 0000000..a3a0f9b
--- /dev/null
+++ b/arch/avr32/lib/csum_partial_copy_generic.S
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/errno.h>
+#include <asm/asm.h>
+
+	/*
+	 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len
+	 *					  int sum, int *src_err_ptr,
+	 *					  int *dst_err_ptr)
+	 *
+	 * Copy src to dst while checksumming, otherwise like csum_partial.
+	 */
+
+	.macro ld_src size, reg, ptr
+9999:	ld.\size \reg, \ptr
+	.section __ex_table, "a"
+	.long	9999b, fixup_ld_src
+	.previous
+	.endm
+
+	.macro st_dst size, ptr, reg
+9999:	st.\size \ptr, \reg
+	.section __ex_table, "a"
+	.long	9999b, fixup_st_dst
+	.previous
+	.endm
+
+	.text
+	.global	csum_partial_copy_generic
+	.type	csum_partial_copy_generic,"function"
+	.align	1
+csum_partial_copy_generic:
+	pushm	r4-r7,lr
+
+	/* The inner loop */
+1:	sub	r10, 4
+	brlt	5f
+2:	ld_src	w, r5, r12++
+	st_dst	w, r11++, r5
+	add	r9, r5
+	acr	r9
+	sub	r10, 4
+	brge	2b
+
+	/* return if we had a whole number of words */
+5:	sub	r10, -4
+	brne	7f
+
+6:	mov	r12, r9
+	popm	r4-r7,pc
+
+	/* handle additional bytes at the tail */
+7:	mov	r5, 0
+	mov	r4, 32
+8:	ld_src	ub, r6, r12++
+	st_dst	b, r11++, r6
+	lsl	r5, 8
+	sub	r4, 8
+	bfins	r5, r6, 0, 8
+	sub	r10, 1
+	brne	8b
+
+	lsl	r5, r5, r4
+	add	r9, r5
+	acr	r9
+	rjmp	6b
+
+	/* Exception handler */
+	.section .fixup,"ax"
+	.align	1
+fixup_ld_src:
+	mov	r9, -EFAULT
+	cp.w	r8, 0
+	breq	1f
+	st.w	r8[0], r9
+
+1:	/*
+	 * TODO: zero the complete destination - computing the rest
+	 * is too much work
+	 */
+
+	mov	r9, 0
+	rjmp	6b
+
+fixup_st_dst:
+	mov	r9, -EFAULT
+	lddsp	r8, sp[20]
+	cp.w	r8, 0
+	breq	1f
+	st.w	r8[0], r9
+1:	mov	r9, 0
+	rjmp	6b
+
+	.previous
diff --git a/arch/avr32/lib/delay.c b/arch/avr32/lib/delay.c
new file mode 100644
index 0000000..462c830
--- /dev/null
+++ b/arch/avr32/lib/delay.c
@@ -0,0 +1,55 @@
+/*
+ *      Precise Delay Loops for avr32
+ *
+ *      Copyright (C) 1993 Linus Torvalds
+ *      Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *	Copyright (C) 2005-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include <asm/sysreg.h>
+
+int read_current_timer(unsigned long *timer_value)
+{
+	*timer_value = sysreg_read(COUNT);
+	return 0;
+}
+
+void __delay(unsigned long loops)
+{
+	unsigned bclock, now;
+
+	bclock = sysreg_read(COUNT);
+	do {
+		now = sysreg_read(COUNT);
+	} while ((now - bclock) < loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+	unsigned long long loops;
+
+	asm("mulu.d %0, %1, %2"
+	    : "=r"(loops)
+	    : "r"(current_cpu_data.loops_per_jiffy * HZ), "r"(xloops));
+	__delay(loops >> 32);
+}
+
+void __udelay(unsigned long usecs)
+{
+	__const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+
+void __ndelay(unsigned long nsecs)
+{
+	__const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
diff --git a/arch/avr32/lib/findbit.S b/arch/avr32/lib/findbit.S
new file mode 100644
index 0000000..2b4856f
--- /dev/null
+++ b/arch/avr32/lib/findbit.S
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+
+	.text
+	/*
+	 * unsigned long find_first_zero_bit(const unsigned long *addr,
+	 *				     unsigned long size)
+	 */
+ENTRY(find_first_zero_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_zero_bit(const unsigned long *addr,
+	 *				    unsigned long size,
+	 *				    unsigned long offset)
+	 */
+ENTRY(find_next_zero_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	com	r8
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	com	r8
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/* Common return path for when a bit is actually found. */
+.L_found:
+	brev	r8
+	clz	r10, r8
+	rsub	r9, r11
+	add	r10, r9
+
+	/* XXX: If we don't have to return exactly "size" when the bit
+	   is not found, we may drop this "min" thing */
+	min	r12, r11, r10
+	retal	r12
+
+	/*
+	 * unsigned long find_first_bit(const unsigned long *addr,
+	 *				unsigned long size)
+	 */
+ENTRY(find_first_bit)
+	cp.w	r11, 0
+	reteq	r11
+	mov	r9, r11
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+	/*
+	 * unsigned long find_next_bit(const unsigned long *addr,
+	 *			       unsigned long size,
+	 *			       unsigned long offset)
+	 */
+ENTRY(find_next_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ld.w	r8, r12[0]
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ld.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
+
+ENTRY(generic_find_next_zero_le_bit)
+	lsr	r8, r10, 5
+	sub	r9, r11, r10
+	retle	r11
+
+	lsl	r8, 2
+	add	r12, r8
+	andl	r10, 31, COH
+	breq	1f
+
+	/* offset is not word-aligned. Handle the first (32 - r10) bits */
+	ldswp.w	r8, r12[0]
+	sub	r12, -4
+	lsr	r8, r8, r10
+	brne	.L_found
+
+	/* r9 = r9 - (32 - r10) = r9 + r10 - 32 */
+	add	r9, r10
+	sub	r9, 32
+	retle	r11
+
+	/* Main loop. offset must be word-aligned */
+1:	ldswp.w	r8, r12[0]
+	cp.w	r8, 0
+	brne	.L_found
+	sub	r12, -4
+	sub	r9, 32
+	brgt	1b
+	retal	r11
diff --git a/arch/avr32/lib/io-readsl.S b/arch/avr32/lib/io-readsl.S
new file mode 100644
index 0000000..b103511
--- /dev/null
+++ b/arch/avr32/lib/io-readsl.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	.global	__raw_readsl
+	.type	__raw_readsl,@function
+__raw_readsl:
+	cp.w	r10, 0
+	reteq	r12
+
+	/*
+	 * If r11 isn't properly aligned, we might get an exception on
+	 * some implementations. But there's not much we can do about it.
+	 */
+1:	ld.w	r8, r12[0]
+	sub	r10, 1
+	st.w	r11++, r8
+	brne	1b
+
+	retal	r12
diff --git a/arch/avr32/lib/io-readsw.S b/arch/avr32/lib/io-readsw.S
new file mode 100644
index 0000000..456be99
--- /dev/null
+++ b/arch/avr32/lib/io-readsw.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+.Lnot_word_aligned:
+	/*
+	 * Bad alignment will cause a hardware exception, which is as
+	 * good as anything. No need for us to check for proper alignment.
+	 */
+	ld.uh	r8, r12[0]
+	sub	r10, 1
+	st.h	r11++, r8
+
+	/* fall through */
+
+	.global	__raw_readsw
+	.type	__raw_readsw,@function
+__raw_readsw:
+	cp.w	r10, 0
+	reteq	r12
+	mov	r9, 3
+	tst	r11, r9
+	brne	.Lnot_word_aligned
+
+	sub	r10, 2
+	brlt	2f
+
+1:	ldins.h	r8:t, r12[0]
+	ldins.h	r8:b, r12[0]
+	st.w	r11++, r8
+	sub	r10, 2
+	brge	1b
+
+2:	sub	r10, -2
+	reteq	r12
+
+	ld.uh	r8, r12[0]
+	st.h	r11++, r8
+	retal	r12
diff --git a/arch/avr32/lib/io-writesl.S b/arch/avr32/lib/io-writesl.S
new file mode 100644
index 0000000..22138b3
--- /dev/null
+++ b/arch/avr32/lib/io-writesl.S
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	.global	__raw_writesl
+	.type	__raw_writesl,@function
+__raw_writesl:
+	cp.w	r10, 0
+	reteq	r12
+
+1:	ld.w	r8, r11++
+	sub	r10, 1
+	st.w	r12[0], r8
+	brne	1b
+
+	retal	r12
diff --git a/arch/avr32/lib/io-writesw.S b/arch/avr32/lib/io-writesw.S
new file mode 100644
index 0000000..8c4a53f
--- /dev/null
+++ b/arch/avr32/lib/io-writesw.S
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+.Lnot_word_aligned:
+	ld.uh	r8, r11++
+	sub	r10, 1
+	st.h	r12[0], r8
+
+	.global	__raw_writesw
+	.type	__raw_writesw,@function
+__raw_writesw:
+	cp.w	r10, 0
+	mov	r9, 3
+	reteq	r12
+	tst	r11, r9
+	brne	.Lnot_word_aligned
+
+	sub	r10, 2
+	brlt	2f
+
+1:	ld.w	r8, r11++
+	bfextu	r9, r8, 16, 16
+	st.h	r12[0], r9
+	st.h	r12[0], r8
+	sub	r10, 2
+	brge	1b
+
+2:	sub	r10, -2
+	reteq	r12
+
+	ld.uh	r8, r11++
+	st.h	r12[0], r8
+	retal	r12
diff --git a/arch/avr32/lib/libgcc.h b/arch/avr32/lib/libgcc.h
new file mode 100644
index 0000000..5a091b5
--- /dev/null
+++ b/arch/avr32/lib/libgcc.h
@@ -0,0 +1,33 @@
+/* Definitions for various functions 'borrowed' from gcc-3.4.3 */
+
+#define BITS_PER_UNIT	8
+
+typedef		 int QItype	__attribute__ ((mode (QI)));
+typedef unsigned int UQItype	__attribute__ ((mode (QI)));
+typedef		 int HItype	__attribute__ ((mode (HI)));
+typedef unsigned int UHItype	__attribute__ ((mode (HI)));
+typedef 	 int SItype	__attribute__ ((mode (SI)));
+typedef unsigned int USItype	__attribute__ ((mode (SI)));
+typedef		 int DItype	__attribute__ ((mode (DI)));
+typedef unsigned int UDItype	__attribute__ ((mode (DI)));
+typedef 	float SFtype	__attribute__ ((mode (SF)));
+typedef		float DFtype	__attribute__ ((mode (DF)));
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#define W_TYPE_SIZE (4 * BITS_PER_UNIT)
+#define Wtype	SItype
+#define UWtype	USItype
+#define HWtype	SItype
+#define UHWtype	USItype
+#define DWtype	DItype
+#define UDWtype	UDItype
+#define __NW(a,b)	__ ## a ## si ## b
+#define __NDW(a,b)	__ ## a ## di ## b
+
+struct DWstruct {Wtype high, low;};
+
+typedef union
+{
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
diff --git a/arch/avr32/lib/longlong.h b/arch/avr32/lib/longlong.h
new file mode 100644
index 0000000..cd5e369
--- /dev/null
+++ b/arch/avr32/lib/longlong.h
@@ -0,0 +1,98 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+   Free Software Foundation, Inc.
+
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* Borrowed from gcc-3.4.3 */
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#define count_leading_zeros(count, x) ((count) = __builtin_clz(x))
+
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+#define udiv_qrnnd __udiv_qrnnd_c
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) - (bl);							\
+    (sh) = (ah) - (bh) - (__x > (al));					\
+    (sl) = __x;								\
+  } while (0)
+
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __x0, __x1, __x2, __x3;					\
+    UHWtype __ul, __vl, __uh, __vh;					\
+									\
+    __ul = __ll_lowpart (u);						\
+    __uh = __ll_highpart (u);						\
+    __vl = __ll_lowpart (v);						\
+    __vh = __ll_highpart (v);						\
+									\
+    __x0 = (UWtype) __ul * __vl;					\
+    __x1 = (UWtype) __ul * __vh;					\
+    __x2 = (UWtype) __uh * __vl;					\
+    __x3 = (UWtype) __uh * __vh;					\
+									\
+    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
+									\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
+  } while (0)
diff --git a/arch/avr32/lib/memcpy.S b/arch/avr32/lib/memcpy.S
new file mode 100644
index 0000000..0abb261
--- /dev/null
+++ b/arch/avr32/lib/memcpy.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+	/*
+	 * void *memcpy(void *to, const void *from, unsigned long n)
+	 *
+	 * This implementation does word-aligned loads in the main loop,
+	 * possibly sacrificing alignment of stores.
+	 *
+	 * Hopefully, in most cases, both "to" and "from" will be
+	 * word-aligned to begin with.
+	 */
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:
+	mov	r9, r11
+	andl	r9, 3, COH
+	brne	1f
+
+	/* At this point, "from" is word-aligned */
+2:	sub	r10, 4
+	mov	r9, r12
+	brlt	4f
+
+3:	ld.w	r8, r11++
+	sub	r10, 4
+	st.w	r12++, r8
+	brge	3b
+
+4:	neg	r10
+	reteq	r9
+
+	/* Handle unaligned count */
+	lsl	r10, 2
+	add	pc, pc, r10
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	retal	r9
+
+	/* Handle unaligned "from" pointer */
+1:	sub	r10, 4
+	brlt	4b
+	add	r10, r9
+	lsl	r9, 2
+	add	pc, pc, r9
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	ld.ub	r8, r11++
+	st.b	r12++, r8
+	rjmp	2b
diff --git a/arch/avr32/lib/memset.S b/arch/avr32/lib/memset.S
new file mode 100644
index 0000000..40da32c
--- /dev/null
+++ b/arch/avr32/lib/memset.S
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * Based on linux/arch/arm/lib/memset.S
+ *   Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ASM optimised string functions
+ */
+#include <asm/asm.h>
+
+	/*
+	 * r12:	void *b
+	 * r11:	int c
+	 * r10:	size_t len
+	 *
+	 * Returns b in r12
+	 */
+	.text
+	.global	memset
+	.type	memset, @function
+	.align	5
+memset:
+	mov	r9, r12
+	mov	r8, r12
+	or	r11, r11, r11 << 8
+	andl	r9, 3, COH
+	brne	1f
+
+2:	or	r11, r11, r11 << 16
+	sub	r10, 4
+	brlt	5f
+
+	/* Let's do some real work */
+4:	st.w	r8++, r11
+	sub	r10, 4
+	brge	4b
+
+	/*
+	 * When we get here, we've got less than 4 bytes to set. r10
+	 * might be negative.
+	 */
+5:	sub	r10, -4
+	reteq	r12
+
+	/* Fastpath ends here, exactly 32 bytes from memset */
+
+	/* Handle unaligned count or pointer */
+	bld	r10, 1
+	brcc	6f
+	st.b	r8++, r11
+	st.b	r8++, r11
+	bld	r10, 0
+	retcc	r12
+6:	st.b	r8++, r11
+	retal	r12
+
+	/* Handle unaligned pointer */
+1:	sub	r10, 4
+	brlt	5b
+	add	r10, r9
+	lsl	r9, 1
+	add	pc, r9
+	st.b	r8++, r11
+	st.b	r8++, r11
+	st.b	r8++, r11
+	rjmp	2b
+
+	.size	memset, . - memset
diff --git a/arch/avr32/lib/strncpy_from_user.S b/arch/avr32/lib/strncpy_from_user.S
new file mode 100644
index 0000000..72bd505
--- /dev/null
+++ b/arch/avr32/lib/strncpy_from_user.S
@@ -0,0 +1,60 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/asm.h>
+
+	/*
+	 * long strncpy_from_user(char *dst, const char *src, long count)
+	 *
+	 * On success, returns the length of the string, not including
+	 * the terminating NUL.
+	 *
+	 * If the string is longer than count, returns count
+	 *
+	 * If userspace access fails, returns -EFAULT
+	 */
+	.text
+	.align	1
+	.global	strncpy_from_user
+	.type	strncpy_from_user, "function"
+strncpy_from_user:
+	mov	r9, -EFAULT
+	branch_if_kernel r8, __strncpy_from_user
+	ret_if_privileged r8, r11, r10, r9
+
+	.global	__strncpy_from_user
+	.type	__strncpy_from_user, "function"
+__strncpy_from_user:
+	cp.w	r10, 0
+	reteq	0
+
+	mov	r9, r10
+
+1:	ld.ub	r8, r11++
+	st.b	r12++, r8
+	cp.w	r8, 0
+	breq	2f
+	sub	r9, 1
+	brne	1b
+
+2:	sub	r10, r9
+	retal	r10
+
+	.section .fixup, "ax"
+	.align	1
+3:	mov	r12, -EFAULT
+	retal	r12
+
+	.section __ex_table, "a"
+	.align	2
+	.long	1b, 3b
diff --git a/arch/avr32/lib/strnlen_user.S b/arch/avr32/lib/strnlen_user.S
new file mode 100644
index 0000000..65ce11a
--- /dev/null
+++ b/arch/avr32/lib/strnlen_user.S
@@ -0,0 +1,67 @@
+/*
+ * Copy to/from userspace with optional address space checking.
+ *
+ * Copyright 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/asm.h>
+
+	.text
+	.align	1
+	.global	strnlen_user
+	.type	strnlen_user, "function"
+strnlen_user:
+	branch_if_kernel r8, __strnlen_user
+	sub	r8, r11, 1
+	add	r8, r12
+	retcs	0
+	brmi	adjust_length	/* do a closer inspection */
+
+	.global	__strnlen_user
+	.type	__strnlen_user, "function"
+__strnlen_user:
+	mov	r10, r12
+
+10:	ld.ub	r8, r12++
+	cp.w	r8, 0
+	breq	2f
+	sub	r11, 1
+	brne	10b
+
+	sub	r12, -1
+2:	sub	r12, r10
+	retal	r12
+
+
+	.type	adjust_length, "function"
+adjust_length:
+	cp.w	r12, 0		/* addr must always be < TASK_SIZE */
+	retmi	0
+
+	pushm	lr
+	lddpc	lr, _task_size
+	sub	r11, lr, r12
+	mov	r9, r11
+	rcall	__strnlen_user
+	cp.w	r12, r9
+	brgt	1f
+	popm	pc
+1:	popm	pc, r12=0
+
+	.align	2
+_task_size:
+	.long	TASK_SIZE
+
+	.section .fixup, "ax"
+	.align	1
+19:	retal	0
+
+	.section __ex_table, "a"
+	.align	2
+	.long	10b, 19b