[PATCH] avr32 architecture

This adds support for the Atmel AVR32 architecture as well as the AT32AP7000
CPU and the AT32STK1000 development board.

AVR32 is a new high-performance 32-bit RISC microprocessor core, designed for
cost-sensitive embedded applications, with particular emphasis on low power
consumption and high code density.  The AVR32 architecture is not binary
compatible with earlier 8-bit AVR architectures.

The AVR32 architecture, including the instruction set, is described by the
AVR32 Architecture Manual, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32000.pdf

The Atmel AT32AP7000 is the first CPU implementing the AVR32 architecture.  It
features a 7-stage pipeline, 16KB instruction and data caches and a full
Memory Management Unit.  It also comes with a large set of integrated
peripherals, many of which are shared with the AT91 ARM-based controllers from
Atmel.

Full data sheet is available from

http://www.atmel.com/dyn/resources/prod_documents/doc32003.pdf

while the CPU core implementation including caches and MMU is documented by
the AVR32 AP Technical Reference, available from

http://www.atmel.com/dyn/resources/prod_documents/doc32001.pdf

Information about the AT32STK1000 development board can be found at

http://www.atmel.com/dyn/products/tools_card.asp?tool_id=3918

including a BSP CD image with an earlier version of this patch, development
tools (binaries and source/patches) and a root filesystem image suitable for
booting from SD card.

Alternatively, there's a preliminary "getting started" guide available at
http://avr32linux.org/twiki/bin/view/Main/GettingStarted which provides links
to the sources and patches you will need in order to set up a cross-compiling
environment for avr32-linux.

This patch, as well as the other patches included with the BSP and the
toolchain patches, is actively supported by Atmel Corporation.

[dmccr@us.ibm.com: Fix more pxx_page macro locations]
[bunk@stusta.de: fix `make defconfig']
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Dave McCracken <dmccr@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
new file mode 100644
index 0000000..5299f8c
--- /dev/null
+++ b/include/asm-avr32/bitops.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2004-2006 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_AVR32_BITOPS_H
+#define __ASM_AVR32_BITOPS_H
+
+#include <asm/byteorder.h>
+#include <asm/system.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler
+ */
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+/*
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long tmp;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	sbr	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "i"(nr)
+			: "cc");
+	} else {
+		unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	or	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "r"(mask)
+			: "cc");
+	}
+}
+
+/*
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void clear_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long tmp;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	cbr	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "i"(nr)
+			: "cc");
+	} else {
+		unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %2\n"
+			"	andn	%0, %3\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p)
+			: "m"(*p), "r"(mask)
+			: "cc");
+	}
+}
+
+/*
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp;
+
+	asm volatile(
+		"1:	ssrf	5\n"
+		"	ld.w	%0, %2\n"
+		"	eor	%0, %3\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b"
+		: "=&r"(tmp), "=o"(*p)
+		: "m"(*p), "r"(mask)
+		: "cc");
+}
+
+/*
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	sbr	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "i"(nr)
+			: "memory", "cc");
+	} else {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%2, %3\n"
+			"	or	%0, %2, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "r"(mask)
+			: "memory", "cc");
+	}
+
+	return (old & mask) != 0;
+}
+
+/*
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	if (__builtin_constant_p(nr)) {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	cbr	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "i"(nr)
+			: "memory", "cc");
+	} else {
+		asm volatile(
+			"1:	ssrf	5\n"
+			"	ld.w	%0, %3\n"
+			"	mov	%2, %0\n"
+			"	andn	%0, %4\n"
+			"	stcond	%1, %0\n"
+			"	brne	1b"
+			: "=&r"(tmp), "=o"(*p), "=&r"(old)
+			: "m"(*p), "r"(mask)
+			: "memory", "cc");
+	}
+
+	return (old & mask) != 0;
+}
+
+/*
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile void * addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + nr / BITS_PER_LONG;
+	unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+	unsigned long tmp, old;
+
+	asm volatile(
+		"1:	ssrf	5\n"
+		"	ld.w	%2, %3\n"
+		"	eor	%0, %2, %4\n"
+		"	stcond	%1, %0\n"
+		"	brne	1b"
+		: "=&r"(tmp), "=o"(*p), "=&r"(old)
+		: "m"(*p), "r"(mask)
+		: "memory", "cc");
+
+	return (old & mask) != 0;
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+/* Find First bit Set */
+static inline unsigned long __ffs(unsigned long word)
+{
+	unsigned long result;
+
+	asm("brev %1\n\t"
+	    "clz %0,%1"
+	    : "=r"(result), "=&r"(word)
+	    : "1"(word));
+	return result;
+}
+
+/* Find First Zero */
+static inline unsigned long ffz(unsigned long word)
+{
+	return __ffs(~word);
+}
+
+/* Find Last bit Set */
+static inline int fls(unsigned long word)
+{
+	unsigned long result;
+
+	asm("clz %0,%1" : "=r"(result) : "r"(word));
+	return 32 - result;
+}
+
+unsigned long find_first_zero_bit(const unsigned long *addr,
+				  unsigned long size);
+unsigned long find_next_zero_bit(const unsigned long *addr,
+				 unsigned long size,
+				 unsigned long offset);
+unsigned long find_first_bit(const unsigned long *addr,
+			     unsigned long size);
+unsigned long find_next_bit(const unsigned long *addr,
+				 unsigned long size,
+				 unsigned long offset);
+
+/*
+ * ffs: find first bit set. This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ *
+ * The difference is that bit numbering starts at 1, and if no bit is set,
+ * the function returns 0.
+ */
+static inline int ffs(unsigned long word)
+{
+	if(word == 0)
+		return 0;
+	return __ffs(word) + 1;
+}
+
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+#include <asm-generic/bitops/minix-le.h>
+
+#endif /* __ASM_AVR32_BITOPS_H */