Blackfin arch:  Extend sram malloc to handle L2 SRAM.

Extend system call to alloc L2 SRAM in application.
Automatically move following sections to L2 SRAM:
1. kernel built-in l2 attribute section
2. kernel module l2 attribute section
3. elf-fdpic application l2 attribute section

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>

diff --git a/arch/blackfin/mm/blackfin_sram.c b/arch/blackfin/mm/blackfin_sram.c
index b58cf19..5af3c31 100644
--- a/arch/blackfin/mm/blackfin_sram.c
+++ b/arch/blackfin/mm/blackfin_sram.c
@@ -42,6 +42,7 @@
 #include "blackfin_sram.h"
 
 static spinlock_t l1sram_lock, l1_data_sram_lock, l1_inst_sram_lock;
+static spinlock_t l2_sram_lock;
 
 /* the data structure for L1 scratchpad and DATA SRAM */
 struct sram_piece {
@@ -65,6 +66,10 @@
 static struct sram_piece free_l1_inst_sram_head, used_l1_inst_sram_head;
 #endif
 
+#ifdef L2_LENGTH
+static struct sram_piece free_l2_sram_head, used_l2_sram_head;
+#endif
+
 static struct kmem_cache *sram_piece_cache;
 
 /* L1 Scratchpad SRAM initialization function */
@@ -97,7 +102,7 @@
 	free_l1_data_A_sram_head.next =
 		kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
 	if (!free_l1_data_A_sram_head.next) {
-		printk(KERN_INFO"Fail to initialize Data A SRAM.\n");
+		printk(KERN_INFO"Fail to initialize L1 Data A SRAM.\n");
 		return;
 	}
 
@@ -110,7 +115,7 @@
 
 	used_l1_data_A_sram_head.next = NULL;
 
-	printk(KERN_INFO "Blackfin Data A SRAM: %d KB (%d KB free)\n",
+	printk(KERN_INFO "Blackfin L1 Data A SRAM: %d KB (%d KB free)\n",
 		L1_DATA_A_LENGTH >> 10,
 		free_l1_data_A_sram_head.next->size >> 10);
 #endif
@@ -118,7 +123,7 @@
 	free_l1_data_B_sram_head.next =
 		kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
 	if (!free_l1_data_B_sram_head.next) {
-		printk(KERN_INFO"Fail to initialize Data B SRAM.\n");
+		printk(KERN_INFO"Fail to initialize L1 Data B SRAM.\n");
 		return;
 	}
 
@@ -131,7 +136,7 @@
 
 	used_l1_data_B_sram_head.next = NULL;
 
-	printk(KERN_INFO "Blackfin Data B SRAM: %d KB (%d KB free)\n",
+	printk(KERN_INFO "Blackfin L1 Data B SRAM: %d KB (%d KB free)\n",
 		L1_DATA_B_LENGTH >> 10,
 		free_l1_data_B_sram_head.next->size >> 10);
 #endif
@@ -146,7 +151,7 @@
 	free_l1_inst_sram_head.next =
 		kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
 	if (!free_l1_inst_sram_head.next) {
-		printk(KERN_INFO"Fail to initialize Instruction SRAM.\n");
+		printk(KERN_INFO"Fail to initialize L1 Instruction SRAM.\n");
 		return;
 	}
 
@@ -159,7 +164,7 @@
 
 	used_l1_inst_sram_head.next = NULL;
 
-	printk(KERN_INFO "Blackfin Instruction SRAM: %d KB (%d KB free)\n",
+	printk(KERN_INFO "Blackfin L1 Instruction SRAM: %d KB (%d KB free)\n",
 		L1_CODE_LENGTH >> 10,
 		free_l1_inst_sram_head.next->size >> 10);
 #endif
@@ -168,6 +173,33 @@
 	spin_lock_init(&l1_inst_sram_lock);
 }
 
+static void __init l2_sram_init(void)
+{
+#ifdef L2_LENGTH
+	free_l2_sram_head.next =
+		kmem_cache_alloc(sram_piece_cache, GFP_KERNEL);
+	if (!free_l2_sram_head.next) {
+		printk(KERN_INFO"Fail to initialize L2 SRAM.\n");
+		return;
+	}
+
+	free_l2_sram_head.next->paddr = (void *)L2_START +
+		(_etext_l2 - _stext_l2) + (_edata_l2 - _sdata_l2);
+	free_l2_sram_head.next->size = L2_LENGTH -
+		(_etext_l2 - _stext_l2) + (_edata_l2 - _sdata_l2);
+	free_l2_sram_head.next->pid = 0;
+	free_l2_sram_head.next->next = NULL;
+
+	used_l2_sram_head.next = NULL;
+
+	printk(KERN_INFO "Blackfin L2 SRAM: %d KB (%d KB free)\n",
+		L2_LENGTH >> 10,
+		free_l2_sram_head.next->size >> 10);
+#endif
+
+	/* mutex initialize */
+	spin_lock_init(&l2_sram_lock);
+}
 void __init bfin_sram_init(void)
 {
 	sram_piece_cache = kmem_cache_create("sram_piece_cache",
@@ -177,10 +209,11 @@
 	l1sram_init();
 	l1_data_sram_init();
 	l1_inst_sram_init();
+	l2_sram_init();
 }
 
-/* L1 memory allocate function */
-static void *_l1_sram_alloc(size_t size, struct sram_piece *pfree_head,
+/* SRAM allocate function */
+static void *_sram_alloc(size_t size, struct sram_piece *pfree_head,
 		struct sram_piece *pused_head)
 {
 	struct sram_piece *pslot, *plast, *pavail;
@@ -236,7 +269,7 @@
 }
 
 /* Allocate the largest available block.  */
-static void *_l1_sram_alloc_max(struct sram_piece *pfree_head,
+static void *_sram_alloc_max(struct sram_piece *pfree_head,
 				struct sram_piece *pused_head,
 				unsigned long *psize)
 {
@@ -259,11 +292,11 @@
 
 	*psize = pmax->size;
 
-	return _l1_sram_alloc(*psize, pfree_head, pused_head);
+	return _sram_alloc(*psize, pfree_head, pused_head);
 }
 
-/* L1 memory free function */
-static int _l1_sram_free(const void *addr,
+/* SRAM free function */
+static int _sram_free(const void *addr,
 			struct sram_piece *pfree_head,
 			struct sram_piece *pused_head)
 {
@@ -334,6 +367,11 @@
 		 && addr < (void *)(L1_DATA_B_START + L1_DATA_B_LENGTH))
 		return l1_data_B_sram_free(addr);
 #endif
+#ifdef L2_LENGTH
+	else if (addr >= (void *)L2_START
+		 && addr < (void *)(L2_START + L2_LENGTH))
+		return l2_sram_free(addr);
+#endif
 	else
 		return -1;
 }
@@ -348,7 +386,7 @@
 	spin_lock_irqsave(&l1_data_sram_lock, flags);
 
 #if L1_DATA_A_LENGTH != 0
-	addr = _l1_sram_alloc(size, &free_l1_data_A_sram_head,
+	addr = _sram_alloc(size, &free_l1_data_A_sram_head,
 			&used_l1_data_A_sram_head);
 #endif
 
@@ -371,7 +409,7 @@
 	spin_lock_irqsave(&l1_data_sram_lock, flags);
 
 #if L1_DATA_A_LENGTH != 0
-	ret = _l1_sram_free(addr, &free_l1_data_A_sram_head,
+	ret = _sram_free(addr, &free_l1_data_A_sram_head,
 			&used_l1_data_A_sram_head);
 #else
 	ret = -1;
@@ -393,7 +431,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1_data_sram_lock, flags);
 
-	addr = _l1_sram_alloc(size, &free_l1_data_B_sram_head,
+	addr = _sram_alloc(size, &free_l1_data_B_sram_head,
 			&used_l1_data_B_sram_head);
 
 	/* add mutex operation */
@@ -418,7 +456,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1_data_sram_lock, flags);
 
-	ret = _l1_sram_free(addr, &free_l1_data_B_sram_head,
+	ret = _sram_free(addr, &free_l1_data_B_sram_head,
 			&used_l1_data_B_sram_head);
 
 	/* add mutex operation */
@@ -472,7 +510,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1_inst_sram_lock, flags);
 
-	addr = _l1_sram_alloc(size, &free_l1_inst_sram_head,
+	addr = _sram_alloc(size, &free_l1_inst_sram_head,
 			&used_l1_inst_sram_head);
 
 	/* add mutex operation */
@@ -497,7 +535,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1_inst_sram_lock, flags);
 
-	ret = _l1_sram_free(addr, &free_l1_inst_sram_head,
+	ret = _sram_free(addr, &free_l1_inst_sram_head,
 			&used_l1_inst_sram_head);
 
 	/* add mutex operation */
@@ -519,7 +557,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1sram_lock, flags);
 
-	addr = _l1_sram_alloc(size, &free_l1_ssram_head,
+	addr = _sram_alloc(size, &free_l1_ssram_head,
 			&used_l1_ssram_head);
 
 	/* add mutex operation */
@@ -537,7 +575,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1sram_lock, flags);
 
-	addr = _l1_sram_alloc_max(&free_l1_ssram_head,
+	addr = _sram_alloc_max(&free_l1_ssram_head,
 			&used_l1_ssram_head, psize);
 
 	/* add mutex operation */
@@ -555,7 +593,7 @@
 	/* add mutex operation */
 	spin_lock_irqsave(&l1sram_lock, flags);
 
-	ret = _l1_sram_free(addr, &free_l1_ssram_head,
+	ret = _sram_free(addr, &free_l1_ssram_head,
 			&used_l1_ssram_head);
 
 	/* add mutex operation */
@@ -564,6 +602,64 @@
 	return ret;
 }
 
+void *l2_sram_alloc(size_t size)
+{
+#ifdef L2_LENGTH
+	unsigned flags;
+	void *addr;
+
+	/* add mutex operation */
+	spin_lock_irqsave(&l2_sram_lock, flags);
+
+	addr = _sram_alloc(size, &free_l2_sram_head,
+			&used_l2_sram_head);
+
+	/* add mutex operation */
+	spin_unlock_irqrestore(&l2_sram_lock, flags);
+
+	pr_debug("Allocated address in l2_sram_alloc is 0x%lx+0x%lx\n",
+		 (long unsigned int)addr, size);
+
+	return addr;
+#else
+	return NULL;
+#endif
+}
+EXPORT_SYMBOL(l2_sram_alloc);
+
+void *l2_sram_zalloc(size_t size)
+{
+	void *addr = l2_sram_alloc(size);
+
+	if (addr)
+		memset(addr, 0x00, size);
+
+	return addr;
+}
+EXPORT_SYMBOL(l2_sram_zalloc);
+
+int l2_sram_free(const void *addr)
+{
+#ifdef L2_LENGTH
+	unsigned flags;
+	int ret;
+
+	/* add mutex operation */
+	spin_lock_irqsave(&l2_sram_lock, flags);
+
+	ret = _sram_free(addr, &free_l2_sram_head,
+			&used_l2_sram_head);
+
+	/* add mutex operation */
+	spin_unlock_irqrestore(&l2_sram_lock, flags);
+
+	return ret;
+#else
+	return -1;
+#endif
+}
+EXPORT_SYMBOL(l2_sram_free);
+
 int sram_free_with_lsl(const void *addr)
 {
 	struct sram_list_struct *lsl, **tmp;
@@ -602,6 +698,9 @@
 	if (addr == NULL && (flags & L1_DATA_B_SRAM))
 		addr = l1_data_B_sram_alloc(size);
 
+	if (addr == NULL && (flags & L2_SRAM))
+		addr = l2_sram_alloc(size);
+
 	if (addr == NULL) {
 		kfree(lsl);
 		return NULL;
@@ -621,7 +720,7 @@
 /* Need to keep line of output the same.  Currently, that is 44 bytes
  * (including newline).
  */
-static int _l1sram_proc_read(char *buf, int *len, int count, const char *desc,
+static int _sram_proc_read(char *buf, int *len, int count, const char *desc,
 		struct sram_piece *pfree_head,
 		struct sram_piece *pused_head)
 {
@@ -630,13 +729,13 @@
 	if (!pfree_head || !pused_head)
 		return -1;
 
-	*len += sprintf(&buf[*len], "--- L1 %-14s Size   PID State     \n", desc);
+	*len += sprintf(&buf[*len], "--- SRAM %-14s Size   PID State     \n", desc);
 
 	/* search the relevant memory slot */
 	pslot = pused_head->next;
 
 	while (pslot != NULL) {
-		*len += sprintf(&buf[*len], "%p-%p %8i %5i %-10s\n",
+		*len += sprintf(&buf[*len], "%p-%p %10i %5i %-10s\n",
 			pslot->paddr, pslot->paddr + pslot->size,
 			pslot->size, pslot->pid, "ALLOCATED");
 
@@ -646,7 +745,7 @@
 	pslot = pfree_head->next;
 
 	while (pslot != NULL) {
-		*len += sprintf(&buf[*len], "%p-%p %8i %5i %-10s\n",
+		*len += sprintf(&buf[*len], "%p-%p %10i %5i %-10s\n",
 			pslot->paddr, pslot->paddr + pslot->size,
 			pslot->size, pslot->pid, "FREE");
 
@@ -655,38 +754,43 @@
 
 	return 0;
 }
-static int l1sram_proc_read(char *buf, char **start, off_t offset, int count,
+static int sram_proc_read(char *buf, char **start, off_t offset, int count,
 		int *eof, void *data)
 {
 	int len = 0;
 
-	if (_l1sram_proc_read(buf, &len, count, "Scratchpad",
+	if (_sram_proc_read(buf, &len, count, "Scratchpad",
 			&free_l1_ssram_head, &used_l1_ssram_head))
 		goto not_done;
 #if L1_DATA_A_LENGTH != 0
-	if (_l1sram_proc_read(buf, &len, count, "Data A",
+	if (_sram_proc_read(buf, &len, count, "L1 Data A",
 			&free_l1_data_A_sram_head,
 			&used_l1_data_A_sram_head))
 		goto not_done;
 #endif
 #if L1_DATA_B_LENGTH != 0
-	if (_l1sram_proc_read(buf, &len, count, "Data B",
+	if (_sram_proc_read(buf, &len, count, "L1 Data B",
 			&free_l1_data_B_sram_head,
 			&used_l1_data_B_sram_head))
 		goto not_done;
 #endif
 #if L1_CODE_LENGTH != 0
-	if (_l1sram_proc_read(buf, &len, count, "Instruction",
+	if (_sram_proc_read(buf, &len, count, "L1 Instruction",
 			&free_l1_inst_sram_head, &used_l1_inst_sram_head))
 		goto not_done;
 #endif
+#ifdef L2_LENGTH
+	if (_sram_proc_read(buf, &len, count, "L2",
+			&free_l2_sram_head, &used_l2_sram_head))
+		goto not_done;
+#endif
 
 	*eof = 1;
  not_done:
 	return len;
 }
 
-static int __init l1sram_proc_init(void)
+static int __init sram_proc_init(void)
 {
 	struct proc_dir_entry *ptr;
 	ptr = create_proc_entry("sram", S_IFREG | S_IRUGO, NULL);
@@ -695,8 +799,8 @@
 		return -1;
 	}
 	ptr->owner = THIS_MODULE;
-	ptr->read_proc = l1sram_proc_read;
+	ptr->read_proc = sram_proc_read;
 	return 0;
 }
-late_initcall(l1sram_proc_init);
+late_initcall(sram_proc_init);
 #endif