gru: preload tlb for bcopy instructions

Add anticipatory TLB dropins for GRU TLB misses that occur on BCOPY
instructions that copy large amounts of data.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 7d757e9..a1b3a1d 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -290,6 +290,61 @@
 
 
 /*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+	if (unlikely(cbe)) {
+		cbe->cbrexecstatus = 0;         /* make CL dirty */
+		gru_flush_cache(cbe);
+	}
+}
+
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload  <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+			struct gru_thread_state *gts, int atomic,
+			unsigned long fault_vaddr, int asid, int write,
+			unsigned char tlb_preload_count,
+			struct gru_tlb_fault_handle *tfh,
+			struct gru_control_block_extended *cbe)
+{
+	unsigned long vaddr = 0, gpa;
+	int ret, pageshift;
+
+	if (cbe->opccpy != OP_BCOPY)
+		return;
+
+	if (fault_vaddr == cbe->cbe_baddr0)
+		vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+	else if (fault_vaddr == cbe->cbe_baddr1)
+		vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+	fault_vaddr &= PAGE_MASK;
+	vaddr &= PAGE_MASK;
+	vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+	while (vaddr > fault_vaddr) {
+		ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+		if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+					  GRU_PAGESIZE(pageshift)))
+			return;
+		gru_dbg(grudev,
+			"%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+			atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+			vaddr, asid, write, pageshift, gpa);
+		vaddr -= PAGE_SIZE;
+		STAT(tlb_preload_page);
+	}
+}
+
+/*
  * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
  *	Input:
  *		cb    Address of user CBR. Null if not running in user context
@@ -303,6 +358,8 @@
 			  struct gru_tlb_fault_handle *tfh,
 			  struct gru_instruction_bits *cbk)
 {
+	struct gru_control_block_extended *cbe = NULL;
+	unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
 	int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
 	unsigned long gpa = 0, vaddr = 0;
 
@@ -314,6 +371,14 @@
 	 */
 
 	/*
+	 * Prefetch the CBE if doing TLB preloading
+	 */
+	if (unlikely(tlb_preload_count)) {
+		cbe = gru_tfh_to_cbe(tfh);
+		prefetchw(cbe);
+	}
+
+	/*
 	 * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
 	 * Might be a hardware race OR a stupid user. Ignore FMM because FMM
 	 * is a transient state.
@@ -359,6 +424,12 @@
 			goto failupm;
 		}
 	}
+
+	if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+		gru_preload_tlb(gts->ts_gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+		gru_flush_cache_cbe(cbe);
+	}
+
 	gru_cb_set_istatus_active(cbk);
 	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
 			  GRU_PAGESIZE(pageshift));
@@ -378,11 +449,13 @@
 		tfh_user_polling_mode(tfh);
 	else
 		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	return -EAGAIN;
 
 failupm:
 	/* Atomic failure switch CBR to UPM */
 	tfh_user_polling_mode(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_upm);
 	gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
 	return 1;
@@ -390,6 +463,7 @@
 failfmm:
 	/* FMM state on UPM call */
 	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_fmm);
 	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
 	return 0;
@@ -397,6 +471,7 @@
 failnoexception:
 	/* TFH status did not show exception pending */
 	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	if (cbk)
 		gru_flush_cache(cbk);
 	STAT(tlb_dropin_fail_no_exception);
@@ -407,6 +482,7 @@
 failidle:
 	/* TFH state was idle  - no miss pending */
 	gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	if (cbk)
 		gru_flush_cache(cbk);
 	STAT(tlb_dropin_fail_idle);
@@ -416,6 +492,7 @@
 failinval:
 	/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
 	tfh_exception(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_invalid);
 	gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
 	return -EFAULT;
@@ -426,6 +503,7 @@
 		tfh_user_polling_mode(tfh);
 	else
 		gru_flush_cache(tfh);
+	gru_flush_cache_cbe(cbe);
 	STAT(tlb_dropin_fail_range_active);
 	gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
 		tfh, vaddr);
@@ -627,7 +705,7 @@
 		excdet.exceptdet1 = cbe->idef3upd;
 		excdet.cbrstate = cbe->cbrstate;
 		excdet.cbrexecstatus = cbe->cbrexecstatus;
-		gru_flush_cache(cbe);
+		gru_flush_cache_cbe(cbe);
 		ret = 0;
 	} else {
 		ret = -EAGAIN;
@@ -770,9 +848,12 @@
 		return -EFAULT;
 	gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
 
-	gts = gru_alloc_locked_gts(req.gseg);
-	if (IS_ERR(gts))
-		return PTR_ERR(gts);
+	gts = gru_find_lock_gts(req.gseg);
+	if (!gts) {
+		gts = gru_alloc_locked_gts(req.gseg);
+		if (IS_ERR(gts))
+			return PTR_ERR(gts);
+	}
 
 	switch (req.op) {
 	case sco_blade_chiplet:
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 9d41208..cb3b4d2 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -152,6 +152,7 @@
 		vdata->vd_dsr_au_count =
 		    GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
 		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+		vdata->vd_tlb_preload_count = req.tlb_preload_count;
 		ret = 0;
 	}
 	up_write(&current->mm->mmap_sem);
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 66d67d9..2f30bad 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -165,17 +165,20 @@
 	return wait_instruction_complete(tgh, tghop_invalidate);
 }
 
-void tfh_write_only(struct gru_tlb_fault_handle *tfh,
-				  unsigned long pfn, unsigned long vaddr,
-				  int asid, int dirty, int pagesize)
+int tfh_write_only(struct gru_tlb_fault_handle *tfh,
+				  unsigned long paddr, int gaa,
+				  unsigned long vaddr, int asid, int dirty,
+				  int pagesize)
 {
 	tfh->fillasid = asid;
 	tfh->fillvaddr = vaddr;
-	tfh->pfn = pfn;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
 	tfh->dirty = dirty;
 	tfh->pagesize = pagesize;
 	tfh->opc = TFHOP_WRITE_ONLY;
 	start_instruction(tfh);
+	return wait_instruction_complete(tfh, tfhop_write_only);
 }
 
 void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 47b762f..ea584eb 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -164,6 +164,16 @@
 	return vaddr + GRU_SIZE * (2 * pnode  + chiplet);
 }
 
+static inline struct gru_control_block_extended *gru_tfh_to_cbe(
+					struct gru_tlb_fault_handle *tfh)
+{
+	unsigned long cbe;
+
+	cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
+	return (struct gru_control_block_extended*)cbe;
+}
+
+
 
 
 /*
@@ -446,6 +456,12 @@
 	unsigned int cbrexecstatus:8;
 };
 
+/* CBE fields for active BCOPY instructions */
+#define cbe_baddr0	idef1upd
+#define cbe_baddr1	idef3upd
+#define cbe_src_cl	idef6cpy
+#define cbe_nelemcur	idef5upd
+
 enum gru_cbr_state {
 	CBRSTATE_INACTIVE,
 	CBRSTATE_IDLE,
@@ -493,8 +509,8 @@
 int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
 	unsigned long vaddrmask, int asid, int pagesize, int global, int n,
 	unsigned short ctxbitmap);
-void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn,
-	unsigned long vaddr, int asid, int dirty, int pagesize);
+int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
 void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
 	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
 void tfh_restart(struct gru_tlb_fault_handle *tfh);
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 4da6f56..d9ff028 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -161,7 +161,7 @@
 	down_write(&bs->bs_kgts_sema);
 
 	if (!bs->bs_kgts) {
-		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0);
+		bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
 		bs->bs_kgts->ts_user_blade_id = blade_id;
 	}
 	kgts = bs->bs_kgts;
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e033b36..c6928af 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -86,6 +86,7 @@
 	unsigned int		control_blocks;
 	unsigned int		maximum_thread_count;
 	unsigned int		options;
+	unsigned char		tlb_preload_count;
 };
 
 /*
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ebabbdc..ade0925 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -316,7 +316,8 @@
  * Allocate a thread state structure.
  */
 struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
-		int cbr_au_count, int dsr_au_count, int options, int tsid)
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid)
 {
 	struct gru_thread_state *gts;
 	struct gru_mm_struct *gms;
@@ -334,6 +335,7 @@
 	mutex_init(&gts->ts_ctxlock);
 	gts->ts_cbr_au_count = cbr_au_count;
 	gts->ts_dsr_au_count = dsr_au_count;
+	gts->ts_tlb_preload_count = tlb_preload_count;
 	gts->ts_user_options = options;
 	gts->ts_user_blade_id = -1;
 	gts->ts_user_chiplet_id = -1;
@@ -403,7 +405,9 @@
 	struct gru_vma_data *vdata = vma->vm_private_data;
 	struct gru_thread_state *gts, *ngts;
 
-	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
+	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+			    vdata->vd_dsr_au_count,
+			    vdata->vd_tlb_preload_count,
 			    vdata->vd_user_options, tsid);
 	if (IS_ERR(gts))
 		return gts;
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 0a57ab2..54a5a1c 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -76,6 +76,7 @@
 	printstat(s, check_context_retarget_intr);
 	printstat(s, check_context_unload);
 	printstat(s, tlb_dropin);
+	printstat(s, tlb_preload_page);
 	printstat(s, tlb_dropin_fail_no_asid);
 	printstat(s, tlb_dropin_fail_upm);
 	printstat(s, tlb_dropin_fail_invalid);
@@ -127,7 +128,8 @@
 	int op;
 	unsigned long total, count, max;
 	static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
-		"cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"};
+		"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
+		"tfh_write_restart", "tgh_invalidate"};
 
 	seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
 	for (op = 0; op < mcsop_last; op++) {
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index 76fe298..adaf691 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -202,6 +202,7 @@
 	atomic_long_t check_context_retarget_intr;
 	atomic_long_t check_context_unload;
 	atomic_long_t tlb_dropin;
+	atomic_long_t tlb_preload_page;
 	atomic_long_t tlb_dropin_fail_no_asid;
 	atomic_long_t tlb_dropin_fail_upm;
 	atomic_long_t tlb_dropin_fail_invalid;
@@ -245,7 +246,8 @@
 };
 
 enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
-	cchop_deallocate, tghop_invalidate, mcsop_last};
+	cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+	tghop_invalidate, mcsop_last};
 
 struct mcs_op_statistic {
 	atomic_long_t	count;
@@ -335,6 +337,7 @@
 	long			vd_user_options;/* misc user option flags */
 	int			vd_cbr_au_count;
 	int			vd_dsr_au_count;
+	unsigned char		vd_tlb_preload_count;
 };
 
 /*
@@ -350,6 +353,7 @@
 	struct gru_state	*ts_gru;	/* GRU where the context is
 						   loaded */
 	struct gru_mm_struct	*ts_gms;	/* asid & ioproc struct */
+	unsigned char		ts_tlb_preload_count; /* TLB preload pages */
 	unsigned long		ts_cbr_map;	/* map of allocated CBRs */
 	unsigned long		ts_dsr_map;	/* map of allocated DATA
 						   resources */
@@ -661,7 +665,8 @@
 extern void gru_proc_exit(void);
 
 extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
-		int cbr_au_count, int dsr_au_count, int options, int tsid);
+		int cbr_au_count, int dsr_au_count,
+		unsigned char tlb_preload_count, int options, int tsid);
 extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
 		int cbr_au_count, char *cbmap);
 extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,