KVM: s390: Add support for channel I/O instructions.

Add a new capability, KVM_CAP_S390_CSS_SUPPORT, which will pass
intercepts for channel I/O instructions to userspace. Only I/O
instructions interacting with I/O interrupts need to be handled
in-kernel:

- TEST PENDING INTERRUPTION (tpi) dequeues and stores pending
  interrupts entirely in-kernel.
- TEST SUBCHANNEL (tsch) dequeues pending interrupts in-kernel
  and exits via KVM_EXIT_S390_TSCH to userspace for subchannel-
  related processing.

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 29363d1..16bd5d1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -262,6 +262,7 @@
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
 	struct gmap *gmap;
+	int css_support;
 };
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 71af87d..f26ff1e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -264,6 +264,7 @@
 	[0x0C >> 2] = handle_instruction_and_prog,
 	[0x10 >> 2] = handle_noop,
 	[0x14 >> 2] = handle_noop,
+	[0x18 >> 2] = handle_noop,
 	[0x1C >> 2] = kvm_s390_handle_wait,
 	[0x20 >> 2] = handle_validity,
 	[0x28 >> 2] = handle_stop,
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b3b4748..9a12835 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -709,6 +709,43 @@
 	return 0;
 }
 
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 cr6, u64 schid)
+{
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info *inti, *iter;
+
+	if ((!schid && !cr6) || (schid && cr6))
+		return NULL;
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	inti = NULL;
+	list_for_each_entry(iter, &fi->list, list) {
+		if (!is_ioint(iter->type))
+			continue;
+		if (cr6 && ((cr6 & iter->io.io_int_word) == 0))
+			continue;
+		if (schid) {
+			if (((schid & 0x00000000ffff0000) >> 16) !=
+			    iter->io.subchannel_id)
+				continue;
+			if ((schid & 0x000000000000ffff) !=
+			    iter->io.subchannel_nr)
+				continue;
+		}
+		inti = iter;
+		break;
+	}
+	if (inti)
+		list_del_init(&inti->list);
+	if (list_empty(&fi->list))
+		atomic_set(&fi->active, 0);
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+	return inti;
+}
+
 int kvm_s390_inject_vm(struct kvm *kvm,
 		       struct kvm_s390_interrupt *s390int)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5ff2603..5b01f09 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -141,6 +141,7 @@
 	case KVM_CAP_SYNC_REGS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_S390_CSS_SUPPORT:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -235,6 +236,9 @@
 		if (!kvm->arch.gmap)
 			goto out_nogmap;
 	}
+
+	kvm->arch.css_support = 0;
+
 	return 0;
 out_nogmap:
 	debug_unregister(kvm->arch.dbf);
@@ -658,6 +662,7 @@
 	case KVM_EXIT_INTR:
 	case KVM_EXIT_S390_RESET:
 	case KVM_EXIT_S390_UCONTROL:
+	case KVM_EXIT_S390_TSCH:
 		break;
 	default:
 		BUG();
@@ -818,6 +823,13 @@
 		return -EINVAL;
 
 	switch (cap->cap) {
+	case KVM_CAP_S390_CSS_SUPPORT:
+		if (!vcpu->kvm->arch.css_support) {
+			vcpu->kvm->arch.css_support = 1;
+			trace_kvm_s390_enable_css(vcpu->kvm);
+		}
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 211b340..3e05def 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -113,6 +113,8 @@
 		struct kvm_s390_interrupt *s390int);
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 cr6, u64 schid);
 
 /* implemented in priv.c */
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 8ad776f..0ef9894 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -127,13 +127,96 @@
 	return 0;
 }
 
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+	u64 addr;
+	struct kvm_s390_interrupt_info *inti;
+	int cc;
+
+	addr = kvm_s390_get_base_disp_s(vcpu);
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+	if (inti) {
+		if (addr) {
+			/*
+			 * Store the two-word I/O interruption code into the
+			 * provided area.
+			 */
+			put_guest_u16(vcpu, addr, inti->io.subchannel_id);
+			put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr);
+			put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm);
+		} else {
+			/*
+			 * Store the three-word I/O interruption code into
+			 * the appropriate lowcore area.
+			 */
+			put_guest_u16(vcpu, 184, inti->io.subchannel_id);
+			put_guest_u16(vcpu, 186, inti->io.subchannel_nr);
+			put_guest_u32(vcpu, 188, inti->io.io_int_parm);
+			put_guest_u32(vcpu, 192, inti->io.io_int_word);
+		}
+		cc = 1;
+	} else
+		cc = 0;
+	kfree(inti);
+	/* Set condition code and we're done. */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
+	return 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti;
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, 0,
+				   vcpu->run->s.regs.gprs[1]);
+
+	/*
+	 * Prepare exit to userspace.
+	 * We indicate whether we dequeued a pending I/O interrupt
+	 * so that userspace can re-inject it if the instruction gets
+	 * a program check. While this may re-order the pending I/O
+	 * interrupts, this is no problem since the priority is kept
+	 * intact.
+	 */
+	vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+	vcpu->run->s390_tsch.dequeued = !!inti;
+	if (inti) {
+		vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+		vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+		vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+		vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+	}
+	vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+	kfree(inti);
+	return -EREMOTE;
+}
+
 static int handle_io_inst(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
-	/* condition code 3 */
-	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
-	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
-	return 0;
+
+	if (vcpu->kvm->arch.css_support) {
+		/*
+		 * Most I/O instructions will be handled by userspace.
+		 * Exceptions are tpi and the interrupt portion of tsch.
+		 */
+		if (vcpu->arch.sie_block->ipa == 0xb236)
+			return handle_tpi(vcpu);
+		if (vcpu->arch.sie_block->ipa == 0xb235)
+			return handle_tsch(vcpu);
+		/* Handle in userspace. */
+		return -EOPNOTSUPP;
+	} else {
+		/*
+		 * Set condition code 3 to stop the guest from issueing channel
+		 * I/O instructions.
+		 */
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+		return 0;
+	}
 }
 
 static int handle_stfl(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 95fbc1ab..13f30f5 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -204,6 +204,26 @@
 	);
 
 
+/*
+ * Trace point for enabling channel I/O instruction support.
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+	    TP_PROTO(void *kvm),
+	    TP_ARGS(kvm),
+
+	    TP_STRUCT__entry(
+		    __field(void *, kvm)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->kvm = kvm;
+		    ),
+
+	    TP_printk("enabling channel I/O support (kvm @ %p)\n",
+		      __entry->kvm)
+	);
+
+
 #endif /* _TRACE_KVMS390_H */
 
 /* This part must be outside protection */