crypto: ccp - Add abstraction for device-specific calls

Support for different generations of the coprocessor
requires that an abstraction layer be implemented for
interacting with the hardware. This patch splits out
version-specific functions to a separate file and populates
the version structure (acting as a driver) with function
pointers.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
new file mode 100644
index 0000000..7d5eab4
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -0,0 +1,533 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_device *ccp = cmd_q->ccp;
+	void __iomem *cr_addr;
+	u32 cr0, cmd;
+	unsigned int i;
+	int ret = 0;
+
+	/* We could read a status register to see how many free slots
+	 * are actually available, but reading that register resets it
+	 * and you could lose some error information.
+	 */
+	cmd_q->free_slots--;
+
+	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
+	      | (op->jobid << REQ0_JOBID_SHIFT)
+	      | REQ0_WAIT_FOR_WRITE;
+
+	if (op->soc)
+		cr0 |= REQ0_STOP_ON_COMPLETE
+		       | REQ0_INT_ON_COMPLETE;
+
+	if (op->ioc || !cmd_q->free_slots)
+		cr0 |= REQ0_INT_ON_COMPLETE;
+
+	/* Start at CMD_REQ1 */
+	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
+
+	mutex_lock(&ccp->req_mutex);
+
+	/* Write CMD_REQ1 through CMD_REQx first */
+	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
+		iowrite32(*(cr + i), cr_addr);
+
+	/* Tell the CCP to start */
+	wmb();
+	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
+
+	mutex_unlock(&ccp->req_mutex);
+
+	if (cr0 & REQ0_INT_ON_COMPLETE) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			/* On error delete all related jobs from the queue */
+			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+
+			if (!ret)
+				ret = -EIO;
+		} else if (op->soc) {
+			/* Delete just head job from the queue on SoC */
+			cmd = DEL_Q_ACTIVE
+			      | (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+		}
+
+		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
+
+		cmd_q->int_rcvd = 0;
+	}
+
+	return ret;
+}
+
+static int ccp_perform_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
+		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
+		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
+		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->u.aes.mode == CCP_AES_MODE_CFB)
+		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_xts_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
+		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
+		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	if (op->init)
+		cr[0] |= REQ1_INIT;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_sha(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
+		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
+		| REQ1_INIT;
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+
+	if (op->eom) {
+		cr[0] |= REQ1_EOM;
+		cr[4] = lower_32_bits(op->u.sha.msg_bits);
+		cr[5] = upper_32_bits(op->u.sha.msg_bits);
+	} else {
+		cr[4] = 0;
+		cr[5] = 0;
+	}
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_rsa(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
+		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
+		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
+		| REQ1_EOM;
+	cr[1] = op->u.rsa.input_len - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_passthru(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
+		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
+		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
+
+	if (op->src.type == CCP_MEMTYPE_SYSTEM)
+		cr[1] = op->src.u.dma.length - 1;
+	else
+		cr[1] = op->dst.u.dma.length - 1;
+
+	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+		cr[2] = ccp_addr_lo(&op->src.u.dma);
+		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+			| ccp_addr_hi(&op->src.u.dma);
+
+		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
+	} else {
+		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
+		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
+	}
+
+	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+		cr[4] = ccp_addr_lo(&op->dst.u.dma);
+		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+			| ccp_addr_hi(&op->dst.u.dma);
+	} else {
+		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
+		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
+	}
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_perform_ecc(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* Fill out the register contents for REQ1 through REQ6 */
+	cr[0] = REQ1_ECC_AFFINE_CONVERT
+		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
+		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
+		| REQ1_EOM;
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->src.u.dma);
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
+		| ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+	u32 trng_value;
+	int len = min_t(int, sizeof(trng_value), max);
+
+	/*
+	 * Locking is provided by the caller so we can update device
+	 * hwrng-related fields safely
+	 */
+	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+	if (!trng_value) {
+		/* Zero is returned if not data is available or if a
+		 * bad-entropy error is present. Assume an error if
+		 * we exceed TRNG_RETRIES reads of zero.
+		 */
+		if (ccp->hwrng_retries++ > TRNG_RETRIES)
+			return -EIO;
+
+		return 0;
+	}
+
+	/* Reset the counter and save the rng value */
+	ccp->hwrng_retries = 0;
+	memcpy(data, &trng_value, len);
+
+	return len;
+}
+
+static int ccp_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, qim, i;
+	int ret;
+
+	/* Find available queues */
+	qim = 0;
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+			 ccp->name, i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+
+		/* Reserve 2 KSB regions for the queue */
+		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
+		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
+		ccp->ksb_count -= 2;
+
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
+				    (CMD_Q_STATUS_INCR * i);
+		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
+					(CMD_Q_STATUS_INCR * i);
+		cmd_q->int_ok = 1 << (i * 2);
+		cmd_q->int_err = 1 << ((i * 2) + 1);
+
+		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		/* Build queue interrupt mask (two interrupts per queue) */
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+
+#ifdef CONFIG_ARM64
+		/* For arm64 set the recommended queue cache settings */
+		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
+			  (CMD_Q_CACHE_INC * i));
+#endif
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+	/* Disable and clear interrupts until ready */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	/* Request an irq */
+	ret = ccp->get_irq(ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+
+	/* Initialize the queues used to wait for KSB space and suspend */
+	init_waitqueue_head(&ccp->ksb_queue);
+	init_waitqueue_head(&ccp->suspend_queue);
+
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "%s-q%u", ccp->name, cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	/* Register the RNG */
+	ccp->hwrng.name = ccp->rngname;
+	ccp->hwrng.read = ccp_trng_read;
+	ret = hwrng_register(&ccp->hwrng);
+	if (ret) {
+		dev_err(dev, "error registering hwrng (%d)\n", ret);
+		goto e_kthread;
+	}
+
+	ccp_add_device(ccp);
+
+	/* Enable interrupts */
+	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+	return 0;
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	ccp->free_irq(ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	return ret;
+}
+
+static void ccp_destroy(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	struct ccp_cmd *cmd;
+	unsigned int qim, i;
+
+	/* Remove this device from the list of available units first */
+	ccp_del_device(ccp);
+
+	/* Unregister the RNG */
+	hwrng_unregister(&ccp->hwrng);
+
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	/* Build queue interrupt mask (two interrupt masks per queue) */
+	qim = 0;
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		qim |= cmd_q->int_ok | cmd_q->int_err;
+	}
+
+	/* Disable and clear interrupts */
+	iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	ccp->free_irq(ccp);
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	/* Flush the cmd and backlog queue */
+	while (!list_empty(&ccp->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+}
+
+static irqreturn_t ccp_irq_handler(int irq, void *data)
+{
+	struct device *dev = data;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_cmd_queue *cmd_q;
+	u32 q_int, status;
+	unsigned int i;
+
+	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
+		if (q_int) {
+			cmd_q->int_status = status;
+			cmd_q->q_status = ioread32(cmd_q->reg_status);
+			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+			/* On error, only save the first error value */
+			if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
+				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+			cmd_q->int_rcvd = 1;
+
+			/* Acknowledge the interrupt and wake the kthread */
+			iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
+			wake_up_interruptible(&cmd_q->int_queue);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct ccp_actions ccp3_actions = {
+	.perform_aes = ccp_perform_aes,
+	.perform_xts_aes = ccp_perform_xts_aes,
+	.perform_sha = ccp_perform_sha,
+	.perform_rsa = ccp_perform_rsa,
+	.perform_passthru = ccp_perform_passthru,
+	.perform_ecc = ccp_perform_ecc,
+	.init = ccp_init,
+	.destroy = ccp_destroy,
+	.irqhandler = ccp_irq_handler,
+};
+
+struct ccp_vdata ccpv3 = {
+	.version = CCP_VERSION(3, 0),
+	.perform = &ccp3_actions,
+};