Blame - arch/arm64/crypto/ghash-ce-core.S - kernel/msm-4.19

blob: b9e6eaf41c9be14c5f5269477203e9eaf5565eda [file] [log] [blame]

Ard Biesheuvel	fdd2389	2014-03-26 20:53:05 +0100	[diff] [blame]	1	/*
				2	* Accelerated GHASH implementation with ARMv8 PMULL instructions.
				3	*
				4	* Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
				5	*
				6	* Based on arch/x86/crypto/ghash-pmullni-intel_asm.S
				7	*
				8	* Copyright (c) 2009 Intel Corp.
				9	* Author: Huang Ying <ying.huang@intel.com>
				10	* Vinodh Gopal
				11	* Erdinc Ozturk
				12	* Deniz Karakoyunlu
				13	*
				14	* This program is free software; you can redistribute it and/or modify it
				15	* under the terms of the GNU General Public License version 2 as published
				16	* by the Free Software Foundation.
				17	*/
				18
				19	#include <linux/linkage.h>
				20	#include <asm/assembler.h>
				21
				/*
				 * Assembler setup: code is emitted into .text, and the Crypto Extension
				 * is enabled so that the PMULL/PMULL2 instructions used in this file
				 * are accepted.
				 */
				.text
				.arch		armv8-a+crypto

				/*
				 * NEON register roles in pmull_ghash_update.
				 *
				 * IN1 and T1 intentionally name the same register (v2): the input block
				 * is dead once it has been XORed into DATA, and only after that point is
				 * v2 written again as T1.
				 */
				DATA		.req	v0	// running digest (loaded from / stored to dg)
				SHASH		.req	v1	// hash key loaded from k
				IN1		.req	v2	// current input block (head or src)
				T1		.req	v2	// upper 128 bits of the product <T1:DATA>
				T2		.req	v3	// scratch
				T3		.req	v4	// scratch
				VZR		.req	v5	// all-zero vector, used with ext for 64-bit shifts
				32
				/*
				 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
				 *			   struct ghash_key const *k, const char *head)
				 *
				 * Fold 16-byte blocks into the GHASH digest dg[]: for every block,
				 * dg = (dg ^ block) * key, with the multiplication done in GF(2^128)
				 * using PMULL/PMULL2 followed by a two-phase shift/xor reduction.
				 *
				 * In:	w0 = blocks	number of 16-byte blocks to consume from src
				 *	x1 = dg		digest; 16 bytes read on entry, written on exit
				 *	x2 = src	input data, advanced by 16 for each block read
				 *	x3 = k		hash key; 16 bytes are loaded from it
				 *	x4 = head	optional 16-byte block processed before src,
				 *			or NULL; it is not counted in 'blocks'
				 *
				 * Clobbers: w0, x2 and v0-v5 (DATA, SHASH, IN1/T1, T2, T3, VZR).
				 *
				 * The loop is bottom-tested: with head == NULL one block is always read
				 * from src before the count is checked, so 'blocks' must be >= 1 in that
				 * case.  With a head block supplied, 'blocks' may be 0.
				 *
				 * dg[] and the key are accessed as two 64-bit lanes (.2d) rather than
				 * as 16 bytes (.16b).  On little-endian both forms move identical
				 * bytes; on big-endian a .16b access would leave the bytes of each u64
				 * reversed inside its lane and corrupt the digest.
				 * NOTE(review): this assumes struct ghash_key starts with two u64 words,
				 * like dg[] -- confirm against the glue code.
				 */
				ENTRY(pmull_ghash_update)
					ld1		{DATA.2d}, [x1]			// current digest
					ld1		{SHASH.2d}, [x3]		// hash key
					eor		VZR.16b, VZR.16b, VZR.16b	// VZR = 0

					/* do the head block first, if supplied */
					cbz		x4, 0f
					ld1		{IN1.2d}, [x4]
					b		1f				// head: w0 is not decremented

				0:	ld1		{IN1.2d}, [x2], #16		// next block, src += 16
					sub		w0, w0, #1

					/*
					 * Swap the two 64-bit halves of the block; on little-endian also
					 * reverse the bytes inside each half, which together is a full
					 * 16-byte reversal.  On big-endian the .2d load has already put the
					 * bytes of each half in the required order.
					 */
				1:	ext		IN1.16b, IN1.16b, IN1.16b, #8
				CPU_LE(	rev64		IN1.16b, IN1.16b	)
					eor		DATA.16b, DATA.16b, IN1.16b

					/*
					 * multiply DATA by SHASH in GF(2^128)
					 *
					 * Three 64x64 carry-less multiplies: the low lanes of T2/T3 hold
					 * (a1 + a0) and (b1 + b0), so the middle term is recovered by
					 * xoring out a1 * b1 and a0 * b0.
					 */
					ext		T2.16b, DATA.16b, DATA.16b, #8
					ext		T3.16b, SHASH.16b, SHASH.16b, #8
					eor		T2.16b, T2.16b, DATA.16b
					eor		T3.16b, T3.16b, SHASH.16b

					/* IN1 is dead from here on; v2 is reused as T1 */
					pmull2		T1.1q, SHASH.2d, DATA.2d	// a1 * b1
					pmull		DATA.1q, SHASH.1d, DATA.1d	// a0 * b0
					pmull		T2.1q, T2.1d, T3.1d		// (a1 + a0)(b1 + b0)
					eor		T2.16b, T2.16b, T1.16b		// (a0 * b1) + (a1 * b0)
					eor		T2.16b, T2.16b, DATA.16b

					/* add the middle term into <T1:DATA> at a 64-bit offset */
					ext		T3.16b, VZR.16b, T2.16b, #8	// low half of T2 -> high lane
					ext		T2.16b, T2.16b, VZR.16b, #8	// high half of T2 -> low lane
					eor		DATA.16b, DATA.16b, T3.16b
					eor		T1.16b, T1.16b, T2.16b		// <T1:DATA> is result of
											// carry-less multiplication

					/*
					 * first phase of the reduction
					 *
					 * Per 64-bit lane: T3 = DATA << 63 ^ DATA << 62 ^ DATA << 57,
					 * which is then xored into <T1:DATA> shifted up by 64 bits.
					 */
					shl		T3.2d, DATA.2d, #1
					eor		T3.16b, T3.16b, DATA.16b
					shl		T3.2d, T3.2d, #5
					eor		T3.16b, T3.16b, DATA.16b
					shl		T3.2d, T3.2d, #57
					ext		T2.16b, VZR.16b, T3.16b, #8	// low half of T3 -> high lane
					ext		T3.16b, T3.16b, VZR.16b, #8	// high half of T3 -> low lane
					eor		DATA.16b, DATA.16b, T2.16b
					eor		T1.16b, T1.16b, T3.16b

					/*
					 * second phase of the reduction
					 *
					 * Per 64-bit lane: T2 = DATA >> 1 ^ DATA >> 2 ^ DATA >> 7; the new
					 * digest is DATA ^ T1 ^ T2.
					 */
					ushr		T2.2d, DATA.2d, #5
					eor		T2.16b, T2.16b, DATA.16b
					ushr		T2.2d, T2.2d, #1
					eor		T2.16b, T2.16b, DATA.16b
					ushr		T2.2d, T2.2d, #1
					eor		T1.16b, T1.16b, T2.16b
					eor		DATA.16b, DATA.16b, T1.16b

					cbnz		w0, 0b				// more blocks left in src?

					st1		{DATA.2d}, [x1]			// write back the digest
					ret
				ENDPROC(pmull_ghash_update)