Blame - lib/raid6/neon.uc - kernel/msm-4.9

blob: 4fa51b761dd0cb4e0ef6c82fbacff87dfe20383f [file] [log] [blame]

Ard Biesheuvel	7d11965	2013-05-16 17:20:32 +0200	[diff] [blame]	1	/* -----------------------------------------------------------------------
				2	*
				3	* neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
				4	*
				5	* Copyright (C) 2012 Rob Herring
Ard Biesheuvel	0e833e6	2015-07-01 12:19:56 +1000	[diff] [blame]	6	* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
Ard Biesheuvel	7d11965	2013-05-16 17:20:32 +0200	[diff] [blame]	7	*
				8	* Based on altivec.uc:
				9	* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
				10	*
				11	* This program is free software; you can redistribute it and/or modify
				12	* it under the terms of the GNU General Public License as published by
				13	* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
				14	* Boston MA 02111-1307, USA; either version 2 of the License, or
				15	* (at your option) any later version; incorporated herein by reference.
				16	*
				17	* ----------------------------------------------------------------------- */
				18
				19	/*
				20	* neon$#.c
				21	*
				22	* $#-way unrolled NEON intrinsics math RAID-6 instruction set
				23	*
				24	* This file is postprocessed using unroll.awk
				25	*/
				26
				27	#include <arm_neon.h>
				28
				29	typedef uint8x16_t unative_t;
				30
				31	#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
				32	#define NSIZE sizeof(unative_t)
				33
				34	/*
				35	* The SHLBYTE() operation shifts each byte left by 1, not
				36	* rolling over into the next byte
				37	*/
				38	static inline unative_t SHLBYTE(unative_t v)
				39	{
				40	return vshlq_n_u8(v, 1);
				41	}
				42
				43	/*
				44	* The MASK() operation returns 0xFF in any byte for which the high
				45	* bit is 1, 0x00 for any byte for which the high bit is 0.
				46	*/
				47	static inline unative_t MASK(unative_t v)
				48	{
				49	const uint8x16_t temp = NBYTES(0);
				50	return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp);
				51	}
				52
				53	void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
				54	{
				55	uint8_t dptr = (uint8_t )ptrs;
				56	uint8_t p, q;
				57	int d, z, z0;
				58
				59	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
				60	const unative_t x1d = NBYTES(0x1d);
				61
				62	z0 = disks - 3; /* Highest data disk */
				63	p = dptr[z0+1]; /* XOR parity */
				64	q = dptr[z0+2]; /* RS syndrome */
				65
				66	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
				67	wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
				68	for ( z = z0-1 ; z >= 0 ; z-- ) {
				69	wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
				70	wp$$ = veorq_u8(wp$$, wd$$);
				71	w2$$ = MASK(wq$$);
				72	w1$$ = SHLBYTE(wq$$);
				73
				74	w2$$ = vandq_u8(w2$$, x1d);
				75	w1$$ = veorq_u8(w1$$, w2$$);
				76	wq$$ = veorq_u8(w1$$, wd$$);
				77	}
				78	vst1q_u8(&p[d+NSIZE*$$], wp$$);
				79	vst1q_u8(&q[d+NSIZE*$$], wq$$);
				80	}
				81	}
Ard Biesheuvel	0e833e6	2015-07-01 12:19:56 +1000	[diff] [blame]	82
				83	void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
				84	unsigned long bytes, void **ptrs)
				85	{
				86	uint8_t dptr = (uint8_t )ptrs;
				87	uint8_t p, q;
				88	int d, z, z0;
				89
				90	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
				91	const unative_t x1d = NBYTES(0x1d);
				92
				93	z0 = stop; /* P/Q right side optimization */
				94	p = dptr[disks-2]; /* XOR parity */
				95	q = dptr[disks-1]; /* RS syndrome */
				96
				97	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
				98	wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
				99	wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);
				100
				101	/* P/Q data pages */
				102	for ( z = z0-1 ; z >= start ; z-- ) {
				103	wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
				104	wp$$ = veorq_u8(wp$$, wd$$);
				105	w2$$ = MASK(wq$$);
				106	w1$$ = SHLBYTE(wq$$);
				107
				108	w2$$ = vandq_u8(w2$$, x1d);
				109	w1$$ = veorq_u8(w1$$, w2$$);
				110	wq$$ = veorq_u8(w1$$, wd$$);
				111	}
				112	/* P/Q left side optimization */
				113	for ( z = start-1 ; z >= 0 ; z-- ) {
				114	w2$$ = MASK(wq$$);
				115	w1$$ = SHLBYTE(wq$$);
				116
				117	w2$$ = vandq_u8(w2$$, x1d);
				118	wq$$ = veorq_u8(w1$$, w2$$);
				119	}
				120	w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
				121	wq$$ = veorq_u8(wq$$, w1$$);
				122
				123	vst1q_u8(&p[d+NSIZE*$$], wp$$);
				124	vst1q_u8(&q[d+NSIZE*$$], wq$$);
				125	}
				126	}