Blame - src/f32-ppmm/gen/2x4-minmax-scalar.c - platform/external/XNNPACK

blob: ce023eac2541cc89095bf9615c8566ec50284a5d [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Auto-generated file. Do not edit!
				2	// Template: src/f32-ppmm/scalar.c.in
				3	// Generator: tools/xngen
				4	//
				5	// Copyright 2019 Google LLC
				6	//
				7	// This source code is licensed under the BSD-style license found in the
				8	// LICENSE file in the root directory of this source tree.
				9
				10	#include <assert.h>
				11
				12	#include <xnnpack/math.h>
				13	#include <xnnpack/ppmm.h>
				14
				15
				16	void xnn_f32_ppmm_ukernel_2x4__scalar(
				17	size_t mr,
				18	size_t nc,
				19	size_t kc,
				20	const float*restrict a,
				21	const float*restrict w,
				22	float*restrict c,
				23	size_t cm_stride,
				24	size_t cn_stride,
Marat Dukhan	eb09a6b	2020-04-08 17:34:32 -0700	[diff] [blame]	25	const union xnn_f32_minmax_params params[restrict static 1])
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	26	{
				27	assert(mr != 0);
				28	assert(mr <= 2);
				29	assert(nc != 0);
				30	assert(kc != 0);
				31	assert(kc % sizeof(float) == 0);
				32
				33	float* c0 = c;
				34	float* c1 = (float*) ((uintptr_t) c0 + cm_stride);
				35	if XNN_UNPREDICTABLE(mr != 2) {
				36	c1 = c0;
				37	}
				38
				39	do {
				40	float vacc0x0 = w[0];
				41	float vacc0x1 = w[1];
				42	float vacc0x2 = w[2];
				43	float vacc0x3 = w[3];
				44	float vacc1x0 = vacc0x0;
				45	float vacc1x1 = vacc0x1;
				46	float vacc1x2 = vacc0x2;
				47	float vacc1x3 = vacc0x3;
				48	w += 4;
				49
				50	size_t k = kc;
				51	do {
				52	const float va0 = a[0];
				53	const float va1 = a[1];
				54	a += 2;
				55
				56	const float vb0 = w[0];
				57	const float vb1 = w[1];
				58	const float vb2 = w[2];
				59	const float vb3 = w[3];
				60	w += 4;
				61
				62	vacc0x0 += va0 * vb0;
				63	vacc1x0 += va1 * vb0;
				64	vacc0x1 += va0 * vb1;
				65	vacc1x1 += va1 * vb1;
				66	vacc0x2 += va0 * vb2;
				67	vacc1x2 += va1 * vb2;
				68	vacc0x3 += va0 * vb3;
				69	vacc1x3 += va1 * vb3;
				70
				71	k -= sizeof(float);
				72	} while (k != 0);
				73
				74	const float vmax = params->scalar.max;
				75	vacc0x0 = math_min_f32(vacc0x0, vmax);
				76	vacc1x0 = math_min_f32(vacc1x0, vmax);
				77	vacc0x1 = math_min_f32(vacc0x1, vmax);
				78	vacc1x1 = math_min_f32(vacc1x1, vmax);
				79	vacc0x2 = math_min_f32(vacc0x2, vmax);
				80	vacc1x2 = math_min_f32(vacc1x2, vmax);
				81	vacc0x3 = math_min_f32(vacc0x3, vmax);
				82	vacc1x3 = math_min_f32(vacc1x3, vmax);
				83
				84	const float vmin = params->scalar.min;
				85	vacc0x0 = math_max_f32(vacc0x0, vmin);
				86	vacc1x0 = math_max_f32(vacc1x0, vmin);
				87	vacc0x1 = math_max_f32(vacc0x1, vmin);
				88	vacc1x1 = math_max_f32(vacc1x1, vmin);
				89	vacc0x2 = math_max_f32(vacc0x2, vmin);
				90	vacc1x2 = math_max_f32(vacc1x2, vmin);
				91	vacc0x3 = math_max_f32(vacc0x3, vmin);
				92	vacc1x3 = math_max_f32(vacc1x3, vmin);
				93
				94	if XNN_LIKELY(nc >= 4) {
				95	c1[0] = vacc1x0;
				96	c1[1] = vacc1x1;
				97	c1[2] = vacc1x2;
				98	c1[3] = vacc1x3;
				99	c0[0] = vacc0x0;
				100	c0[1] = vacc0x1;
				101	c0[2] = vacc0x2;
				102	c0[3] = vacc0x3;
				103
				104	a = (const float) ((uintptr_t) a - kc 2);
				105
				106	c1 = (float*) ((uintptr_t) c1 + cn_stride);
				107	c0 = (float*) ((uintptr_t) c0 + cn_stride);
				108
				109	nc -= 4;
				110	} else {
				111	if (nc & 2) {
				112	c1[0] = vacc1x0;
				113	c1[1] = vacc1x1;
				114	c0[0] = vacc0x0;
				115	c0[1] = vacc0x1;
				116
				117	vacc1x0 = vacc1x2;
				118	vacc0x0 = vacc0x2;
				119
				120	c1 += 2;
				121	c0 += 2;
				122	}
				123	if (nc & 1) {
				124	*c1 = vacc1x0;
				125	*c0 = vacc0x0;
				126	}
				127
				128	nc = 0;
				129	}
				130	} while (nc != 0);
				131	}