Blame - Modules/_decimal/libmpdec/sixstep.c - platform/external/python/cpython3

blob: 7d0542d641dcb8ac9a81ed6fb53c4ce4b30a9fb8 [file] [log] [blame]

Stefan Krah	1919b7e	2012-03-21 18:25:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2008-2012 Stefan Krah. All rights reserved.
				3	*
				4	* Redistribution and use in source and binary forms, with or without
				5	* modification, are permitted provided that the following conditions
				6	* are met:
				7	*
				8	* 1. Redistributions of source code must retain the above copyright
				9	* notice, this list of conditions and the following disclaimer.
				10	*
				11	* 2. Redistributions in binary form must reproduce the above copyright
				12	* notice, this list of conditions and the following disclaimer in the
				13	* documentation and/or other materials provided with the distribution.
				14	*
				15	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
				16	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				17	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				18	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
				19	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				20	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				21	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				22	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				23	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				24	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				25	* SUCH DAMAGE.
				26	*/
				27
				28
				29	#include "mpdecimal.h"
				30	#include <stdio.h>
				31	#include <stdlib.h>
				32	#include <assert.h>
				33	#include "bits.h"
				34	#include "difradix2.h"
				35	#include "numbertheory.h"
				36	#include "transpose.h"
				37	#include "umodarith.h"
				38	#include "sixstep.h"
				39
				40
				41	/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
				42	form 2*n (See literature/six-step.txt). /
				43
				44
				45	/* forward transform with sign = -1 */
				46	int
				47	six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
				48	{
				49	struct fnt_params *tparams;
				50	mpd_size_t log2n, C, R;
				51	mpd_uint_t kernel;
				52	mpd_uint_t umod;
				53	#ifdef PPRO
				54	double dmod;
				55	uint32_t dinvmod[3];
				56	#endif
				57	mpd_uint_t *x, w0, w1, wstep;
				58	mpd_size_t i, k;
				59
				60
				61	assert(ispower2(n));
				62	assert(n >= 16);
				63	assert(n <= MPD_MAXTRANSFORM_2N);
				64
				65	log2n = mpd_bsr(n);
				66	C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
				67	R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
				68
				69
				70	/* Transpose the matrix. */
				71	if (!transpose_pow2(a, R, C)) {
				72	return 0;
				73	}
				74
				75	/* Length R transform on the rows. */
				76	if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
				77	return 0;
				78	}
				79	for (x = a; x < a+n; x += R) {
				80	fnt_dif2(x, R, tparams);
				81	}
				82
				83	/* Transpose the matrix. */
				84	if (!transpose_pow2(a, C, R)) {
				85	mpd_free(tparams);
				86	return 0;
				87	}
				88
				89	/* Multiply each matrix element (addressed by iC+k) by r(ik). */
				90	SETMODULUS(modnum);
				91	kernel = _mpd_getkernel(n, -1, modnum);
				92	for (i = 1; i < R; i++) {
				93	w0 = 1; /* r*(i0): initial value for k=0 */
				94	w1 = POWMOD(kernel, i); /* r*(i1): initial value for k=1 */
				95	wstep = MULMOD(w1, w1); /* r*(2i) */
				96	for (k = 0; k < C; k += 2) {
				97	mpd_uint_t x0 = a[i*C+k];
				98	mpd_uint_t x1 = a[i*C+k+1];
				99	MULMOD2(&x0, w0, &x1, w1);
				100	MULMOD2C(&w0, &w1, wstep); /* r*(i(k+2)) = r*(ik) * r*(2i) */
				101	a[i*C+k] = x0;
				102	a[i*C+k+1] = x1;
				103	}
				104	}
				105
				106	/* Length C transform on the rows. */
				107	if (C != R) {
				108	mpd_free(tparams);
				109	if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) {
				110	return 0;
				111	}
				112	}
				113	for (x = a; x < a+n; x += C) {
				114	fnt_dif2(x, C, tparams);
				115	}
				116	mpd_free(tparams);
				117
Stefan Krah	c64150b	2012-03-23 16:34:41 +0100	[diff] [blame]	118	#if 0
				119	/* An unordered transform is sufficient for convolution. */
Stefan Krah	1919b7e	2012-03-21 18:25:23 +0100	[diff] [blame]	120	/* Transpose the matrix. */
				121	if (!transpose_pow2(a, R, C)) {
				122	return 0;
				123	}
				124	#endif
				125
				126	return 1;
				127	}
				128
				129
				130	/* reverse transform, sign = 1 */
				131	int
				132	inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
				133	{
				134	struct fnt_params *tparams;
				135	mpd_size_t log2n, C, R;
				136	mpd_uint_t kernel;
				137	mpd_uint_t umod;
				138	#ifdef PPRO
				139	double dmod;
				140	uint32_t dinvmod[3];
				141	#endif
				142	mpd_uint_t *x, w0, w1, wstep;
				143	mpd_size_t i, k;
				144
				145
				146	assert(ispower2(n));
				147	assert(n >= 16);
				148	assert(n <= MPD_MAXTRANSFORM_2N);
				149
				150	log2n = mpd_bsr(n);
				151	C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
				152	R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
				153
				154
Stefan Krah	c64150b	2012-03-23 16:34:41 +0100	[diff] [blame]	155	#if 0
				156	/* An unordered transform is sufficient for convolution. */
Stefan Krah	1919b7e	2012-03-21 18:25:23 +0100	[diff] [blame]	157	/* Transpose the matrix, producing an RC matrix. /
				158	if (!transpose_pow2(a, C, R)) {
				159	return 0;
				160	}
				161	#endif
				162
				163	/* Length C transform on the rows. */
				164	if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
				165	return 0;
				166	}
				167	for (x = a; x < a+n; x += C) {
				168	fnt_dif2(x, C, tparams);
				169	}
				170
				171	/* Multiply each matrix element (addressed by iC+k) by r(ik). */
				172	SETMODULUS(modnum);
				173	kernel = _mpd_getkernel(n, 1, modnum);
				174	for (i = 1; i < R; i++) {
				175	w0 = 1;
				176	w1 = POWMOD(kernel, i);
				177	wstep = MULMOD(w1, w1);
				178	for (k = 0; k < C; k += 2) {
				179	mpd_uint_t x0 = a[i*C+k];
				180	mpd_uint_t x1 = a[i*C+k+1];
				181	MULMOD2(&x0, w0, &x1, w1);
				182	MULMOD2C(&w0, &w1, wstep);
				183	a[i*C+k] = x0;
				184	a[i*C+k+1] = x1;
				185	}
				186	}
				187
				188	/* Transpose the matrix. */
				189	if (!transpose_pow2(a, R, C)) {
				190	mpd_free(tparams);
				191	return 0;
				192	}
				193
				194	/* Length R transform on the rows. */
				195	if (R != C) {
				196	mpd_free(tparams);
				197	if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) {
				198	return 0;
				199	}
				200	}
				201	for (x = a; x < a+n; x += R) {
				202	fnt_dif2(x, R, tparams);
				203	}
				204	mpd_free(tparams);
				205
				206	/* Transpose the matrix. */
				207	if (!transpose_pow2(a, C, R)) {
				208	return 0;
				209	}
				210
				211	return 1;
				212	}
				213
				214