Grid 0.7.0
Zolotarev.cc
Go to the documentation of this file.
1/* -*- Mode: C; comment-column: 22; fill-column: 79; compile-command: "gcc -o zolotarev zolotarev.c -ansi -pedantic -lm -DTEST"; -*- */
2#define VERSION Source Time-stamp: <2015-05-18 16:32:08 neo>
3
4/* These C routines evalute the optimal rational approximation to the signum
5 * function for epsilon < |x| < 1 using Zolotarev's theorem.
6 *
7 * To obtain reliable results for high degree approximations (large n) it is
8 * necessary to compute using sufficiently high precision arithmetic. To this
9 * end the code has been parameterised to work with the preprocessor names
10 * INTERNAL_PRECISION and PRECISION set to float, double, or long double as
11 * appropriate. INTERNAL_PRECISION is used in computing the Zolotarev
12 * coefficients, which are converted to PRECISION before being returned to the
13 * caller. Presumably even higher precision could be obtained using GMP or
14 * similar package, but bear in mind that rounding errors might also be
15 * significant in evaluating the resulting polynomial. The convergence criteria
16 * have been written in a precision-independent form. */
17
18#include <math.h>
19#include <stdlib.h>
20#include <stdio.h>
21
22#define MAX(a,b) ((a) > (b) ? (a) : (b))
23#define MIN(a,b) ((a) < (b) ? (a) : (b))
24
25#ifndef INTERNAL_PRECISION
26#define INTERNAL_PRECISION double
27#endif
28
29#include "Zolotarev.h"
30#define ZOLOTAREV_INTERNAL
31#undef ZOLOTAREV_DATA
32#define ZOLOTAREV_DATA izd
33#undef ZPRECISION
34#define ZPRECISION INTERNAL_PRECISION
35#include "Zolotarev.h"
36#undef ZOLOTAREV_INTERNAL
37
38/* The ANSI standard appears not to know what pi is */
39
40#ifndef M_PI
41#define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
42169399375105820974944592307816406286208998628034825342117068)
43#endif
44
45#define ZERO ((INTERNAL_PRECISION) 0)
46#define ONE ((INTERNAL_PRECISION) 1)
47#define TWO ((INTERNAL_PRECISION) 2)
48#define THREE ((INTERNAL_PRECISION) 3)
49#define FOUR ((INTERNAL_PRECISION) 4)
50#define HALF (ONE/TWO)
51
52/* The following obscenity seems to be the simplest (?) way to coerce the C
53 * preprocessor to convert the value of a preprocessor token into a string. */
54
55#define PP2(x) #x
56#define PP1(a,b,c) a ## b(c)
57#define STRINGIFY(name) PP1(PP,2,name)
58
59/* Compute the partial fraction expansion coefficients (alpha) from the
60 * factored form */
63
64static void construct_partfrac(izd *z) {
65 int dn = z -> dn, dd = z -> dd, type = z -> type;
66 int j, k, da = dd + 1 + type;
67 INTERNAL_PRECISION A = z -> A, *a = z -> a, *ap = z -> ap, *alpha;
68 alpha = (INTERNAL_PRECISION*) malloc(da * sizeof(INTERNAL_PRECISION));
69 for (j = 0; j < dd; j++)
70 for (k = 0, alpha[j] = A; k < dd; k++)
71 alpha[j] *=
72 (k < dn ? ap[j] - a[k] : ONE) / (k == j ? ONE : ap[j] - ap[k]);
73 if(type == 1) /* implicit pole at zero? */
74 for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] : ONE); k < dd; k++) {
75 alpha[dd] *= a[k] / ap[k];
76 alpha[k] *= (dn > dd ? ap[k] - a[dd] : ONE) / ap[k];
77 }
78 alpha[da-1] = dn == da - 1 ? A : ZERO;
79 z -> alpha = alpha;
80 z -> da = da;
81 return;
82}
83
84/* Convert factored polynomial into dense polynomial. The input is the overall
85 * factor A and the roots a[i], such that p = A product(x - a[i], i = 1..d) */
86
89 int d) {
91 int i, j;
92 p = (INTERNAL_PRECISION *) malloc((d + 2) * sizeof(INTERNAL_PRECISION));
93 p[0] = A;
94 for (i = 0; i < d; i++) {
95 p[i+1] = p[i];
96 for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
97 p[0] *= - a[i];
98 }
99 return p;
100}
101
102/* Convert a rational function of the form R0(x) = x p(x^2)/q(x^2) (type 0) or
103 * R1(x) = p(x^2)/[x q(x^2)] (type 1) into its continued fraction
104 * representation. We assume that 0 <= deg(q) - deg(p) <= 1 for type 0 and 0 <=
105 * deg(p) - deg(q) <= 1 for type 1. On input p and q are in factored form, and
106 * deg(q) = dq, deg(p) = dp. The output is the continued fraction coefficients
107 * beta, where R(x) = beta[0] x + 1/(beta[1] x + 1/(...)).
108 *
109 * The method used is as follows. There are four cases to consider:
110 *
111 * 0.i. Type 0, deg p = deg q
112 *
113 * 0.ii. Type 0, deg p = deg q - 1
114 *
115 * 1.i. Type 1, deg p = deg q
116 *
117 * 1.ii. Type 1, deg p = deg q + 1
118 *
119 * and these are connected by two transformations:
120 *
121 * A. To obtain a continued fraction expansion of type 1 we use a single-step
122 * polynomial division we find beta and r(x) such that p(x) = beta x q(x) +
123 * r(x), with deg(r) = deg(q). This implies that p(x^2) = beta x^2 q(x^2) +
124 * r(x^2), and thus R1(x) = x beta + r(x^2)/(x q(x^2)) = x beta + 1/R0(x)
125 * with R0(x) = x q(x^2)/r(x^2).
126 *
127 * B. A continued fraction expansion of type 0 is obtained in a similar, but
128 * not identical, manner. We use the polynomial division algorithm to compute
129 * the quotient beta and the remainder r that satisfy p(x) = beta q(x) + r(x)
130 * with deg(r) = deg(q) - 1. We thus have x p(x^2) = x beta q(x^2) + x r(x^2),
131 * so R0(x) = x beta + x r(x^2)/q(x^2) = x beta + 1/R1(x) with R1(x) = q(x^2) /
132 * (x r(x^2)).
133 *
134 * Note that the deg(r) must be exactly deg(q) for (A) and deg(q) - 1 for (B)
135 * because p and q have disjoint roots all of multiplicity 1. This means that
136 * the division algorithm requires only a single polynomial subtraction step.
137 *
138 * The transformations between the cases form the following finite state
139 * automaton:
140 *
141 * +------+ +------+ +------+ +------+
142 * | | | | ---(A)---> | | | |
143 * | 0.ii | ---(B)---> | 1.ii | | 0.i | <---(A)--- | 1.i |
144 * | | | | <---(B)--- | | | |
145 * +------+ +------+ +------+ +------+
146 */
147
151 INTERNAL_PRECISION *, int, int);
152
156 INTERNAL_PRECISION *, int, int);
157
158static void construct_contfrac(izd *z){
159 INTERNAL_PRECISION *r, A = z -> A, *p = z -> a, *q = z -> ap;
160 int dp = z -> dn, dq = z -> dd, type = z -> type;
161
162 z -> db = 2 * dq + 1 + type;
163 z -> beta = (INTERNAL_PRECISION *)
164 malloc(z -> db * sizeof(INTERNAL_PRECISION));
165 p = poly_factored_to_dense(A, p, dp);
166 q = poly_factored_to_dense(ONE, q, dq);
167 r = (INTERNAL_PRECISION *) malloc((MAX(dp,dq) + 1) *
168 sizeof(INTERNAL_PRECISION));
169 if (type == 0) (void) contfrac_B(z -> beta, p, q, r, dp, dq);
170 else (void) contfrac_A(z -> beta, p, q, r, dp, dq);
171 free(p); free(q); free(r);
172 return;
173}
174
178 INTERNAL_PRECISION *r, int dp, int dq) {
179 INTERNAL_PRECISION quot, *rb;
180 int j;
181
182 /* p(x) = x beta q(x) + r(x); dp = dq or dp = dq + 1 */
183
184 quot = dp == dq ? ZERO : p[dp] / q[dq];
185 r[0] = p[0];
186 for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
187#ifdef DEBUG
188 printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
189 __FUNCTION__, dp, dq, (float) quot);
190 for (j = 0; j <= dq + 1; j++)
191 printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
192 j, (float) (j > dp ? ZERO : p[j]),
193 j, (float) (j == 0 ? ZERO : q[j-1]),
194 j, (float) (j == dp ? ZERO : r[j]));
195#endif /* DEBUG */
196 *(rb = contfrac_B(beta, q, r, p, dq, dq)) = quot;
197 return rb + 1;
198}
199
203 INTERNAL_PRECISION *r, int dp, int dq) {
204 INTERNAL_PRECISION quot, *rb;
205 int j;
206
207 /* p(x) = beta q(x) + r(x); dp = dq or dp = dq - 1 */
208
209 quot = dp == dq ? p[dp] / q[dq] : ZERO;
210 for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
211#ifdef DEBUG
212 printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
213 __FUNCTION__, dp, dq, (float) quot);
214 for (j = 0; j <= dq; j++)
215 printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
216 j, (float) (j > dp ? ZERO : p[j]),
217 j, (float) q[j],
218 j, (float) (j == dq ? ZERO : r[j]));
219#endif /* DEBUG */
220 *(rb = dq > 0 ? contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
221 return rb + 1;
222}
223
224/* The global variable U is used to hold the argument u throughout the AGM
225 * recursion. The global variables F and K are set in the innermost
226 * instantiation of the recursive function AGM to the values of the elliptic
227 * integrals F(u,k) and K(k) respectively. They must be made thread local to
228 * make this code thread-safe in a multithreaded environment. */
229
230static INTERNAL_PRECISION U, F, K; /* THREAD LOCAL */
231
232/* Recursive implementation of Gauss' arithmetico-geometric mean, which is the
233 * kernel of the method used to compute the Jacobian elliptic functions
234 * sn(u,k), cn(u,k), and dn(u,k) with parameter k (where 0 < k < 1), as well
235 * as the elliptic integral F(s,k) satisfying F(sn(u,k)) = u and the complete
236 * elliptic integral K(k).
237 *
238 * The algorithm used is a recursive implementation of the Gauss (Landen)
239 * transformation.
240 *
241 * The function returns the value of sn(u,k'), where k' is the dual parameter,
242 * and also sets the values of the global variables F and K. The latter is
243 * used to determine the sign of cn(u,k').
244 *
245 * The algorithm is deemed to have converged when b ceases to increase. This
246 * works whatever INTERNAL_PRECISION is specified. */
247
251 static INTERNAL_PRECISION pb = -ONE;
252 INTERNAL_PRECISION c, d, xi;
253
254 if (b <= pb) {
255 pb = -ONE;
256 F = asin(s) / a; /* Here, a is the AGM */
257 K = M_PI / (TWO * a);
258 return sin(U * a);
259 }
260 pb = b;
261 c = a - b;
262 d = a + b;
263 xi = AGM(HALF*d, sqrt(a*b), ONE + c*c == ONE ?
264 HALF*s*d/a : (a - sqrt(a*a - s*s*c*d))/(c*s));
265 return 2*a*xi / (d + c*xi*xi);
266}
267
268/* Computes sn(u,k), cn(u,k), dn(u,k), F(u,k), and K(k). It is essentially a
269 * wrapper for the routine AGM. The sign of cn(u,k) is defined to be -1 if
270 * K(k) < u < 3*K(k) and +1 otherwise, and thus sign is computed by some quite
271 * unnecessarily obfuscated bit manipulations. */
272
276 INTERNAL_PRECISION* elK) {
277 int sgn;
278 U = u;
279 *sn = AGM(ONE, sqrt(ONE - k*k), u);
280 sgn = ((int) (fabs(u) / K)) % 4; /* sgn = 0, 1, 2, 3 */
281 sgn ^= sgn >> 1; /* (sgn & 1) = 0, 1, 1, 0 */
282 sgn = 1 - ((sgn & 1) << 1); /* sgn = 1, -1, -1, 1 */
283 *cn = ((INTERNAL_PRECISION) sgn) * sqrt(ONE - *sn * *sn);
284 *dn = sqrt(ONE - k*k* *sn * *sn);
285 *elF = F;
286 *elK = K;
287}
288
289/* Compute the coefficients for the optimal rational approximation R(x) to
290 * sgn(x) of degree n over the interval epsilon < |x| < 1 using Zolotarev's
291 * formula.
292 *
293 * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
294 * type = 1 for the approximation which is infinite at x = 0. */
295
296zolotarev_data* zolotarev(ZOLO_PRECISION epsilon, int n, int type) {
297 INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
298 l, invlambda, xi, xisq, *tv, s, opl;
299 int m, czero, ts;
300 zolotarev_data *zd;
301 izd *d = (izd*) malloc(sizeof(izd));
302
303 d -> type = type;
304 d -> epsilon = (INTERNAL_PRECISION) epsilon;
305 d -> n = n;
306 d -> dd = n / 2;
307 d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
308 d -> deg_denom = 2 * d -> dd;
309 d -> deg_num = 2 * d -> dn + 1;
310
311 d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
312 sizeof(INTERNAL_PRECISION));
313 d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
314 sizeof(INTERNAL_PRECISION));
315 ksq = d -> epsilon * d -> epsilon;
316 kp = sqrt(ONE - ksq);
317 sncndnFK(ZERO, kp, &sn, &cn, &dn, &F, &Kp); /* compute Kp = K(kp) */
318 z0 = TWO * Kp / (INTERNAL_PRECISION) n;
319 M = ONE;
320 A = ONE / d -> epsilon;
321
322 sncndnFK(HALF * z0, kp, &sn, &cn, &dn, &F, &Kj); /* compute xi */
323 xi = ONE / dn;
324 xisq = xi * xi;
325 invlambda = xi;
326
327 for (m = 0; m < d -> dd; m++) {
328 czero = 2 * (m + 1) == n; /* n even and m = dd -1 */
329
330 z = z0 * ((INTERNAL_PRECISION) m + ONE);
331 sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
332 t = cn / sn;
333 c = - t*t;
334 if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
335
336 z = z0 * ((INTERNAL_PRECISION) m + HALF);
337 sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
338 t = cn / sn;
339 cp = - t*t;
340 (d -> ap)[d -> dd - 1 - m] = ksq / cp;
341
342 M *= (ONE - c) / (ONE - cp);
343 A *= (czero ? -ksq : c) * (ONE - cp) / (cp * (ONE - c));
344 invlambda *= (ONE - c*xisq) / (ONE - cp*xisq);
345 }
346 invlambda /= M;
347 d -> A = TWO / (ONE + invlambda) * A;
348 d -> Delta = (invlambda - ONE) / (invlambda + ONE);
349
350 d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
351 sizeof(INTERNAL_PRECISION));
352 l = ONE / invlambda;
353 opl = ONE + l;
354 sncndnFK(sqrt( d -> type == 1
355 ? (THREE + l) / (FOUR * opl)
356 : (ONE + THREE*l) / (opl*opl*opl)
357 ), sqrt(ONE - l*l), &sn, &cn, &dn, &F, &Kj);
358 s = M * F;
359 for (m = 0; m < d -> n; m++) {
360 sncndnFK(s + TWO*Kp*m/n, kp, &sn, &cn, &dn, &F, &Kj);
361 d -> gamma[m] = d -> epsilon / dn;
362 }
363
364 /* If R(x) is a Zolotarev rational approximation of degree (n,m) with maximum
365 * error Delta, then (1 - Delta^2) / R(x) is also an optimal Chebyshev
366 * approximation of degree (m,n) */
367
368 if (d -> type == 1) {
369 d -> A = (ONE - d -> Delta * d -> Delta) / d -> A;
370 tv = d -> a; d -> a = d -> ap; d -> ap = tv;
371 ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
372 ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
373 }
374
377
378 /* Converting everything to ZOLO_PRECISION for external use only */
379
380 zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
381 zd -> A = (ZOLO_PRECISION) d -> A;
382 zd -> Delta = (ZOLO_PRECISION) d -> Delta;
383 zd -> epsilon = (ZOLO_PRECISION) d -> epsilon;
384 zd -> n = d -> n;
385 zd -> type = d -> type;
386 zd -> dn = d -> dn;
387 zd -> dd = d -> dd;
388 zd -> da = d -> da;
389 zd -> db = d -> db;
390 zd -> deg_num = d -> deg_num;
391 zd -> deg_denom = d -> deg_denom;
392
393 zd -> a = (ZOLO_PRECISION*) malloc(zd -> dn * sizeof(ZOLO_PRECISION));
394 for (m = 0; m < zd -> dn; m++) zd -> a[m] = (ZOLO_PRECISION) d -> a[m];
395 free(d -> a);
396
397 zd -> ap = (ZOLO_PRECISION*) malloc(zd -> dd * sizeof(ZOLO_PRECISION));
398 for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (ZOLO_PRECISION) d -> ap[m];
399 free(d -> ap);
400
401 zd -> alpha = (ZOLO_PRECISION*) malloc(zd -> da * sizeof(ZOLO_PRECISION));
402 for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (ZOLO_PRECISION) d -> alpha[m];
403 free(d -> alpha);
404
405 zd -> beta = (ZOLO_PRECISION*) malloc(zd -> db * sizeof(ZOLO_PRECISION));
406 for (m = 0; m < zd -> db; m++) zd -> beta[m] = (ZOLO_PRECISION) d -> beta[m];
407 free(d -> beta);
408
409 zd -> gamma = (ZOLO_PRECISION*) malloc(zd -> n * sizeof(ZOLO_PRECISION));
410 for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (ZOLO_PRECISION) d -> gamma[m];
411 free(d -> gamma);
412
413 free(d);
414 return zd;
415}
416
417
418void zolotarev_free(zolotarev_data *zdata)
419{
420 free(zdata -> a);
421 free(zdata -> ap);
422 free(zdata -> alpha);
423 free(zdata -> beta);
424 free(zdata -> gamma);
425 free(zdata);
426}
427
428
429zolotarev_data* higham(ZOLO_PRECISION epsilon, int n) {
430 INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
431 int m, czero;
432 zolotarev_data *zd;
433 izd *d = (izd*) malloc(sizeof(izd));
434
435 d -> type = 0;
436 d -> epsilon = (INTERNAL_PRECISION) epsilon;
437 d -> n = n;
438 d -> dd = n / 2;
439 d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
440 d -> deg_denom = 2 * d -> dd;
441 d -> deg_num = 2 * d -> dn + 1;
442
443 d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
444 sizeof(INTERNAL_PRECISION));
445 d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
446 sizeof(INTERNAL_PRECISION));
447 A = (INTERNAL_PRECISION) n;
448 z0 = M_PI / A;
449 A = n % 2 == 0 ? A : ONE / A;
450 M = d -> epsilon * A;
451 epssq = d -> epsilon * d -> epsilon;
452
453 for (m = 0; m < d -> dd; m++) {
454 czero = 2 * (m + 1) == n; /* n even and m = dd - 1*/
455
456 if (!czero) {
457 z = z0 * ((INTERNAL_PRECISION) m + ONE);
458 t = tan(z);
459 c = - t*t;
460 (d -> a)[d -> dn - 1 - m] = c;
461 M *= epssq - c;
462 }
463
464 z = z0 * ((INTERNAL_PRECISION) m + HALF);
465 t = tan(z);
466 cp = - t*t;
467 (d -> ap)[d -> dd - 1 - m] = cp;
468 M /= epssq - cp;
469 }
470
471 d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
472 sizeof(INTERNAL_PRECISION));
473 for (m = 0; m < d -> n; m++) d -> gamma[m] = ONE;
474
475 d -> A = A;
476 d -> Delta = ONE - M;
477
480
481 /* Converting everything to PRECISION for external use only */
482
483 zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
484 zd -> A = (ZOLO_PRECISION) d -> A;
485 zd -> Delta = (ZOLO_PRECISION) d -> Delta;
486 zd -> epsilon = (ZOLO_PRECISION) d -> epsilon;
487 zd -> n = d -> n;
488 zd -> type = d -> type;
489 zd -> dn = d -> dn;
490 zd -> dd = d -> dd;
491 zd -> da = d -> da;
492 zd -> db = d -> db;
493 zd -> deg_num = d -> deg_num;
494 zd -> deg_denom = d -> deg_denom;
495
496 zd -> a = (ZOLO_PRECISION*) malloc(zd -> dn * sizeof(ZOLO_PRECISION));
497 for (m = 0; m < zd -> dn; m++) zd -> a[m] = (ZOLO_PRECISION) d -> a[m];
498 free(d -> a);
499
500 zd -> ap = (ZOLO_PRECISION*) malloc(zd -> dd * sizeof(ZOLO_PRECISION));
501 for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (ZOLO_PRECISION) d -> ap[m];
502 free(d -> ap);
503
504 zd -> alpha = (ZOLO_PRECISION*) malloc(zd -> da * sizeof(ZOLO_PRECISION));
505 for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (ZOLO_PRECISION) d -> alpha[m];
506 free(d -> alpha);
507
508 zd -> beta = (ZOLO_PRECISION*) malloc(zd -> db * sizeof(ZOLO_PRECISION));
509 for (m = 0; m < zd -> db; m++) zd -> beta[m] = (ZOLO_PRECISION) d -> beta[m];
510 free(d -> beta);
511
512 zd -> gamma = (ZOLO_PRECISION*) malloc(zd -> n * sizeof(ZOLO_PRECISION));
513 for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (ZOLO_PRECISION) d -> gamma[m];
514 free(d -> gamma);
515
516 free(d);
517 return zd;
518}
519
522
523#ifdef TEST
524
525#undef ZERO
526#define ZERO ((ZOLO_PRECISION) 0)
527#undef ONE
528#define ONE ((ZOLO_PRECISION) 1)
529#undef TWO
530#define TWO ((ZOLO_PRECISION) 2)
531
532/* Evaluate the rational approximation R(x) using the factored form */
533
534static ZOLO_PRECISION zolotarev_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
535 int m;
537
538 if (rdata -> type == 0) {
539 R = rdata -> A * x;
540 for (m = 0; m < rdata -> deg_denom/2; m++)
541 R *= (2*(m+1) > rdata -> deg_num ? ONE : x*x - rdata -> a[m]) /
542 (x*x - rdata -> ap[m]);
543 } else {
544 R = rdata -> A / x;
545 for (m = 0; m < rdata -> deg_num/2; m++)
546 R *= (x*x - rdata -> a[m]) /
547 (2*(m+1) > rdata -> deg_denom ? ONE : x*x - rdata -> ap[m]);
548 }
549 return R;
550}
551
552/* Evaluate the rational approximation R(x) using the partial fraction form */
553
554static ZOLO_PRECISION zolotarev_partfrac_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
555 int m;
556 ZOLO_PRECISION R = rdata -> alpha[rdata -> da - 1];
557 for (m = 0; m < rdata -> dd; m++)
558 R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
559 if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
560 return R * x;
561}
562
563/* Evaluate the rational approximation R(x) using continued fraction form.
564 *
565 * If x = 0 and type = 1 then the result should be INF, whereas if x = 0 and
566 * type = 0 then the result should be 0, but division by zero will occur at
567 * intermediate stages of the evaluation. For IEEE implementations with
568 * non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
569 * but with signalling overflow you will get an error message. */
570
571static ZOLO_PRECISION zolotarev_contfrac_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
572 int m;
573 ZOLO_PRECISION R = rdata -> beta[0] * x;
574 for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
575 return R;
576}
577
578/* Evaluate the rational approximation R(x) using Cayley form */
579
580static ZOLO_PRECISION zolotarev_cayley_eval(ZOLO_PRECISION x, zolotarev_data* rdata) {
581 int m;
583
584 T = rdata -> type == 0 ? ONE : -ONE;
585 for (m = 0; m < rdata -> n; m++)
586 T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
587 return (ONE - T) / (ONE + T);
588}
589
590
591/* Test program. Apart from printing out the parameters for R(x) it produces
592 * the following data files for plotting (unless NPLOT is defined):
593 *
594 * zolotarev-fn is a plot of R(x) for |x| < 1.2. This should look like sgn(x).
595 *
596 * zolotarev-err is a plot of the error |R(x) - sgn(x)| scaled by 1/Delta. This
597 * should oscillate deg_num + deg_denom + 2 times between +1 and -1 over the
598 * domain epsilon <= |x| <= 1.
599 *
600 * If ALLPLOTS is defined then zolotarev-partfrac (zolotarev-contfrac) is a
601 * plot of the difference between the values of R(x) computed using the
602 * factored form and the partial fraction (continued fraction) form, scaled by
603 * 1/Delta. It should be zero everywhere. */
604
605int main(int argc, char** argv) {
606
607 int m, n, plotpts = 5000, type = 0;
608 float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
609 zolotarev_data *rdata;
611 FILE *plot_function, *plot_error,
612 *plot_partfrac, *plot_contfrac, *plot_cayley;
613
614 if (argc < 3 || argc > 4) {
615 fprintf(stderr, "Usage: %s epsilon n [type]\n", *argv);
616 exit(EXIT_FAILURE);
617 }
618 sscanf(argv[1], "%g", &eps); /* First argument is epsilon */
619 sscanf(argv[2], "%d", &n); /* Second argument is n */
620 if (argc == 4) sscanf(argv[3], "%d", &type); /* Third argument is type */
621
622 if (type < 0 || type > 2) {
623 fprintf(stderr, "%s: type must be 0 (Zolotarev R(0) = 0),\n"
624 "\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
625 exit(EXIT_FAILURE);
626 }
627
628 rdata = type == 2
629 ? higham((ZOLO_PRECISION) eps, n)
630 : zolotarev((ZOLO_PRECISION) eps, n, type);
631
632 printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t"
634 "\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
635 "\tZOLO_PRECISION = " STRINGIFY(ZOLO_PRECISION)
636 "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
637 "\tDelta = %g (maximum error)\n\n"
638 "\tA = %g (overall factor)\n",
639 (float) rdata -> epsilon, rdata -> n, type,
640 rdata -> deg_num, rdata -> deg_denom,
641 rdata -> type == 1 ? "infinite" : "zero",
642 (float) rdata -> Delta, (float) rdata -> A);
643 for (m = 0; m < MIN(rdata -> dd, rdata -> dn); m++)
644 printf("\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
645 m + 1, (float) rdata -> a[m], m + 1, (float) rdata -> ap[m]);
646 if (rdata -> dd > rdata -> dn)
647 printf("\t\t\t\t\ta'[%2d] = %14.8g\n",
648 rdata -> dn + 1, (float) rdata -> ap[rdata -> dn]);
649 if (rdata -> dd < rdata -> dn)
650 printf("\ta[%2d] = %14.8g\n",
651 rdata -> dd + 1, (float) rdata -> a[rdata -> dd]);
652
653 printf("\n\tPartial fraction coefficients\n");
654 printf("\talpha[ 0] = %14.8g\n",
655 (float) rdata -> alpha[rdata -> da - 1]);
656 for (m = 0; m < rdata -> dd; m++)
657 printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
658 m + 1, (float) rdata -> alpha[m], m + 1, (float) rdata -> ap[m]);
659 if (rdata -> type == 1)
660 printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
661 rdata -> dd + 1, (float) rdata -> alpha[rdata -> dd],
662 rdata -> dd + 1, (float) ZERO);
663
664 printf("\n\tContinued fraction coefficients\n");
665 for (m = 0; m < rdata -> db; m++)
666 printf("\tbeta[%2d] = %14.8g\n", m, (float) rdata -> beta[m]);
667
668 printf("\n\tCayley transform coefficients\n");
669 for (m = 0; m < rdata -> n; m++)
670 printf("\tgamma[%2d] = %14.8g\n", m, (float) rdata -> gamma[m]);
671
672#ifndef NPLOT
673 plot_function = fopen("zolotarev-fn.dat", "w");
674 plot_error = fopen("zolotarev-err.dat", "w");
675#ifdef ALLPLOTS
676 plot_partfrac = fopen("zolotarev-partfrac.dat", "w");
677 plot_contfrac = fopen("zolotarev-contfrac.dat", "w");
678 plot_cayley = fopen("zolotarev-cayley.dat", "w");
679#endif /* ALLPLOTS */
680 for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
681 x = 2.4 * (float) m / plotpts - 1.2;
682 if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
683 /* skip x = 0 for type 1, as R(0) is singular */
684 y = zolotarev_eval((ZOLO_PRECISION) x, rdata);
685 fprintf(plot_function, "%g %g\n", x, (float) y);
686 fprintf(plot_error, "%g %g\n",
687 x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
688 ypferr = (float)((zolotarev_partfrac_eval((ZOLO_PRECISION) x, rdata) - y)
689 / rdata -> Delta);
690 ycferr = (float)((zolotarev_contfrac_eval((ZOLO_PRECISION) x, rdata) - y)
691 / rdata -> Delta);
692 ycaylerr = (float)((zolotarev_cayley_eval((ZOLO_PRECISION) x, rdata) - y)
693 / rdata -> Delta);
694 if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
695 maxypferr = MAX(maxypferr, fabs(ypferr));
696 maxycferr = MAX(maxycferr, fabs(ycferr));
697 maxycaylerr = MAX(maxycaylerr, fabs(ycaylerr));
698 }
699#ifdef ALLPLOTS
700 fprintf(plot_partfrac, "%g %g\n", x, ypferr);
701 fprintf(plot_contfrac, "%g %g\n", x, ycferr);
702 fprintf(plot_cayley, "%g %g\n", x, ycaylerr);
703#endif /* ALLPLOTS */
704 }
705 }
706#ifdef ALLPLOTS
707 fclose(plot_cayley);
708 fclose(plot_contfrac);
709 fclose(plot_partfrac);
710#endif /* ALLPLOTS */
711 fclose(plot_error);
712 fclose(plot_function);
713
714 printf("\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
715 printf("\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
716 printf("\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
717#endif /* NPLOT */
718
719 free(rdata -> a);
720 free(rdata -> ap);
721 free(rdata -> alpha);
722 free(rdata -> beta);
723 free(rdata -> gamma);
724 free(rdata);
725
726 return EXIT_SUCCESS;
727}
728
729#endif /* TEST */
730
accelerator_inline Grid_simd< S, V > asin(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > sin(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > sqrt(const Grid_simd< S, V > &r)
#define NAMESPACE_BEGIN(A)
Definition Namespace.h:35
#define NAMESPACE_END(A)
Definition Namespace.h:36
#define THREE
Definition Zolotarev.cc:48
#define VERSION
Definition Zolotarev.cc:2
#define ONE
Definition Zolotarev.cc:46
#define INTERNAL_PRECISION
Definition Zolotarev.cc:26
static INTERNAL_PRECISION K
Definition Zolotarev.cc:230
#define HALF
Definition Zolotarev.cc:50
#define MIN(a, b)
Definition Zolotarev.cc:23
#define FOUR
Definition Zolotarev.cc:49
static INTERNAL_PRECISION U
Definition Zolotarev.cc:230
static INTERNAL_PRECISION * contfrac_B(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
Definition Zolotarev.cc:200
static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k, INTERNAL_PRECISION *sn, INTERNAL_PRECISION *cn, INTERNAL_PRECISION *dn, INTERNAL_PRECISION *elF, INTERNAL_PRECISION *elK)
Definition Zolotarev.cc:273
#define TWO
Definition Zolotarev.cc:47
static INTERNAL_PRECISION * contfrac_A(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
Definition Zolotarev.cc:175
zolotarev_data * higham(ZOLO_PRECISION epsilon, int n)
Definition Zolotarev.cc:429
zolotarev_data * zolotarev(ZOLO_PRECISION epsilon, int n, int type)
Definition Zolotarev.cc:296
static INTERNAL_PRECISION F
Definition Zolotarev.cc:230
#define ZERO
Definition Zolotarev.cc:45
static void construct_contfrac(izd *z)
Definition Zolotarev.cc:158
static INTERNAL_PRECISION * poly_factored_to_dense(INTERNAL_PRECISION A, INTERNAL_PRECISION *a, int d)
Definition Zolotarev.cc:87
static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a, INTERNAL_PRECISION b, INTERNAL_PRECISION s)
Definition Zolotarev.cc:248
void zolotarev_free(zolotarev_data *zdata)
Definition Zolotarev.cc:418
#define M_PI
Definition Zolotarev.cc:41
#define STRINGIFY(name)
Definition Zolotarev.cc:57
#define MAX(a, b)
Definition Zolotarev.cc:22
static void construct_partfrac(izd *z)
Definition Zolotarev.cc:64
#define ZOLO_PRECISION
Definition Zolotarev.h:13
#define HVERSION
Definition Zolotarev.h:9