2#define VERSION Source Time-stamp: <2015-05-18 16:32:08 neo>
22#define MAX(a,b) ((a) > (b) ? (a) : (b))
23#define MIN(a,b) ((a) < (b) ? (a) : (b))
25#ifndef INTERNAL_PRECISION
26#define INTERNAL_PRECISION double
30#define ZOLOTAREV_INTERNAL
32#define ZOLOTAREV_DATA izd
34#define ZPRECISION INTERNAL_PRECISION
36#undef ZOLOTAREV_INTERNAL
41#define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
42169399375105820974944592307816406286208998628034825342117068)
45#define ZERO ((INTERNAL_PRECISION) 0)
46#define ONE ((INTERNAL_PRECISION) 1)
47#define TWO ((INTERNAL_PRECISION) 2)
48#define THREE ((INTERNAL_PRECISION) 3)
49#define FOUR ((INTERNAL_PRECISION) 4)
56#define PP1(a,b,c) a ## b(c)
57#define STRINGIFY(name) PP1(PP,2,name)
65 int dn = z -> dn, dd = z -> dd,
type = z ->
type;
66 int j, k, da = dd + 1 +
type;
69 for (j = 0; j < dd; j++)
70 for (k = 0, alpha[j] = A; k < dd; k++)
72 (k < dn ? ap[j] - a[k] :
ONE) / (k == j ?
ONE : ap[j] - ap[k]);
74 for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] :
ONE); k < dd; k++) {
75 alpha[dd] *= a[k] / ap[k];
76 alpha[k] *= (dn > dd ? ap[k] - a[dd] :
ONE) / ap[k];
78 alpha[da-1] = dn == da - 1 ? A :
ZERO;
94 for (i = 0; i < d; i++) {
96 for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
160 int dp = z -> dn, dq = z -> dd,
type = z ->
type;
162 z -> db = 2 * dq + 1 +
type;
170 else (
void)
contfrac_A(z -> beta, p, q, r, dp, dq);
171 free(p); free(q); free(r);
184 quot = dp == dq ?
ZERO : p[dp] / q[dq];
186 for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
188 printf(
"%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
189 __FUNCTION__, dp, dq, (
float) quot);
190 for (j = 0; j <= dq + 1; j++)
191 printf(
"\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
192 j, (
float) (j > dp ?
ZERO : p[j]),
193 j, (
float) (j == 0 ?
ZERO : q[j-1]),
194 j, (
float) (j == dp ?
ZERO : r[j]));
196 *(rb =
contfrac_B(beta, q, r, p, dq, dq)) = quot;
209 quot = dp == dq ? p[dp] / q[dq] :
ZERO;
210 for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
212 printf(
"%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
213 __FUNCTION__, dp, dq, (
float) quot);
214 for (j = 0; j <= dq; j++)
215 printf(
"\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
216 j, (
float) (j > dp ?
ZERO : p[j]),
218 j, (
float) (j == dq ?
ZERO : r[j]));
220 *(rb = dq > 0 ?
contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
264 HALF*s*d/a : (a -
sqrt(a*a - s*s*c*d))/(c*s));
265 return 2*a*xi / (d + c*xi*xi);
280 sgn = ((int) (fabs(u) /
K)) % 4;
282 sgn = 1 - ((sgn & 1) << 1);
284 *dn =
sqrt(
ONE - k*k* *sn * *sn);
297 INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M,
F,
298 l, invlambda, xi, xisq, *tv, s, opl;
301 izd *d = (izd*) malloc(
sizeof(izd));
307 d -> dn = d -> dd - 1 + n % 2;
308 d -> deg_denom = 2 * d -> dd;
309 d -> deg_num = 2 * d -> dn + 1;
315 ksq = d -> epsilon * d -> epsilon;
320 A =
ONE / d -> epsilon;
327 for (m = 0; m < d -> dd; m++) {
328 czero = 2 * (m + 1) == n;
334 if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
340 (d -> ap)[d -> dd - 1 - m] = ksq / cp;
342 M *= (
ONE - c) / (
ONE - cp);
343 A *= (czero ? -ksq : c) * (
ONE - cp) / (cp * (
ONE - c));
344 invlambda *= (
ONE - c*xisq) / (
ONE - cp*xisq);
347 d -> A =
TWO / (
ONE + invlambda) * A;
348 d -> Delta = (invlambda -
ONE) / (invlambda +
ONE);
357 ),
sqrt(
ONE - l*l), &sn, &cn, &dn, &
F, &Kj);
359 for (m = 0; m < d -> n; m++) {
361 d -> gamma[m] = d -> epsilon / dn;
368 if (d ->
type == 1) {
369 d -> A = (
ONE - d -> Delta * d -> Delta) / d -> A;
370 tv = d -> a; d -> a = d -> ap; d -> ap = tv;
371 ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
372 ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
380 zd = (zolotarev_data*) malloc(
sizeof(zolotarev_data));
390 zd -> deg_num = d -> deg_num;
391 zd -> deg_denom = d -> deg_denom;
394 for (m = 0; m < zd -> dn; m++) zd -> a[m] = (
ZOLO_PRECISION) d -> a[m];
398 for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (
ZOLO_PRECISION) d -> ap[m];
402 for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (
ZOLO_PRECISION) d -> alpha[m];
406 for (m = 0; m < zd -> db; m++) zd -> beta[m] = (
ZOLO_PRECISION) d -> beta[m];
410 for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (
ZOLO_PRECISION) d -> gamma[m];
422 free(zdata -> alpha);
424 free(zdata -> gamma);
433 izd *d = (izd*) malloc(
sizeof(izd));
439 d -> dn = d -> dd - 1 + n % 2;
440 d -> deg_denom = 2 * d -> dd;
441 d -> deg_num = 2 * d -> dn + 1;
449 A = n % 2 == 0 ? A :
ONE / A;
450 M = d -> epsilon * A;
451 epssq = d -> epsilon * d -> epsilon;
453 for (m = 0; m < d -> dd; m++) {
454 czero = 2 * (m + 1) == n;
460 (d -> a)[d -> dn - 1 - m] = c;
467 (d -> ap)[d -> dd - 1 - m] = cp;
473 for (m = 0; m < d -> n; m++) d -> gamma[m] =
ONE;
476 d -> Delta =
ONE - M;
483 zd = (zolotarev_data*) malloc(
sizeof(zolotarev_data));
493 zd -> deg_num = d -> deg_num;
494 zd -> deg_denom = d -> deg_denom;
497 for (m = 0; m < zd -> dn; m++) zd -> a[m] = (
ZOLO_PRECISION) d -> a[m];
501 for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (
ZOLO_PRECISION) d -> ap[m];
505 for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (
ZOLO_PRECISION) d -> alpha[m];
509 for (m = 0; m < zd -> db; m++) zd -> beta[m] = (
ZOLO_PRECISION) d -> beta[m];
513 for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (
ZOLO_PRECISION) d -> gamma[m];
526#define ZERO ((ZOLO_PRECISION) 0)
528#define ONE ((ZOLO_PRECISION) 1)
530#define TWO ((ZOLO_PRECISION) 2)
538 if (rdata ->
type == 0) {
540 for (m = 0; m < rdata -> deg_denom/2; m++)
541 R *= (2*(m+1) > rdata -> deg_num ?
ONE : x*x - rdata -> a[m]) /
542 (x*x - rdata -> ap[m]);
545 for (m = 0; m < rdata -> deg_num/2; m++)
546 R *= (x*x - rdata -> a[m]) /
547 (2*(m+1) > rdata -> deg_denom ?
ONE : x*x - rdata -> ap[m]);
557 for (m = 0; m < rdata -> dd; m++)
558 R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
559 if (rdata ->
type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
574 for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x +
ONE / R;
585 for (m = 0; m < rdata -> n; m++)
586 T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
587 return (
ONE - T) / (
ONE + T);
605int main(
int argc,
char** argv) {
607 int m, n, plotpts = 5000,
type = 0;
608 float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
609 zolotarev_data *rdata;
611 FILE *plot_function, *plot_error,
612 *plot_partfrac, *plot_contfrac, *plot_cayley;
614 if (argc < 3 || argc > 4) {
615 fprintf(stderr,
"Usage: %s epsilon n [type]\n", *argv);
618 sscanf(argv[1],
"%g", &eps);
619 sscanf(argv[2],
"%d", &n);
620 if (argc == 4) sscanf(argv[3],
"%d", &
type);
622 if (type < 0 || type > 2) {
623 fprintf(stderr,
"%s: type must be 0 (Zolotarev R(0) = 0),\n"
624 "\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
632 printf(
"Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t"
636 "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
637 "\tDelta = %g (maximum error)\n\n"
638 "\tA = %g (overall factor)\n",
639 (
float) rdata -> epsilon, rdata -> n,
type,
640 rdata -> deg_num, rdata -> deg_denom,
641 rdata ->
type == 1 ?
"infinite" :
"zero",
642 (
float) rdata -> Delta, (
float) rdata -> A);
643 for (m = 0; m <
MIN(rdata -> dd, rdata -> dn); m++)
644 printf(
"\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
645 m + 1, (
float) rdata -> a[m], m + 1, (
float) rdata -> ap[m]);
646 if (rdata -> dd > rdata -> dn)
647 printf(
"\t\t\t\t\ta'[%2d] = %14.8g\n",
648 rdata -> dn + 1, (
float) rdata -> ap[rdata -> dn]);
649 if (rdata -> dd < rdata -> dn)
650 printf(
"\ta[%2d] = %14.8g\n",
651 rdata -> dd + 1, (
float) rdata -> a[rdata -> dd]);
653 printf(
"\n\tPartial fraction coefficients\n");
654 printf(
"\talpha[ 0] = %14.8g\n",
655 (
float) rdata -> alpha[rdata -> da - 1]);
656 for (m = 0; m < rdata -> dd; m++)
657 printf(
"\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
658 m + 1, (
float) rdata -> alpha[m], m + 1, (
float) rdata -> ap[m]);
659 if (rdata ->
type == 1)
660 printf(
"\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
661 rdata -> dd + 1, (
float) rdata -> alpha[rdata -> dd],
662 rdata -> dd + 1, (
float)
ZERO);
664 printf(
"\n\tContinued fraction coefficients\n");
665 for (m = 0; m < rdata -> db; m++)
666 printf(
"\tbeta[%2d] = %14.8g\n", m, (
float) rdata -> beta[m]);
668 printf(
"\n\tCayley transform coefficients\n");
669 for (m = 0; m < rdata -> n; m++)
670 printf(
"\tgamma[%2d] = %14.8g\n", m, (
float) rdata -> gamma[m]);
673 plot_function = fopen(
"zolotarev-fn.dat",
"w");
674 plot_error = fopen(
"zolotarev-err.dat",
"w");
676 plot_partfrac = fopen(
"zolotarev-partfrac.dat",
"w");
677 plot_contfrac = fopen(
"zolotarev-contfrac.dat",
"w");
678 plot_cayley = fopen(
"zolotarev-cayley.dat",
"w");
680 for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
681 x = 2.4 * (float) m / plotpts - 1.2;
682 if (rdata ->
type == 0 || fabs(x) * (
float) plotpts > 1.0) {
685 fprintf(plot_function,
"%g %g\n", x, (
float) y);
686 fprintf(plot_error,
"%g %g\n",
687 x, (
float)((y - ((x > 0.0 ?
ONE : -
ONE))) / rdata -> Delta));
688 ypferr = (float)((zolotarev_partfrac_eval((
ZOLO_PRECISION) x, rdata) - y)
690 ycferr = (float)((zolotarev_contfrac_eval((
ZOLO_PRECISION) x, rdata) - y)
692 ycaylerr = (float)((zolotarev_cayley_eval((
ZOLO_PRECISION) x, rdata) - y)
694 if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
695 maxypferr =
MAX(maxypferr, fabs(ypferr));
696 maxycferr =
MAX(maxycferr, fabs(ycferr));
697 maxycaylerr =
MAX(maxycaylerr, fabs(ycaylerr));
700 fprintf(plot_partfrac,
"%g %g\n", x, ypferr);
701 fprintf(plot_contfrac,
"%g %g\n", x, ycferr);
702 fprintf(plot_cayley,
"%g %g\n", x, ycaylerr);
708 fclose(plot_contfrac);
709 fclose(plot_partfrac);
712 fclose(plot_function);
714 printf(
"\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
715 printf(
"\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
716 printf(
"\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
721 free(rdata -> alpha);
723 free(rdata -> gamma);
accelerator_inline Grid_simd< S, V > asin(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > sin(const Grid_simd< S, V > &r)
accelerator_inline Grid_simd< S, V > sqrt(const Grid_simd< S, V > &r)
#define NAMESPACE_BEGIN(A)
#define INTERNAL_PRECISION
static INTERNAL_PRECISION K
static INTERNAL_PRECISION U
static INTERNAL_PRECISION * contfrac_B(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k, INTERNAL_PRECISION *sn, INTERNAL_PRECISION *cn, INTERNAL_PRECISION *dn, INTERNAL_PRECISION *elF, INTERNAL_PRECISION *elK)
static INTERNAL_PRECISION * contfrac_A(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
zolotarev_data * higham(ZOLO_PRECISION epsilon, int n)
zolotarev_data * zolotarev(ZOLO_PRECISION epsilon, int n, int type)
static INTERNAL_PRECISION F
static void construct_contfrac(izd *z)
static INTERNAL_PRECISION * poly_factored_to_dense(INTERNAL_PRECISION A, INTERNAL_PRECISION *a, int d)
static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a, INTERNAL_PRECISION b, INTERNAL_PRECISION s)
void zolotarev_free(zolotarev_data *zdata)
static void construct_partfrac(izd *z)