@@ -24,75 +24,60 @@ tiny = 1e-30,
2424half = 5.0000000000e-01 , /* 0x3F000000 */
2525one = 1.0000000000e+00 , /* 0x3F800000 */
2626two = 2.0000000000e+00 , /* 0x40000000 */
27- /* c = (subfloat)0.84506291151 */
28- erx = 8.4506291151e-01 , /* 0x3f58560b */
2927/*
30- * Coefficients for approximation to erf on [0,0.84375]
28+ * Coefficients for approximation to erf on [0,0.84375]
3129 */
3230efx = 1.2837916613e-01 , /* 0x3e0375d4 */
3331efx8 = 1.0270333290e+00 , /* 0x3f8375d4 */
34- pp0 = 1.2837916613e-01 , /* 0x3e0375d4 */
35- pp1 = -3.2504209876e-01 , /* 0xbea66beb */
36- pp2 = -2.8481749818e-02 , /* 0xbce9528f */
37- pp3 = -5.7702702470e-03 , /* 0xbbbd1489 */
38- pp4 = -2.3763017452e-05 , /* 0xb7c756b1 */
39- qq1 = 3.9791721106e-01 , /* 0x3ecbbbce */
40- qq2 = 6.5022252500e-02 , /* 0x3d852a63 */
41- qq3 = 5.0813062117e-03 , /* 0x3ba68116 */
42- qq4 = 1.3249473704e-04 , /* 0x390aee49 */
43- qq5 = -3.9602282413e-06 , /* 0xb684e21a */
4432/*
45- * Coefficients for approximation to erf in [0.84375,1.25]
33+ * Domain [0, 0.84375], range ~[-5.4446e-10,5.5197e-10]:
34+ * |(erf(x) - x)/x - p(x)/q(x)| < 2**-31.
4635 */
47- pa0 = -2.3621185683e-03 , /* 0xbb1acdc6 */
48- pa1 = 4.1485610604e-01 , /* 0x3ed46805 */
49- pa2 = -3.7220788002e-01 , /* 0xbebe9208 */
50- pa3 = 3.1834661961e-01 , /* 0x3ea2fe54 */
51- pa4 = -1.1089469492e-01 , /* 0xbde31cc2 */
52- pa5 = 3.5478305072e-02 , /* 0x3d1151b3 */
53- pa6 = -2.1663755178e-03 , /* 0xbb0df9c0 */
54- qa1 = 1.0642088205e-01 , /* 0x3dd9f331 */
55- qa2 = 5.4039794207e-01 , /* 0x3f0a5785 */
56- qa3 = 7.1828655899e-02 , /* 0x3d931ae7 */
57- qa4 = 1.2617121637e-01 , /* 0x3e013307 */
58- qa5 = 1.3637083583e-02 , /* 0x3c5f6e13 */
59- qa6 = 1.1984500103e-02 , /* 0x3c445aa3 */
36+ pp0 = 1.28379166e-01F , /* 0x1.06eba8p-3 */
37+ pp1 = -3.36030394e-01F , /* -0x1.58185ap-2 */
38+ pp2 = -1.86260219e-03F , /* -0x1.e8451ep-10 */
39+ qq1 = 3.12324286e-01F , /* 0x1.3fd1f0p-2 */
40+ qq2 = 2.16070302e-02F , /* 0x1.620274p-6 */
41+ qq3 = -1.98859419e-03F , /* -0x1.04a626p-9 */
6042/*
61- * Coefficients for approximation to erfc in [1.25,1/0.35]
43+ * Domain [0.84375, 1.25], range ~[-1.953e-11,1.940e-11]:
44+ * |(erf(x) - erx) - p(x)/q(x)| < 2**-36.
6245 */
63- ra0 = -9.8649440333e-03 , /* 0xbc21a093 */
64- ra1 = -6.9385856390e-01 , /* 0xbf31a0b7 */
65- ra2 = -1.0558626175e+01 , /* 0xc128f022 */
66- ra3 = -6.2375331879e+01 , /* 0xc2798057 */
67- ra4 = -1.6239666748e+02 , /* 0xc322658c */
68- ra5 = -1.8460508728e+02 , /* 0xc3389ae7 */
69- ra6 = -8.1287437439e+01 , /* 0xc2a2932b */
70- ra7 = -9.8143291473e+00 , /* 0xc11d077e */
71- sa1 = 1.9651271820e+01 , /* 0x419d35ce */
72- sa2 = 1.3765776062e+02 , /* 0x4309a863 */
73- sa3 = 4.3456588745e+02 , /* 0x43d9486f */
74- sa4 = 6.4538726807e+02 , /* 0x442158c9 */
75- sa5 = 4.2900814819e+02 , /* 0x43d6810b */
76- sa6 = 1.0863500214e+02 , /* 0x42d9451f */
77- sa7 = 6.5702495575e+00 , /* 0x40d23f7c */
78- sa8 = -6.0424413532e-02 , /* 0xbd777f97 */
46+ erx = 8.42697144e-01F , /* 0x1.af7600p-1. erf(1) rounded to 16 bits. */
47+ pa0 = 3.64939137e-06F , /* 0x1.e9d022p-19 */
48+ pa1 = 4.15109694e-01F , /* 0x1.a91284p-2 */
49+ pa2 = -1.65179938e-01F , /* -0x1.5249dcp-3 */
50+ pa3 = 1.10914491e-01F , /* 0x1.c64e46p-4 */
51+ qa1 = 6.02074385e-01F , /* 0x1.344318p-1 */
52+ qa2 = 5.35934687e-01F , /* 0x1.126608p-1 */
53+ qa3 = 1.68576106e-01F , /* 0x1.593e6ep-3 */
54+ qa4 = 5.62181212e-02F , /* 0x1.cc89f2p-5 */
7955/*
80- * Coefficients for approximation to erfc in [1/.35,28]
56+ * Domain [1.25,1/0.35], range ~[-7.043e-10,7.457e-10]:
57+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-30
8158 */
82- rb0 = -9.8649431020e-03 , /* 0xbc21a092 */
83- rb1 = -7.9928326607e-01 , /* 0xbf4c9dd4 */
84- rb2 = -1.7757955551e+01 , /* 0xc18e104b */
85- rb3 = -1.6063638306e+02 , /* 0xc320a2ea */
86- rb4 = -6.3756646729e+02 , /* 0xc41f6441 */
87- rb5 = -1.0250950928e+03 , /* 0xc480230b */
88- rb6 = -4.8351919556e+02 , /* 0xc3f1c275 */
89- sb1 = 3.0338060379e+01 , /* 0x41f2b459 */
90- sb2 = 3.2579251099e+02 , /* 0x43a2e571 */
91- sb3 = 1.5367296143e+03 , /* 0x44c01759 */
92- sb4 = 3.1998581543e+03 , /* 0x4547fdbb */
93- sb5 = 2.5530502930e+03 , /* 0x451f90ce */
94- sb6 = 4.7452853394e+02 , /* 0x43ed43a7 */
95- sb7 = -2.2440952301e+01 ; /* 0xc1b38712 */
59+ ra0 = -9.87132732e-03F , /* -0x1.4376b2p-7 */
60+ ra1 = -5.53605914e-01F , /* -0x1.1b723cp-1 */
61+ ra2 = -2.17589188e+00F , /* -0x1.1683a0p+1 */
62+ ra3 = -1.43268085e+00F , /* -0x1.6ec42cp+0 */
63+ sa1 = 5.45995426e+00F , /* 0x1.5d6fe4p+2 */
64+ sa2 = 6.69798088e+00F , /* 0x1.acabb8p+2 */
65+ sa3 = 1.43113089e+00F , /* 0x1.6e5e98p+0 */
66+ sa4 = -5.77397496e-02F , /* -0x1.d90108p-5 */
67+ /*
68+ * Domain [1/0.35, 11], range ~[-2.264e-13,2.336e-13]:
69+ * |log(x*erfc(x)) + x**2 + 0.5625 - r(x)/s(x)| < 2**-42
70+ */
71+ rb0 = -9.86494310e-03F , /* -0x1.434124p-7 */
72+ rb1 = -6.25171244e-01F , /* -0x1.401672p-1 */
73+ rb2 = -6.16498327e+00F , /* -0x1.8a8f16p+2 */
74+ rb3 = -1.66696873e+01F , /* -0x1.0ab70ap+4 */
75+ rb4 = -9.53764343e+00F , /* -0x1.313460p+3 */
76+ sb1 = 1.26884899e+01F , /* 0x1.96081cp+3 */
77+ sb2 = 4.51839523e+01F , /* 0x1.6978bcp+5 */
78+ sb3 = 4.72810211e+01F , /* 0x1.7a3f88p+5 */
79+ sb4 = 8.93033314e+00F ; /* 0x1.1dc54ap+3 */
80+
9681
9782DLLEXPORT float
9883erff (float x )
@@ -107,43 +92,37 @@ erff(float x)
10792 }
10893
10994 if (ix < 0x3f580000 ) { /* |x|<0.84375 */
110- if (ix < 0x31800000 ) { /* |x|<2**-28 */
111- if (ix < 0x04000000 )
112- /*avoid underflow */
113- return (float )0.125 * ((float )8.0 * x + efx8 * x );
95+ if (ix < 0x38800000 ) { /* |x|<2**-14 */
96+ if (ix < 0x04000000 ) /* |x|<0x1p-119 */
97+ return (8 * x + efx8 * x )/8 ; /* avoid spurious underflow */
11498 return x + efx * x ;
11599 }
116100 z = x * x ;
117- r = pp0 + z * (pp1 + z * ( pp2 + z * ( pp3 + z * pp4 )) );
118- s = one + z * (qq1 + z * (qq2 + z * ( qq3 + z * ( qq4 + z * qq5 )) ));
101+ r = pp0 + z * (pp1 + z * pp2 );
102+ s = one + z * (qq1 + z * (qq2 + z * qq3 ));
119103 y = r /s ;
120104 return x + x * y ;
121105 }
122106 if (ix < 0x3fa00000 ) { /* 0.84375 <= |x| < 1.25 */
123107 s = fabsf (x )- one ;
124- P = pa0 + s * (pa1 + s * (pa2 + s * ( pa3 + s * ( pa4 + s * ( pa5 + s * pa6 ))) ));
125- Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * ( qa4 + s * ( qa5 + s * qa6 )) )));
108+ P = pa0 + s * (pa1 + s * (pa2 + s * pa3 ));
109+ Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * qa4 )));
126110 if (hx >=0 ) return erx + P /Q ; else return - erx - P /Q ;
127111 }
128- if (ix >= 0x40c00000 ) { /* inf>|x|>=6 */
112+ if (ix >= 0x40800000 ) { /* inf>|x|>=4 */
129113 if (hx >=0 ) return one - tiny ; else return tiny - one ;
130114 }
131115 x = fabsf (x );
132116 s = one /(x * x );
133117 if (ix < 0x4036DB6E ) { /* |x| < 1/0.35 */
134- R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s * (
135- ra5 + s * (ra6 + s * ra7 ))))));
136- S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s * (
137- sa5 + s * (sa6 + s * (sa7 + s * sa8 )))))));
118+ R = ra0 + s * (ra1 + s * (ra2 + s * ra3 ));
119+ S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * sa4 )));
138120 } else { /* |x| >= 1/0.35 */
139- R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (
140- rb5 + s * rb6 )))));
141- S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s * (
142- sb5 + s * (sb6 + s * sb7 ))))));
143- }
144- GET_FLOAT_WORD (ix ,x );
145- SET_FLOAT_WORD (z ,ix & 0xfffff000 );
146- r = __ieee754_expf (- z * z - (float )0.5625 )* __ieee754_expf ((z - x )* (z + x )+ R /S );
121+ R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * rb4 )));
122+ S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * sb4 )));
123+ }
124+ SET_FLOAT_WORD (z ,hx & 0xffffe000 );
125+ r = expf (- z * z - 0.5625F )* expf ((z - x )* (z + x )+ R /S );
147126 if (hx >=0 ) return one - r /x ; else return r /x - one ;
148127}
149128
@@ -160,11 +139,11 @@ erfcf(float x)
160139 }
161140
162141 if (ix < 0x3f580000 ) { /* |x|<0.84375 */
163- if (ix < 0x23800000 ) /* |x|<2**-56 */
142+ if (ix < 0x33800000 ) /* |x|<2**-56 */
164143 return one - x ;
165144 z = x * x ;
166- r = pp0 + z * (pp1 + z * ( pp2 + z * ( pp3 + z * pp4 )) );
167- s = one + z * (qq1 + z * (qq2 + z * ( qq3 + z * ( qq4 + z * qq5 )) ));
145+ r = pp0 + z * (pp1 + z * pp2 );
146+ s = one + z * (qq1 + z * (qq2 + z * qq3 ));
168147 y = r /s ;
169148 if (hx < 0x3e800000 ) { /* x<1/4 */
170149 return one - (x + x * y );
@@ -176,33 +155,27 @@ erfcf(float x)
176155 }
177156 if (ix < 0x3fa00000 ) { /* 0.84375 <= |x| < 1.25 */
178157 s = fabsf (x )- one ;
179- P = pa0 + s * (pa1 + s * (pa2 + s * ( pa3 + s * ( pa4 + s * ( pa5 + s * pa6 ))) ));
180- Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * ( qa4 + s * ( qa5 + s * qa6 )) )));
158+ P = pa0 + s * (pa1 + s * (pa2 + s * pa3 ));
159+ Q = one + s * (qa1 + s * (qa2 + s * (qa3 + s * qa4 )));
181160 if (hx >=0 ) {
182161 z = one - erx ; return z - P /Q ;
183162 } else {
184163 z = erx + P /Q ; return one + z ;
185164 }
186165 }
187- if (ix < 0x41e00000 ) { /* |x|<28 */
166+ if (ix < 0x41300000 ) { /* |x|<28 */
188167 x = fabsf (x );
189168 s = one /(x * x );
190169 if (ix < 0x4036DB6D ) { /* |x| < 1/.35 ~ 2.857143*/
191- R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s * (
192- ra5 + s * (ra6 + s * ra7 ))))));
193- S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s * (
194- sa5 + s * (sa6 + s * (sa7 + s * sa8 )))))));
170+ R = ra0 + s * (ra1 + s * (ra2 + s * ra3 ));
171+ S = one + s * (sa1 + s * (sa2 + s * (sa3 + s * sa4 )));
195172 } else { /* |x| >= 1/.35 ~ 2.857143 */
196- if (hx < 0 && ix >=0x40c00000 ) return two - tiny ;/* x < -6 */
197- R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (
198- rb5 + s * rb6 )))));
199- S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s * (
200- sb5 + s * (sb6 + s * sb7 ))))));
173+ if (hx < 0 && ix >=0x40a00000 ) return two - tiny ;/* x < -5 */
174+ R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * rb4 )));
175+ S = one + s * (sb1 + s * (sb2 + s * (sb3 + s * sb4 )));
201176 }
202- GET_FLOAT_WORD (ix ,x );
203- SET_FLOAT_WORD (z ,ix & 0xfffff000 );
204- r = __ieee754_expf (- z * z - (float )0.5625 )*
205- __ieee754_expf ((z - x )* (z + x )+ R /S );
177+ SET_FLOAT_WORD (z ,hx & 0xffffe000 );
178+ r = expf (- z * z - 0.5625F )* expf ((z - x )* (z + x )+ R /S );
206179 if (hx > 0 ) return r /x ; else return two - r /x ;
207180 } else {
208181 if (hx > 0 ) return tiny * tiny ; else return two - tiny ;
0 commit comments