@@ -34,37 +34,40 @@ import Base: reinterpret, zero, one, abs, sign, ==, <, <=, +, -, /, *, div, rem,
3434 realmin, realmax, print, show, string, convert, parse, promote_rule, min, max,
3535 trunc, round, floor, ceil, eps, float, widemul
3636
37- const IEEEFloat = Union{Float16, Float32, Float64}
37+ const BitInteger = Union{Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64,
38+ UInt64, Int128, UInt128}
39+
40+ # floats that support fma and are roughly IEEE-like
41+ const FMAFloat = Union{Float16, Float32, Float64, BigFloat}
3842
3943for fn in [:trunc , :floor , :ceil ]
4044 fnname = Symbol (fn, " mul" )
4145
4246 @eval begin
4347 @doc """
44- $($ fnname) (x, y) :: Integer
48+ $($ fnname) (I, x, y) :: I
4549
46- Compute `$($ fn) (x * y)`. For floating point values, this function can
47- be more accurate than `$($ fn) (x * y)`. The boundary behavior of this
48- function (e.g. at large values of `x`, `y`) is untested and possibly
49- incorrect.
50+ Compute `$($ fn) (I, x * y)`, returning the result as type `I`. For
51+ floating point values, this function can be more accurate than
52+ `$($ fn) (x * y)`.
5053 """ function $ fnname end
5154
52- $ fnname {T <: Number} (x:: T , y:: T ) = $ fn (x * y)
55+ $ fnname {I, T <: Number} (:: Type{I} , x:: T , y:: T ) = $ fn (I, x * y)
5356
54- $ fnname ( x:: Number , y:: Number ) = $ fnname (promote (x, y)... )
57+ $ fnname {I} ( :: Type{I} , x:: Number , y:: Number ) = $ fnname (I, promote (x, y)... )
5558 end
5659
5760 if fn === :trunc
5861 # trunc a little different, implement in terms of floor
59- @eval function $fnname {T <: IEEEFloat} ( x:: T , y:: T )
60- copysign (floormul (abs (x), abs (y)), x* y)
62+ @eval function $fnname {I, T <: FMAFloat} ( :: Type{I} , x:: T , y:: T )
63+ copysign (floormul (I, abs (x), abs (y)), x* y)
6164 end
6265 else
6366 # floor and ceil can be implemented the same way
64- @eval function $fnname {T <: IEEEFloat} ( x:: T , y:: T )
67+ @eval function $fnname {I, T <: FMAFloat} ( :: Type{I} , x:: T , y:: T )
6568 a = x * y
6669 b = fma (x, y, - a)
67- ifelse (isinteger (a), a + $ fn (b), $ fn (a ))
70+ $ fn (I, a) + ifelse (isinteger (a), $ fn (I, b), zero (I ))
6871 end
6972 end
7073end
@@ -190,14 +193,46 @@ function ceil{T, f}(x::FD{T, f})
190193 end
191194end
192195
196+ """
197+ required_precision(n::Integer)
198+
199+ Compute the number of bits of precision needed to represent an integer exactly
200+ as a floating point number.
201+
202+ This is equivalent to counting the number of bits needed to represent the
203+ integer, excluding any trailing zeros.
204+ """
205+ required_precision (n:: Integer ) = ndigits (n, 2 ) - trailing_zeros (n)
206+
207+ """
208+ _apply_exact_float(f, T, x::Real, i::Integer)
209+
210+ Compute `f(T, x, i)::T` but avoiding possible loss of precision from an
211+ intermediate conversion of `i` to a floating point type by instead using a
212+ `BigFloat` with sufficient precision if necessary.
213+ """
214+ function _apply_exact_float {T} (f, :: Type{T} , x:: FMAFloat , i:: Integer )
215+ prec = required_precision (i)
216+ if prec > 53
217+ setprecision (BigFloat, prec) do
218+ f (T, x, BigFloat (i))
219+ end
220+ else
221+ f (T, x, Float64 (i))
222+ end
223+ end
224+
225+ _apply_exact_float {T} (f, :: Type{T} , x:: Real , i:: Integer ) = f (T, x, i)
226+
193227for fn in [:trunc , :floor , :ceil ]
194228 @eval $ fn {TI <: Integer} (:: Type{TI} , x:: FD ):: TI = $ fn (x)
195229
196230 # round/trunc/ceil/flooring to FD; generic
197- # TODO . we may need to check overflow and boundary conditions here.
198231 @eval function $fn {T, f} (:: Type{FD{T, f}} , x:: Real )
199232 powt = coefficient (FD{T, f})
200- val = trunc (T, $ (Symbol (fn, " mul" ))(x, powt))
233+ # Use machine Float64 if possible, but fall back to BigFloat if we need
234+ # more precision. 4f bits suffices.
235+ val = _apply_exact_float ($ (Symbol (fn, " mul" )), T, x, powt)
201236 reinterpret (FD{T, f}, val)
202237 end
203238end
0 commit comments