Documentation

LeanPool.Flean.Basic

Floating-Point Numbers

This module defines the full Flean.Float type (normal and subnormal representations together with signed infinities and NaN) and the conversions toFloat (rational to float) and toRat (float to rational). It then proves round-trip and rounding-error results about these conversions, such as to_float_to_rat and float_error.

def roundsub {C : FloatCfg} [R : Rounding] (q : ℚ) :

Round a rational to a subnormal representation using the rounding mode R in scope.

Equations

roundsub q = subnormalRound (roundFunction R) q

Instances For

theorem subnormal_roundsub_valid {C : FloatCfg} [R : Rounding] :

ValidSubnormalRounding roundsub

theorem subnormal_roundsub_coe {C : FloatCfg} [R : Rounding] (s : SubnormRep C) (h : s.nonzero) :

roundsub (subnormalToQ s) = s

inductive Flean.Float (C : FloatCfg) :

A floating-point number of the format C: a signed infinity, NaN, a valid normal representation, or a valid subnormal representation.

inf {C : FloatCfg} : Bool → Float C
A signed infinity (true for negative).
nan {C : FloatCfg} : Float C
Not-a-number.
normal {C : FloatCfg} (f : FloatRep C) : f.validE → f.validM → Float C
A normal float, given by a representation with valid exponent and mantissa.
subnormal {C : FloatCfg} (sm : SubnormRep C) : sm.m < C.prec → Float C
A subnormal float, given by a representation whose mantissa is strictly less than C.prec.

Instances For

def toFloat {C : FloatCfg} [R : Rounding] (q : ℚ) :

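Round a rational to a float using the rounding mode R in scope. The direction of rounding is determined by R, so the result is not necessarily the nearest float (toFloatDown, toFloatUp and toFloatNearest are all defined as toFloat).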

Equations

One or more equations did not get rendered due to their size.

Instances For

def toRat {C : FloatCfg} :

Flean.Float C → ℚ

The exact rational value of a float (0 for infinities and NaN).

Equations

toRat (Flean.Float.inf a) = 0
toRat Flean.Float.nan = 0
toRat (Flean.Float.normal f a a_1) = coeQ f
toRat (Flean.Float.subnormal sm a) = subnormalToQ sm

Instances For

def Flean.Float.IsFinite {C : FloatCfg} :

Float C → Prop

Whether a float is finite (not an infinity or NaN).

Equations

(Flean.Float.inf a).IsFinite = (false = true)
Flean.Float.nan.IsFinite = (false = true)
x✝.IsFinite = (true = true)

Instances For

def Flean.Float.IsZero {C : FloatCfg} :

Float C → Prop

Whether a float is a zero, i.e. a subnormal whose mantissa is 0 (with either sign).

Equations

(Flean.Float.subnormal { s := s, m := 0 } a).IsZero = (true = true)
x✝.IsZero = (false = true)

Instances For

theorem subnorm_eq_0_iff_to_q {C : FloatCfg} (sm : SubnormRep C) :

subnormalToQ sm = 0 ↔ sm.m = 0

theorem is_zero_iff_subnormal_to_q {C : FloatCfg} (sm : SubnormRep C) (h : sm.m < C.prec) :

subnormalToQ sm = 0 ↔ (Flean.Float.subnormal sm h).IsZero

theorem subnormal_range {C : FloatCfg} (f : SubnormRep C) (vm : f.m < C.prec) (ne_zero : f.nonzero) :

Int.log 2 |subnormalToQ f| < C.emin

def maxFloat (C : FloatCfg) :

The largest finite float of the format C.

Equations

maxFloat C = Flean.Float.normal (maxFloatRep C) ⋯ ⋯

Instances For

theorem to_rat_max_float {C : FloatCfg} :

toRat (maxFloat C) = maxFloatQ C

theorem log_lt_emax_of_max_float {C : FloatCfg} {q : ℚ} (q_nonneg : q ≠ 0) (h : |q| ≤ maxFloatQ C) :

Int.log 2 |q| ≤ C.emax

theorem float_range {C : FloatCfg} (f : Flean.Float C) :

|toRat f| ≤ maxFloatQ C

theorem to_float_to_rat {C : FloatCfg} [R : Rounding] (f : Flean.Float C) (finite : f.IsFinite) (nonzero : ¬f.IsZero) :

toFloat (toRat f) = f

theorem splitIsFinite {C : FloatCfg} [R : Rounding] {q : ℚ} (h : (toFloat q).IsFinite) :

|q| ≤ 2 ^ C.emin ∧ toRat (toFloat q) = subnormalToQ (roundsub q) ∨ 2 ^ C.emin ≤ |q| ∧ toRat (toFloat q) = coeQ (roundRep q)

theorem subnormal_to_q_emin {C : FloatCfg} :

subnormalToQ { s := false, m := C.prec } = 2 ^ C.emin

theorem subnormal_to_q_neg_emin {C : FloatCfg} :

subnormalToQ { s := true, m := C.prec } = -2 ^ C.emin

theorem coe_q_emin {C : FloatCfg} :

coeQ { s := false, e := C.emin, m := 0 } = 2 ^ C.emin

theorem coe_q_neg_emin {C : FloatCfg} :

coeQ { s := true, e := C.emin, m := 0 } = -2 ^ C.emin

theorem roundsub_emin {C : FloatCfg} [R : Rounding] :

roundsub (2 ^ C.emin) = { s := false, m := C.prec }

theorem roundsub_neg_emin {C : FloatCfg} [R : Rounding] :

roundsub (-2 ^ C.emin) = { s := true, m := C.prec }

theorem roundrep_emin {C : FloatCfg} [R : Rounding] :

roundRep (2 ^ C.emin) = { s := false, e := C.emin, m := 0 }

theorem roundrep_neg_emin {C : FloatCfg} [R : Rounding] :

roundRep (-2 ^ C.emin) = { s := true, e := C.emin, m := 0 }

def Flean.Float.neg {C : FloatCfg} :

Float C → Float C

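Negate a float by flipping its sign: the sign flag of an infinity is inverted, normal and subnormal representations are negated, and NaN is left unchanged.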

Equations

(Flean.Float.inf a).neg = Flean.Float.inf (decide ¬a = true)
Flean.Float.nan.neg = Flean.Float.nan
(Flean.Float.normal f a a_1).neg = Flean.Float.normal f.neg a a_1
(Flean.Float.subnormal sm a).neg = Flean.Float.subnormal sm.neg a

Instances For

theorem to_float_neg {C : FloatCfg} (f : Flean.Float C) (h : f.IsFinite) :

toRat f.neg = -toRat f

theorem float_le_float_of {C : FloatCfg} [R : Rounding] (q1 q2 : ℚ) (h1 : (toFloat q1).IsFinite) (h2 : (toFloat q2).IsFinite) (h : q1 ≤ q2) :

toRat (toFloat q1) ≤ toRat (toFloat q2)

def toFloatDown {C : FloatCfg} :

ℚ → Flean.Float C

Round a rational toward negative infinity to a float.

Equations

toFloatDown = toFloat

Instances For

def toFloatUp {C : FloatCfg} :

ℚ → Flean.Float C

Round a rational toward positive infinity to a float.

Equations

toFloatUp = toFloat

Instances For

def toFloatNearest {C : FloatCfg} :

ℚ → Flean.Float C

Round a rational to the nearest float (ties to even).

Equations

toFloatNearest = toFloat

Instances For

theorem float_down_le {C : FloatCfg} (q : ℚ) (h : (toFloatDown q).IsFinite) :

toRat (toFloatDown q) ≤ q

theorem le_float_up {C : FloatCfg} (q : ℚ) (h : (toFloatUp q).IsFinite) :

q ≤ toRat (toFloatUp q)

theorem to_float_boundary {C : FloatCfg} (R : Rounding) {q : ℚ} (h : |q| = 2 ^ C.emin) :

toRat (toFloat q) = q

theorem float_up_minus_down {C : FloatCfg} (q : ℚ) (h : (toFloatDown q).IsFinite) (h' : (toFloatUp q).IsFinite) :

toRat (toFloatUp q) - toRat (toFloatDown q) ≤ max (2 ^ C.emin / ↑C.prec) (2 ^ Int.log 2 |q| / ↑C.prec)

theorem float_eq_up_or_down {C : FloatCfg} [R : Rounding] (q : ℚ) :

toFloat q = toFloatDown q ∨ toFloat q = toFloatUp q

theorem float_error_old {C : FloatCfg} [R : Rounding] (q : ℚ) (h : (toFloatDown q).IsFinite) (h' : (toFloatUp q).IsFinite) :

|toRat (toFloat q) - q| ≤ max (2 ^ C.emin / ↑C.prec) (2 ^ Int.log 2 |q| / ↑C.prec)

theorem float_error {C : FloatCfg} [R : Rounding] (q : ℚ) (h : (toFloat q).IsFinite) :

|toRat (toFloat q) - q| ≤ max (2 ^ C.emin / ↑C.prec) (2 ^ Int.log 2 |q| / ↑C.prec)

theorem to_float_in_range {C : FloatCfg} [R : Rounding] {q : ℚ} (h : |q| ≤ maxFloatQ C) :

(toFloat q).IsFinite

theorem float_error' {C : FloatCfg} [R : Rounding] (q : ℚ) (h : |q| ≤ maxFloatQ C) :

|toRat (toFloat q) - q| ≤ max (2 ^ C.emin / ↑C.prec) (2 ^ Int.log 2 |q| / ↑C.prec)

theorem float_nearest_error {C : FloatCfg} (q : ℚ) (h : (toFloatNearest q).IsFinite) :

|q - toRat (toFloatNearest q)| ≤ max (2 ^ (Int.log 2 |q| - 1) / ↑C.prec) (2 ^ (C.emin - 1) / ↑C.prec)