Formalization of floating-point numbers, using the Flocq library.
Require Import Coqlib.
Require Import Integers.
Require Import Fappli_IEEE.
Require Import Fappli_IEEE_bits.
Require Import Fappli_IEEE_extra.
Require Import Fcore.
Require Import Program.
Require Archi.
Close Scope R_scope.
Definition float :=
binary64.
(* the type of IEE754 double-precision FP numbers *)
Definition float32 :=
binary32.
(* the type of IEE754 single-precision FP numbers *)
Boolean-valued comparisons
Definition cmp_of_comparison (
c:
comparison) (
x:
option Datatypes.comparison) :
bool :=
match c with
|
Ceq =>
match x with Some Eq =>
true |
_ =>
false end
|
Cne =>
match x with Some Eq =>
false |
_ =>
true end
|
Clt =>
match x with Some Lt =>
true |
_ =>
false end
|
Cle =>
match x with Some(
Lt|
Eq) =>
true |
_ =>
false end
|
Cgt =>
match x with Some Gt =>
true |
_ =>
false end
|
Cge =>
match x with Some(
Gt|
Eq) =>
true |
_ =>
false end
end.
Lemma cmp_of_comparison_swap:
forall c x,
cmp_of_comparison (
swap_comparison c)
x =
cmp_of_comparison c (
match x with None =>
None |
Some x =>
Some (
CompOpp x)
end).
Proof.
intros. destruct c; destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_ne_eq:
forall x,
cmp_of_comparison Cne x =
negb (
cmp_of_comparison Ceq x).
Proof.
intros. destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_lt_eq_false:
forall x,
cmp_of_comparison Clt x =
true ->
cmp_of_comparison Ceq x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Lemma cmp_of_comparison_le_lt_eq:
forall x,
cmp_of_comparison Cle x =
cmp_of_comparison Clt x ||
cmp_of_comparison Ceq x.
Proof.
destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_gt_eq_false:
forall x,
cmp_of_comparison Cgt x =
true ->
cmp_of_comparison Ceq x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Lemma cmp_of_comparison_ge_gt_eq:
forall x,
cmp_of_comparison Cge x =
cmp_of_comparison Cgt x ||
cmp_of_comparison Ceq x.
Proof.
destruct x as [[]|]; reflexivity.
Qed.
Lemma cmp_of_comparison_lt_gt_false:
forall x,
cmp_of_comparison Clt x =
true ->
cmp_of_comparison Cgt x =
true ->
False.
Proof.
destruct x as [[]|]; simpl; intros; discriminate.
Qed.
Local Notation __ := (
refl_equal Datatypes.Lt).
Local Hint Extern 1 (
Prec_gt_0 _) =>
exact (
refl_equal Datatypes.Lt).
Local Hint Extern 1 (
_ <
_) =>
exact (
refl_equal Datatypes.Lt).
Double-precision FP numbers
Module Float.
NaN payload manipulations
The following definitions are not part of the IEEE754 standard but
apply to all architectures supported by CompCert.
Transform a Nan payload to a quiet Nan payload.
Program Definition transform_quiet_pl (
pl:
nan_pl 53) :
nan_pl 53 :=
Pos.lor pl (
iter_nat xO 51
xH).
Next Obligation.
Lemma nan_payload_fequal:
forall prec (
p1 p2:
nan_pl prec),
proj1_sig p1 =
proj1_sig p2 ->
p1 =
p2.
Proof.
Lemma lor_idempotent:
forall x y,
Pos.lor (
Pos.lor x y)
y =
Pos.lor x y.
Proof.
induction x; destruct y; simpl; f_equal; auto;
induction y; simpl; f_equal; auto.
Qed.
Lemma transform_quiet_pl_idempotent:
forall pl,
transform_quiet_pl (
transform_quiet_pl pl) =
transform_quiet_pl pl.
Proof.
Nan payload operations for single <-> double conversions.
Definition expand_pl (
pl:
nan_pl 24) :
nan_pl 53.
Proof.
Definition of_single_pl (
s:
bool) (
pl:
nan_pl 24) : (
bool *
nan_pl 53) :=
(
s,
if Archi.float_of_single_preserves_sNaN
then expand_pl pl
else transform_quiet_pl (
expand_pl pl)).
Definition reduce_pl (
pl:
nan_pl 53) :
nan_pl 24.
Proof.
Definition to_single_pl (
s:
bool) (
pl:
nan_pl 53) : (
bool *
nan_pl 24) :=
(
s,
reduce_pl (
transform_quiet_pl pl)).
NaN payload operations for opposite and absolute value.
Definition neg_pl (
s:
bool) (
pl:
nan_pl 53) := (
negb s,
pl).
Definition abs_pl (
s:
bool) (
pl:
nan_pl 53) := (
false,
pl).
The NaN payload operations for two-argument arithmetic operations
are not part of the IEEE754 standard, but all architectures of
Compcert share a similar NaN behavior, parameterized by:
-
a "default" payload which occurs when an operation generates a NaN from
non-NaN arguments;
-
a choice function determining which of the payload arguments to choose,
when an operation is given two NaN arguments.
Definition binop_pl (
x y:
binary64) :
bool*
nan_pl 53 :=
match x,
y with
|
B754_nan s1 pl1,
B754_nan s2 pl2 =>
if Archi.choose_binop_pl_64 s1 pl1 s2 pl2
then (
s2,
transform_quiet_pl pl2)
else (
s1,
transform_quiet_pl pl1)
|
B754_nan s1 pl1,
_ => (
s1,
transform_quiet_pl pl1)
|
_,
B754_nan s2 pl2 => (
s2,
transform_quiet_pl pl2)
|
_,
_ =>
Archi.default_pl_64
end.
Operations over double-precision floats
Definition zero:
float :=
B754_zero _ _ false.
(* the float +0.0 *)
Definition eq_dec:
forall (
f1 f2:
float), {
f1 =
f2} + {
f1 <>
f2} :=
Beq_dec _ _.
Arithmetic operations
Definition neg:
float ->
float :=
Bopp _ _ neg_pl.
(* opposite (change sign) *)
Definition abs:
float ->
float :=
Babs _ _ abs_pl.
(* absolute value (set sign to +) *)
Definition add:
float ->
float ->
float :=
Bplus 53 1024
__ __ binop_pl mode_NE.
(* addition *)
Definition sub:
float ->
float ->
float :=
Bminus 53 1024
__ __ binop_pl mode_NE.
(* subtraction *)
Definition mul:
float ->
float ->
float :=
Bmult 53 1024
__ __ binop_pl mode_NE.
(* multiplication *)
Definition div:
float ->
float ->
float :=
Bdiv 53 1024
__ __ binop_pl mode_NE.
(* division *)
Definition cmp (
c:
comparison) (
f1 f2:
float) :
bool :=
(* comparison *)
cmp_of_comparison c (
Bcompare _ _ f1 f2).
Conversions
Definition of_single:
float32 ->
float :=
Bconv _ _ 53 1024
__ __ of_single_pl mode_NE.
Definition to_single:
float ->
float32 :=
Bconv _ _ 24 128
__ __ to_single_pl mode_NE.
Definition to_int (
f:
float):
option int :=
(* conversion to signed 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f Int.min_signed Int.max_signed).
Definition to_intu (
f:
float):
option int :=
(* conversion to unsigned 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f 0
Int.max_unsigned).
Definition to_long (
f:
float):
option int64 :=
(* conversion to signed 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f Int64.min_signed Int64.max_signed).
Definition to_longu (
f:
float):
option int64 :=
(* conversion to unsigned 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f 0
Int64.max_unsigned).
Definition of_int (
n:
int):
float :=
(* conversion from signed 32-bit int *)
BofZ 53 1024
__ __ (
Int.signed n).
Definition of_intu (
n:
int):
float:=
(* conversion from unsigned 32-bit int *)
BofZ 53 1024
__ __ (
Int.unsigned n).
Definition of_long (
n:
int64):
float :=
(* conversion from signed 64-bit int *)
BofZ 53 1024
__ __ (
Int64.signed n).
Definition of_longu (
n:
int64):
float:=
(* conversion from unsigned 64-bit int *)
BofZ 53 1024
__ __ (
Int64.unsigned n).
Definition from_parsed (
base:
positive) (
intPart:
positive) (
expPart:
Z) :
float :=
Bparse 53 1024
__ __ base intPart expPart.
Conversions between floats and their concrete in-memory representation
as a sequence of 64 bits.
Definition to_bits (
f:
float):
int64 :=
Int64.repr (
bits_of_b64 f).
Definition of_bits (
b:
int64):
float :=
b64_of_bits (
Int64.unsigned b).
Definition from_words (
hi lo:
int) :
float :=
of_bits (
Int64.ofwords hi lo).
Properties
Below are the only properties of floating-point arithmetic that we
rely on in the compiler proof.
Some tactics *
Ltac compute_this val :=
let x :=
fresh in set val as x in *;
vm_compute in x;
subst x.
Ltac smart_omega :=
simpl radix_val in *;
simpl Zpower in *;
compute_this Int.modulus;
compute_this Int.half_modulus;
compute_this Int.max_unsigned;
compute_this Int.min_signed;
compute_this Int.max_signed;
compute_this Int64.modulus;
compute_this Int64.half_modulus;
compute_this Int64.max_unsigned;
compute_this (
Zpower_pos 2 1024);
compute_this (
Zpower_pos 2 53);
compute_this (
Zpower_pos 2 52);
compute_this (
Zpower_pos 2 32);
zify;
omega.
Commutativity properties of addition and multiplication.
Theorem add_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
add x y =
add y x.
Proof.
intros.
apply Bplus_commut.
destruct x,
y;
try reflexivity.
simpl in H.
intuition congruence.
Qed.
Theorem mul_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
mul x y =
mul y x.
Proof.
intros.
apply Bmult_commut.
destruct x,
y;
try reflexivity.
simpl in H.
intuition congruence.
Qed.
Multiplication by 2 is diagonal addition.
Theorem mul2_add:
forall f,
add f f =
mul f (
of_int (
Int.repr 2%
Z)).
Proof.
Divisions that can be turned into multiplication by an inverse.
Definition exact_inverse :
float ->
option float :=
Bexact_inverse 53 1024
__ __.
Theorem div_mul_inverse:
forall x y z,
exact_inverse y =
Some z ->
div x y =
mul x z.
Proof.
intros.
apply Bdiv_mult_inverse;
auto.
intros.
destruct x0;
try discriminate.
simpl.
transitivity (
b,
transform_quiet_pl n).
destruct y0;
reflexivity ||
discriminate.
destruct z0;
reflexivity ||
discriminate.
Qed.
Properties of comparisons.
Theorem cmp_swap:
forall c x y,
cmp (
swap_comparison c)
x y =
cmp c y x.
Proof.
Theorem cmp_ne_eq:
forall f1 f2,
cmp Cne f1 f2 =
negb (
cmp Ceq f1 f2).
Proof.
Theorem cmp_lt_eq_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Ceq f1 f2 =
true ->
False.
Proof.
Theorem cmp_le_lt_eq:
forall f1 f2,
cmp Cle f1 f2 =
cmp Clt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_gt_eq_false:
forall x y,
cmp Cgt x y =
true ->
cmp Ceq x y =
true ->
False.
Proof.
Theorem cmp_ge_gt_eq:
forall f1 f2,
cmp Cge f1 f2 =
cmp Cgt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_lt_gt_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Cgt f1 f2 =
true ->
False.
Proof.
Properties of conversions to/from in-memory representation.
The conversions are bijective (one-to-one).
Theorem of_to_bits:
forall f,
of_bits (
to_bits f) =
f.
Proof.
Theorem to_of_bits:
forall b,
to_bits (
of_bits b) =
b.
Proof.
Conversions between floats and unsigned ints can be defined
in terms of conversions between floats and signed ints.
(Most processors provide only the latter, forcing the compiler
to emulate the former.)
Definition ox8000_0000 :=
Int.repr Int.half_modulus.
(* 0x8000_0000 *)
Theorem of_intu_of_int_1:
forall x,
Int.ltu x ox8000_0000 =
true ->
of_intu x =
of_int x.
Proof.
Theorem of_intu_of_int_2:
forall x,
Int.ltu x ox8000_0000 =
false ->
of_intu x =
add (
of_int (
Int.sub x ox8000_0000)) (
of_intu ox8000_0000).
Proof.
Theorem to_intu_to_int_1:
forall x n,
cmp Clt x (
of_intu ox8000_0000) =
true ->
to_intu x =
Some n ->
to_int x =
Some n.
Proof.
Theorem to_intu_to_int_2:
forall x n,
cmp Clt x (
of_intu ox8000_0000) =
false ->
to_intu x =
Some n ->
to_int (
sub x (
of_intu ox8000_0000)) =
Some (
Int.sub n ox8000_0000).
Proof.
Conversions from ints to floats can be defined as bitwise manipulations
over the in-memory representation. This is what the PowerPC port does.
The trick is that from_words 0x4330_0000 x is the float
2^52 + of_intu x.
Definition ox4330_0000 :=
Int.repr 1127219200.
(* 0x4330_0000 *)
Lemma split_bits_or:
forall x,
split_bits 52 11 (
Int64.unsigned (
Int64.ofwords ox4330_0000 x)) = (
false,
Int.unsigned x, 1075).
Proof.
Lemma from_words_value:
forall x,
B2R _ _ (
from_words ox4330_0000 x) = (
bpow radix2 52 +
Z2R (
Int.unsigned x))%
R
/\
is_finite _ _ (
from_words ox4330_0000 x) =
true
/\
Bsign _ _ (
from_words ox4330_0000 x) =
false.
Proof.
Lemma from_words_eq:
forall x,
from_words ox4330_0000 x =
BofZ 53 1024
__ __ (2^52 +
Int.unsigned x).
Proof.
Theorem of_intu_from_words:
forall x,
of_intu x =
sub (
from_words ox4330_0000 x) (
from_words ox4330_0000 Int.zero).
Proof.
Lemma ox8000_0000_signed_unsigned:
forall x,
Int.unsigned (
Int.add x ox8000_0000) =
Int.signed x +
Int.half_modulus.
Proof.
Theorem of_int_from_words:
forall x,
of_int x =
sub (
from_words ox4330_0000 (
Int.add x ox8000_0000))
(
from_words ox4330_0000 ox8000_0000).
Proof.
Definition ox4530_0000 :=
Int.repr 1160773632.
(* 0x4530_0000 *)
Lemma split_bits_or':
forall x,
split_bits 52 11 (
Int64.unsigned (
Int64.ofwords ox4530_0000 x)) = (
false,
Int.unsigned x, 1107).
Proof.
Lemma from_words_value':
forall x,
B2R _ _ (
from_words ox4530_0000 x) = (
bpow radix2 84 +
Z2R (
Int.unsigned x *
two_p 32))%
R
/\
is_finite _ _ (
from_words ox4530_0000 x) =
true
/\
Bsign _ _ (
from_words ox4530_0000 x) =
false.
Proof.
Lemma from_words_eq':
forall x,
from_words ox4530_0000 x =
BofZ 53 1024
__ __ (2^84 +
Int.unsigned x * 2^32).
Proof.
Theorem of_longu_from_words:
forall l,
of_longu l =
add (
sub (
from_words ox4530_0000 (
Int64.hiword l))
(
from_words ox4530_0000 (
Int.repr (
two_p 20))))
(
from_words ox4330_0000 (
Int64.loword l)).
Proof.
Theorem of_long_from_words:
forall l,
of_long l =
add (
sub (
from_words ox4530_0000 (
Int.add (
Int64.hiword l)
ox8000_0000))
(
from_words ox4530_0000 (
Int.repr (
two_p 20+
two_p 31))))
(
from_words ox4330_0000 (
Int64.loword l)).
Proof.
Conversions from 64-bit integers can be expressed in terms of
conversions from their 32-bit halves.
Theorem of_longu_decomp:
forall l,
of_longu l =
add (
mul (
of_intu (
Int64.hiword l)) (
BofZ 53 1024
__ __ (2^32)))
(
of_intu (
Int64.loword l)).
Proof.
Theorem of_long_decomp:
forall l,
of_long l =
add (
mul (
of_int (
Int64.hiword l)) (
BofZ 53 1024
__ __ (2^32)))
(
of_intu (
Int64.loword l)).
Proof.
Conversions from unsigned longs can be expressed in terms of conversions from signed longs.
If the unsigned long is too big, a round-to-odd must be performed on it
to avoid double rounding.
Theorem of_longu_of_long_1:
forall x,
Int64.ltu x (
Int64.repr Int64.half_modulus) =
true ->
of_longu x =
of_long x.
Proof.
Theorem of_longu_of_long_2:
forall x,
Int64.ltu x (
Int64.repr Int64.half_modulus) =
false ->
of_longu x =
mul (
of_long (
Int64.or (
Int64.shru x Int64.one)
(
Int64.and x Int64.one)))
(
of_int (
Int.repr 2)).
Proof.
End Float.
Single-precision FP numbers
Module Float32.
NaN payload manipulations
Program Definition transform_quiet_pl (
pl:
nan_pl 24) :
nan_pl 24 :=
Pos.lor pl (
iter_nat xO 22
xH).
Next Obligation.
Lemma transform_quiet_pl_idempotent:
forall pl,
transform_quiet_pl (
transform_quiet_pl pl) =
transform_quiet_pl pl.
Proof.
Definition neg_pl (
s:
bool) (
pl:
nan_pl 24) := (
negb s,
pl).
Definition abs_pl (
s:
bool) (
pl:
nan_pl 24) := (
false,
pl).
Definition binop_pl (
x y:
binary32) :
bool*
nan_pl 24 :=
match x,
y with
|
B754_nan s1 pl1,
B754_nan s2 pl2 =>
if Archi.choose_binop_pl_32 s1 pl1 s2 pl2
then (
s2,
transform_quiet_pl pl2)
else (
s1,
transform_quiet_pl pl1)
|
B754_nan s1 pl1,
_ => (
s1,
transform_quiet_pl pl1)
|
_,
B754_nan s2 pl2 => (
s2,
transform_quiet_pl pl2)
|
_,
_ =>
Archi.default_pl_32
end.
Operations over single-precision floats
Definition zero:
float32 :=
B754_zero _ _ false.
(* the float +0.0 *)
Definition eq_dec:
forall (
f1 f2:
float32), {
f1 =
f2} + {
f1 <>
f2} :=
Beq_dec _ _.
Arithmetic operations
Definition neg:
float32 ->
float32 :=
Bopp _ _ neg_pl.
(* opposite (change sign) *)
Definition abs:
float32 ->
float32 :=
Babs _ _ abs_pl.
(* absolute value (set sign to +) *)
Definition add:
float32 ->
float32 ->
float32 :=
Bplus 24 128
__ __ binop_pl mode_NE.
(* addition *)
Definition sub:
float32 ->
float32 ->
float32 :=
Bminus 24 128
__ __ binop_pl mode_NE.
(* subtraction *)
Definition mul:
float32 ->
float32 ->
float32 :=
Bmult 24 128
__ __ binop_pl mode_NE.
(* multiplication *)
Definition div:
float32 ->
float32 ->
float32 :=
Bdiv 24 128
__ __ binop_pl mode_NE.
(* division *)
Definition cmp (
c:
comparison) (
f1 f2:
float32) :
bool :=
(* comparison *)
cmp_of_comparison c (
Bcompare _ _ f1 f2).
Conversions
Definition of_double :
float ->
float32 :=
Float.to_single.
Definition to_double :
float32 ->
float :=
Float.of_single.
Definition to_int (
f:
float32):
option int :=
(* conversion to signed 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f Int.min_signed Int.max_signed).
Definition to_intu (
f:
float32):
option int :=
(* conversion to unsigned 32-bit int *)
option_map Int.repr (
ZofB_range _ _ f 0
Int.max_unsigned).
Definition to_long (
f:
float32):
option int64 :=
(* conversion to signed 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f Int64.min_signed Int64.max_signed).
Definition to_longu (
f:
float32):
option int64 :=
(* conversion to unsigned 64-bit int *)
option_map Int64.repr (
ZofB_range _ _ f 0
Int64.max_unsigned).
Definition of_int (
n:
int):
float32 :=
(* conversion from signed 32-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int.signed n).
Definition of_intu (
n:
int):
float32 :=
(* conversion from unsigned 32-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int.unsigned n).
Definition of_long (
n:
int64):
float32 :=
(* conversion from signed 64-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int64.signed n).
Definition of_longu (
n:
int64):
float32 :=
(* conversion from unsigned 64-bit int to single-precision float *)
BofZ 24 128
__ __ (
Int64.unsigned n).
Definition from_parsed (
base:
positive) (
intPart:
positive) (
expPart:
Z) :
float32 :=
Bparse 24 128
__ __ base intPart expPart.
Conversions between floats and their concrete in-memory representation
as a sequence of 32 bits.
Definition to_bits (
f:
float32) :
int :=
Int.repr (
bits_of_b32 f).
Definition of_bits (
b:
int):
float32 :=
b32_of_bits (
Int.unsigned b).
Properties
Commutativity properties of addition and multiplication.
Theorem add_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
add x y =
add y x.
Proof.
intros.
apply Bplus_commut.
destruct x,
y;
try reflexivity.
simpl in H.
intuition congruence.
Qed.
Theorem mul_commut:
forall x y,
is_nan _ _ x =
false \/
is_nan _ _ y =
false ->
mul x y =
mul y x.
Proof.
intros.
apply Bmult_commut.
destruct x,
y;
try reflexivity.
simpl in H.
intuition congruence.
Qed.
Multiplication by 2 is diagonal addition.
Theorem mul2_add:
forall f,
add f f =
mul f (
of_int (
Int.repr 2%
Z)).
Proof.
Divisions that can be turned into multiplication by an inverse.
Definition exact_inverse :
float32 ->
option float32 :=
Bexact_inverse 24 128
__ __.
Theorem div_mul_inverse:
forall x y z,
exact_inverse y =
Some z ->
div x y =
mul x z.
Proof.
intros.
apply Bdiv_mult_inverse;
auto.
intros.
destruct x0;
try discriminate.
simpl.
transitivity (
b,
transform_quiet_pl n).
destruct y0;
reflexivity ||
discriminate.
destruct z0;
reflexivity ||
discriminate.
Qed.
Properties of comparisons.
Theorem cmp_swap:
forall c x y,
cmp (
swap_comparison c)
x y =
cmp c y x.
Proof.
Theorem cmp_ne_eq:
forall f1 f2,
cmp Cne f1 f2 =
negb (
cmp Ceq f1 f2).
Proof.
Theorem cmp_lt_eq_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Ceq f1 f2 =
true ->
False.
Proof.
Theorem cmp_le_lt_eq:
forall f1 f2,
cmp Cle f1 f2 =
cmp Clt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_gt_eq_false:
forall x y,
cmp Cgt x y =
true ->
cmp Ceq x y =
true ->
False.
Proof.
Theorem cmp_ge_gt_eq:
forall f1 f2,
cmp Cge f1 f2 =
cmp Cgt f1 f2 ||
cmp Ceq f1 f2.
Proof.
Theorem cmp_lt_gt_false:
forall f1 f2,
cmp Clt f1 f2 =
true ->
cmp Cgt f1 f2 =
true ->
False.
Proof.
Theorem cmp_double:
forall f1 f2 c,
cmp c f1 f2 =
Float.cmp c (
to_double f1) (
to_double f2).
Proof.
Properties of conversions to/from in-memory representation.
The conversions are bijective (one-to-one).
Theorem of_to_bits:
forall f,
of_bits (
to_bits f) =
f.
Proof.
Theorem to_of_bits:
forall b,
to_bits (
of_bits b) =
b.
Proof.
Conversions from 32-bit integers to single-precision floats can
be decomposed into a conversion to a double-precision float,
followed by a Float32.of_double conversion. No double rounding occurs.
Theorem of_int_double:
forall n,
of_int n =
of_double (
Float.of_int n).
Proof.
Theorem of_intu_double:
forall n,
of_intu n =
of_double (
Float.of_intu n).
Proof.
Conversion of single-precision floats to integers can be decomposed
into a Float32.to_double extension, followed by a double-precision-to-int
conversion.
Theorem to_int_double:
forall f n,
to_int f =
Some n ->
Float.to_int (
to_double f) =
Some n.
Proof.
Theorem to_intu_double:
forall f n,
to_intu f =
Some n ->
Float.to_intu (
to_double f) =
Some n.
Proof.
Theorem to_long_double:
forall f n,
to_long f =
Some n ->
Float.to_long (
to_double f) =
Some n.
Proof.
Theorem to_longu_double:
forall f n,
to_longu f =
Some n ->
Float.to_longu (
to_double f) =
Some n.
Proof.
Conversions from 64-bit integers to single-precision floats can be expressed
as conversion to a double-precision float followed by a Float32.of_double conversion.
To avoid double rounding when the integer is large (above 2^53), a round
to odd must be performed on the integer before conversion to double-precision float.
Lemma int_round_odd_plus:
forall p n, 0 <=
p ->
int_round_odd n p =
Z.land (
Z.lor n (
Z.land n (2^
p-1) + (2^
p-1))) (-(2^
p)).
Proof.
intros.
assert (
POS: 0 < 2^
p)
by (
apply (
Zpower_gt_0 radix2);
auto).
assert (
A:
Z.land n (2^
p-1) =
n mod 2^
p).
{
rewrite <-
Z.land_ones by auto.
f_equal.
rewrite Z.ones_equiv.
omega. }
rewrite A.
assert (
B: 0 <=
n mod 2^
p < 2^
p).
{
apply Z_mod_lt.
omega. }
set (
m :=
n mod 2^
p + (2^
p-1))
in *.
assert (
C:
m / 2^
p =
if zeq (
n mod 2^
p) 0
then 0
else 1).
{
unfold m.
destruct (
zeq (
n mod 2^
p) 0).
rewrite e.
apply Zdiv_small.
omega.
eapply Zdiv_unique with (
n mod 2^
p - 1).
ring.
omega. }
assert (
D:
Z.testbit m p =
if zeq (
n mod 2^
p) 0
then false else true).
{
destruct (
zeq (
n mod 2^
p) 0).
apply Z.testbit_false;
auto.
rewrite C;
auto.
apply Z.testbit_true;
auto.
rewrite C;
auto. }
assert (
E:
forall i,
p <
i ->
Z.testbit m i =
false).
{
intros.
apply Z.testbit_false.
omega.
replace (
m / 2^
i)
with 0.
auto.
symmetry.
apply Zdiv_small.
unfold m.
split.
omega.
apply Zlt_le_trans with (2 * 2^
p).
omega.
change 2
with (2^1)
at 1.
rewrite <- (
Zpower_plus radix2)
by omega.
apply Zpower_le.
omega. }
assert (
F:
forall i, 0 <=
i ->
Z.testbit (-2^
p)
i =
if zlt i p then false else true).
{
intros.
rewrite Z.bits_opp by auto.
rewrite <-
Z.ones_equiv.
destruct (
zlt i p).
rewrite Z.ones_spec_low by omega.
auto.
rewrite Z.ones_spec_high by omega.
auto. }
apply int_round_odd_bits;
auto.
-
intros.
rewrite Z.land_spec,
F,
zlt_true by omega.
apply andb_false_r.
-
rewrite Z.land_spec,
Z.lor_spec,
D,
F,
zlt_false,
andb_true_r by omega.
destruct (
Z.eqb (
n mod 2^
p) 0)
eqn:
Z.
rewrite Z.eqb_eq in Z.
rewrite Z,
zeq_true.
apply orb_false_r.
rewrite Z.eqb_neq in Z.
rewrite zeq_false by auto.
apply orb_true_r.
-
intros.
rewrite Z.land_spec,
Z.lor_spec,
E,
F,
zlt_false,
andb_true_r by omega.
apply orb_false_r.
Qed.
Lemma of_long_round_odd:
forall n conv_nan,
2^36 <=
Z.abs n < 2^64 ->
BofZ 24 128
__ __ n =
Bconv _ _ 24 128
__ __ conv_nan mode_NE (
BofZ 53 1024
__ __ (
Z.land (
Z.lor n ((
Z.land n 2047) + 2047)) (-2048))).
Proof.
Theorem of_longu_double_1:
forall n,
Int64.unsigned n <= 2^53 ->
of_longu n =
of_double (
Float.of_longu n).
Proof.
Theorem of_longu_double_2:
forall n,
2^36 <=
Int64.unsigned n ->
of_longu n =
of_double (
Float.of_longu
(
Int64.and (
Int64.or n
(
Int64.add (
Int64.and n (
Int64.repr 2047))
(
Int64.repr 2047)))
(
Int64.repr (-2048)))).
Proof.
Theorem of_long_double_1:
forall n,
Z.abs (
Int64.signed n) <= 2^53 ->
of_long n =
of_double (
Float.of_long n).
Proof.
Theorem of_long_double_2:
forall n,
2^36 <=
Z.abs (
Int64.signed n) ->
of_long n =
of_double (
Float.of_long
(
Int64.and (
Int64.or n
(
Int64.add (
Int64.and n (
Int64.repr 2047))
(
Int64.repr 2047)))
(
Int64.repr (-2048)))).
Proof.
End Float32.
Global Opaque
Float.zero Float.eq_dec Float.neg Float.abs Float.of_single Float.to_single
Float.of_int Float.of_intu Float.of_long Float.of_longu
Float.to_int Float.to_intu Float.to_long Float.to_longu
Float.add Float.sub Float.mul Float.div Float.cmp
Float.to_bits Float.of_bits Float.from_words.
Global Opaque
Float32.zero Float32.eq_dec Float32.neg Float32.abs
Float32.of_int Float32.of_intu Float32.of_long Float32.of_longu
Float32.to_int Float32.to_intu Float32.to_long Float32.to_longu
Float32.add Float32.sub Float32.mul Float32.div Float32.cmp
Float32.to_bits Float32.of_bits.