Require Import ZArith .
Require Import Znumtheory .
Require Export Modulo .

Local Open Scope Z_scope .
  
Variable D : Z .
Hypothesis radix : D = 2^64 .

Lemma D438 : (D^4 == 38 [ 2^255 - 19 ]) .
rewrite radix .
red .
cut ((2 ^ 64) ^ 4= 2 * 2 ^ 255);
[
  intros H; rewrite H; clear H
|
  compute; auto with *
] .
cut (2 * 2 ^ 255 - 38 = 2 * (2 ^ 255 - 19));
[
  intros H; rewrite H; clear H; auto with *
|
  compute; auto with *
] .
Qed .

Ltac cut_eqn eqn :=
  cut eqn;
  [ intros HH; rewrite HH; clear HH | idtac ] .

Ltac cut_modulo exp0 exp1 :=
  cut (exp0 == exp1 [ 2^255 - 19 ]);
  [
    intros HH; apply modulo_tran with exp0; [ clear HH | trivial ]
  |
    idtac
  ] .

Ltac lower_bound_tac :=
  repeat (apply Zmult_le_0_compat || apply Zplus_le_0_compat) ; auto with * .

Ltac upper_bound_tac :=
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r || 
          apply Zmult_lt_compat || 
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with *;
  split;
  [
    repeat (apply Zplus_le_0_compat || apply Zmult_le_0_compat ||
            apply Zplus_lt_compat || apply Zmult_lt_compat_l); auto with *
  |
    idtac
  ]; 
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r || 
          apply Zmult_lt_compat || 
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with * .

Fact input_bound : forall x0 x1 x2 x3 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) ->
    x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3 < 2 ^ 256 .
intros x0 x1 x2 x3 H . 
decompose [ and ] H; clear H; intros .
rewrite radix .
apply Zle_lt_trans with (2 ^ 256 - 1); auto with * .
cut ((x0 <= 2 ^ 64 - 1) /\ (x1 <= 2 ^ 64 - 1) /\ (x2 <= 2 ^ 64 - 1) /\
     (x3 <= 2 ^ 64 - 1)) .
intros H; decompose [ and ] H; clear H; intros .
apply Zle_trans with ((2 ^ 64 - 1) + (2 ^ 64 - 1) * 2 ^ 64 +
  (2 ^ 64 - 1) * (2 ^ 64) ^ 2 + (2 ^ 64 - 1) * (2 ^ 64) ^ 3);
[ idtac | auto with * ] .
repeat (apply Zplus_le_compat || apply Zmult_le_compat_r); auto with * . 
apply Zlt_le_weak; apply Zgt_lt; apply (two_p_gt_ZERO 64); auto with * .
apply Zlt_le_weak; apply Zgt_lt; apply (two_p_gt_ZERO 64); auto with * .
repeat split; apply Zlt_succ_le; auto with * .
Qed .

Fact a0eq : forall x0 x1 x2 x3 x y0 a0 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ 
    (a0 = (x * y0) mod 2 ^ 512) ->
    a0 = x * y0 .
intros x0 x1 x2 x3 x y0 a0 H .
decompose [ and ] H; intros; clear H .
rewrite H11; clear H11; apply Zmod_small .
rewrite H0; clear H0 .
rewrite radix .
split;
[
  lower_bound_tac
|
  apply Zlt_trans with ((2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 
                        2 ^ 64 * (2 ^ 64) ^ 3) * 2 ^ 64);
  [
    upper_bound_tac
  |
    compute; auto with *
  ]
] .
Qed .

Fact a1eq : forall x0 x1 x2 x3 x y0 y1 a0 a1 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\ 
    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) ->
    a1 = x * y1 * D + a0 .
intros x0 x1 x2 x3 x y0 y1 a0 a1 H .
decompose [ and ] H; intros; clear H .
rewrite H15; clear H15; apply Zmod_small .
rewrite (a0eq x0 x1 x2 x3 x y0 a0);
[
  idtac
|
  repeat split; trivial
] .
rewrite H0; clear H0 .
rewrite radix .
split;
[
  lower_bound_tac
|
  idtac
] .
apply Zlt_trans with
  ((2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 2 ^ 64 * (2 ^ 64) ^ 3) *
   2 ^ 64 * 2 ^ 64 +
   (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 2 ^ 64 * (2 ^ 64) ^ 3) *
   2 ^ 64);
[
  idtac
|
  compute; auto with *
] .
apply Zplus_lt_compat; upper_bound_tac .
Qed .

Fact a2eq : forall x0 x1 x2 x3 x y0 y1 y2 a0 a1 a2 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\ 
    (0 <= y2 < 2 ^ 64) /\
    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) /\
    (a2 = (x * y2 * D ^ 2 + a1) mod 2 ^ 512) ->
    a2 = x * y2 * D ^ 2 + a1 .
intros x0 x1 x2 x3 x y0 y1 y2 a0 a1 a2 H .
decompose [ and ] H; intros; clear H .
rewrite H18; clear H18; apply Zmod_small .
rewrite (a1eq x0 x1 x2 x3 x y0 y1 a0 a1);
[
  rewrite (a0eq x0 x1 x2 x3 x y0 a0);
  [
    idtac
  |
    repeat split; trivial
  ]
|
  repeat split; trivial
] .
rewrite H0; clear H0 .
rewrite radix .
split;
[
  lower_bound_tac
|
  idtac
] .
apply Zlt_trans with
  ((2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 2 ^ 64 * (2 ^ 64) ^ 3) 
      * 2 ^ 64 * (2 ^ 64) ^ 2 +
   ((2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 
     2 ^ 64 * (2 ^ 64) ^ 3) * 2 ^ 64 * 2 ^ 64 +
    (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 + 
     2 ^ 64 * (2 ^ 64) ^ 3) * 2 ^ 64));
[
  idtac
|
  compute; auto with *
] .
apply Zplus_lt_compat; 
[
  upper_bound_tac
|
  apply Zplus_lt_compat; upper_bound_tac
] .
Qed .

Fact a3_upper_bound : forall x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\ 
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) /\
    (a2 = (x * y2 * D ^ 2 + a1) mod 2 ^ 512) ->
    x * y3 * D ^ 3 + a2 < 2 ^ 512 .
intros x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 H .
decompose [ and ] H; intros; clear H .
rewrite (a2eq x0 x1 x2 x3 x y0 y1 y2 a0 a1 a2);
[
  rewrite (a1eq x0 x1 x2 x3 x y0 y1 a0 a1);
  [
    rewrite (a0eq x0 x1 x2 x3 x y0 a0);
    [
      idtac
    |
      repeat split; trivial
    ]
  |
    repeat split; trivial
  ]
|
  repeat split; trivial
] .
rewrite H0; clear H0 .
cut_eqn ((x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) * y3 * D ^ 3 +
         ((x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) * y2 * D ^ 2 +
         ((x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) * y1 * D +
         (x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) * y0)) =
         (x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) *
         (y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3));
[
  idtac
|
  ring 
] .
cut_eqn (2 ^ 512 = 2 ^ 256 * 2 ^ 256) .
apply Zmult_lt_compat;
[
  split;
  [ 
    rewrite radix; lower_bound_tac 
  | 
    apply (input_bound x0 x1 x2 x3); repeat split; trivial
  ]
|
  split;
  [ 
    rewrite radix; lower_bound_tac 
  | 
    apply (input_bound y0 y1 y2 y3); repeat split; trivial
  ]
] .
auto with * .
Qed .

Fact a3_lower_bound : forall x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\ 
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) /\
    (a2 = (x * y2 * D ^ 2 + a1) mod 2 ^ 512) ->
    0 <= x * y3 * D ^ 3 + a2 .
intros x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 H .
decompose [ and ] H; intros; clear H .
rewrite (a2eq x0 x1 x2 x3 x y0 y1 y2 a0 a1 a2);
[
  rewrite (a1eq x0 x1 x2 x3 x y0 y1 a0 a1);
  [
    rewrite (a0eq x0 x1 x2 x3 x y0 a0);
    [
      idtac
    |
      repeat split; trivial
    ]
  |
    repeat split; trivial
  ]
|
  repeat split; trivial
] .
rewrite H0; clear H0 .
rewrite radix .
lower_bound_tac .
Qed .

Fact a3eq : forall x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 a3 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\ 
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) /\
    (a2 = (x * y2 * D ^ 2 + a1) mod 2 ^ 512) /\
    (a3 = (x * y3 * D ^ 3 + a2) mod 2 ^ 512) ->
    a3 = x * y3 * D ^ 3 + a2 .
intros x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 a3 H .
decompose [ and ] H; intros; clear H .
rewrite H21; clear H21; apply Zmod_small .
split;
[
  apply (a3_lower_bound x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2)
|
  apply (a3_upper_bound x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2)
];
repeat split; trivial .
Qed .

Ltac d438tac eqn :=
  cut eqn;
  [
    intros HH; rewrite HH; clear HH;
    repeat (apply modulo_mult_substr);
    apply D438
  |
    ring
  ] .

Fact zi_reduce :
  forall x0 x1 x2 x3 y0 y1 y2 y3 x y 
         s0 s1 s2 s3 s4 s5 s6 s7  : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (y = y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) /\

    (0 <= s0 < 2 ^ 64) /\ (0 <= s1 < 2 ^ 64) /\ (0 <= s2 < 2 ^ 64) /\ 
    (0 <= s3 < 2 ^ 64) /\ (0 <= s4 < 2 ^ 64) /\ (0 <= s5 < 2 ^ 64) /\
    (0 <= s6 < 2 ^ 64) /\ (0 <= s7 < 2 ^ 64) ->
    (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 + s4 * D ^ 4 + s5 * D ^ 5 +
     s6 * D ^ 6 + s7 * D ^ 7 ==
     s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 +
     38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3) [2 ^ 255 - 19]) .
intros x0 x1 x2 x3 y0 y1 y2 y3 x y s0 s1 s2 s3 s4 s5 s6 s7 H .
decompose [ and ] H; clear H; intros .
repeat (rewrite Zplus_assoc_reverse) .
repeat (apply modulo_plus_substl) .
repeat (rewrite Zmult_plus_distr_r) .

cut (s4 * D ^ 4 == 38 * s4 [ 2 ^ 255 - 19 ]);
[
  intros HH; apply modulo_plus_subst; trivial; clear HH
|
  d438tac (s4 * D ^ 4 = D ^ 4 * s4)
] .

cut (s5 * D ^ 5 == 38 * (s5 * D) [ 2 ^ 255 - 19 ]);
[
  intros HH; apply modulo_plus_subst; trivial; clear HH
|
  d438tac (s5 * D ^ 5 = D ^ 4 * (s5 * D))
] .

cut (s6 * D ^ 6 == 38 * (s6 * D ^ 2) [ 2 ^ 255 - 19 ]);
[
  intros HH; apply modulo_plus_subst; trivial; clear HH
|
  d438tac (s6 * D ^ 6 = D ^ 4 * (s6 * D ^ 2))
] .

d438tac (s7 * D ^ 7 = D ^ 4 * (s7 * D ^ 3)) .

Qed .

Fact ri''eq :
  forall x0 x1 x2 x3 y0 y1 y2 y3 x y 
         s0 s1 s2 s3 s4 s5 s6 s7 s0' s1' s2' s3' s4' : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (y = y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) /\

    (0 <= s0 < 2 ^ 64) /\ (0 <= s1 < 2 ^ 64) /\ (0 <= s2 < 2 ^ 64) /\ 
    (0 <= s3 < 2 ^ 64) /\ (0 <= s4 < 2 ^ 64) /\ (0 <= s5 < 2 ^ 64) /\
    (0 <= s6 < 2 ^ 64) /\ (0 <= s7 < 2 ^ 64) /\

    (0 <= s0' < 2 ^ 64) /\ (0 <= s1' < 2 ^ 64) /\ (0 <= s2' < 2 ^ 64) /\
    (0 <= s3' < 2 ^ 64) /\ (0 <= s4' < 2 ^ 64) /\ 
    (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4 =
     (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 +
      38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3)) mod 2 ^ 321) ->
    s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4 =
    s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 +
    38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3) .
intros x0 x1 x2 x3 y0 y1 y2 y3 x y s0 s1 s2 s3 s4 s5 s6 s7 
       r''0 r''1 r''2 r''3 r''4 H .
decompose [ and ] H; clear H; intros .
rewrite H44; clear H44; apply Zmod_small .
rewrite radix .
split .
  lower_bound_tac .
  apply Zlt_trans with (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 +
    2 ^ 64 * (2 ^ 64) ^ 3 + 
    38 * (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 +
          2 ^ 64 * (2 ^ 64) ^ 3));
  [ idtac | compute; auto with * ] .
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r ||
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with * .
  apply Zmult_lt_compat_l; auto with * .
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r ||
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with * .
Qed .

Fact ri'eq :
  forall x0 x1 x2 x3 y0 y1 y2 y3 x y 
         s0' s1' s2' s3' s4' r0' s1'' s2'' s3'' s4'' : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (y = y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) /\

    (0 <= s0' < 2 ^ 64) /\ (0 <= s1' < 2 ^ 64) /\ (0 <= s2' < 2 ^ 64) /\
    (0 <= s3' < 2 ^ 64) /\ (0 <= s4' < 2 ^ 64) /\ 

    (0 <= r0' < 2 ^ 64) /\ (0 <= s1'' < 2 ^ 64) /\ (0 <= s2'' < 2 ^ 64) /\
    (0 <= s3'' < 2 ^ 64) /\ (0 <= s4'' < 2) /\ 
    (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4 =
     (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4') mod 2 ^ 321) ->
    r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4 =
     s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4' .
intros x0 x1 x2 x3 y0 y1 y2 y3 x y 
       s0' s1' s2' s3' s4' r0' s1'' s2'' s3'' s4'' H .
decompose [ and ] H; clear H; intros .
rewrite H38; clear H38; apply Zmod_small .
rewrite radix .
split;
[
  lower_bound_tac
|
  apply Zlt_trans with (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 +
                        2 ^ 64 * (2 ^ 64) ^ 3 + 38 * 2 ^ 64);
  [
    idtac
  |
    compute; auto with *
  ];
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r ||
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with *
] .
Qed .

Fact rieq : forall x0 x1 x2 x3 y0 y1 y2 y3 x y 
                   r0' s1'' s2'' s3'' s4'' r0 r1 r2 r3 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (y = y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) /\

    (0 <= r0' < 2 ^ 64) /\ (0 <= s1'' < 2 ^ 64) /\ (0 <= s2'' < 2 ^ 64) /\
    (0 <= s3'' < 2 ^ 64) /\ (0 <= s4'' < 2) /\ 
    
    (0 <= r0 < 2 ^ 64) /\ (0 <= r1 < 2 ^ 64) /\ (0 <= r2 < 2 ^ 64) /\
    (0 <= r3 < 2 ^ 64) /\

    (r0 + r1 * D + r2 * D ^ 2 + r3 * D ^ 3 =
     (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'') mod 2 ^ 257) ->
    r0 + r1 * D + r2 * D ^ 2 + r3 * D ^ 3 =
    r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'' .
intros x0 x1 x2 x3 y0 y1 y2 y3 x y r0' s1'' s2'' s3'' s4'' r0 r1 r2 r3 H .
decompose [ and ] H; clear H; intros .
rewrite H36; clear H36; apply Zmod_small .
rewrite radix .
split .
  lower_bound_tac .
  apply Zlt_trans with (2 ^ 64 + 2 ^ 64 * 2 ^ 64 + 2 ^ 64 * (2 ^ 64) ^ 2 +
    2 ^ 64 * (2 ^ 64) ^ 3 + 38 * 2); [ idtac | compute; auto with * ] .
  repeat (apply Zplus_lt_compat || apply Zmult_lt_compat_r ||
          (apply Zgt_lt; apply (two_p_gt_ZERO 64))); auto with * .
Qed .  

Fact ri''modulo : forall s0' s1' s2' s3' s4' : Z,
    (0 <= s0' < 2 ^ 64) /\ (0 <= s1' < 2 ^ 64) /\ (0 <= s2' < 2 ^ 64) /\
    (0 <= s3' < 2 ^ 64) /\ (0 <= s4' < 2 ^ 64) ->
   (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4 ==
   s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4' [
   2 ^ 255 - 19]) .
intros s0' s1' s2' s3' s4' H .
decompose [ and ] H; clear H; intros .
repeat (rewrite Zplus_assoc_reverse) .
repeat (apply modulo_plus_substl) .
d438tac (s4' * D ^ 4 = D ^ 4 * s4') .
Qed .

Fact ri'modulo : forall r0' s1'' s2'' s3'' s4'' : Z,
    (0 <= r0' < 2 ^ 64) /\ (0 <= s1'' < 2 ^ 64) /\ (0 <= s2'' < 2 ^ 64) /\
    (0 <= s3'' < 2 ^ 64) /\ (0 <= s4'' < 2) ->
   (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4 ==
    r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'' [
   2 ^ 255 - 19]) .
intros r0' s1'' s2'' s3'' s4'' H .
decompose [ and ] H; clear H; intros .
repeat (rewrite Zplus_assoc_reverse) .
repeat (apply modulo_plus_substl) .
d438tac (s4'' * D ^ 4 = D ^ 4 * s4'') .
Qed .

Lemma reasoning :
  forall x0 x1 x2 x3 y0 y1 y2 y3 x y 
         s0 s1 s2 s3 s4 s5 s6 s7 a0 a1 a2 a3
         s0' s1' s2' s3' s4' r0' s1'' s2'' s3'' s4'' r0 r1 r2 r3 : Z,
    (0 <= x0 < 2 ^ 64) /\ (0 <= x1 < 2 ^ 64) /\
    (0 <= x2 < 2 ^ 64) /\ (0 <= x3 < 2 ^ 64) /\
    (x = x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) /\
    (0 <= y0 < 2 ^ 64) /\ (0 <= y1 < 2 ^ 64) /\
    (0 <= y2 < 2 ^ 64) /\ (0 <= y3 < 2 ^ 64) /\
    (y = y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) /\

    (a0 = (x * y0) mod 2 ^ 512) /\ 
    (a1 = (x * y1 * D + a0) mod 2 ^ 512) /\
    (a2 = (x * y2 * D ^ 2 + a1) mod 2 ^ 512) /\
    (a3 = (x * y3 * D ^ 3 + a2) mod 2 ^ 512) /\
    (0 <= s0 < 2 ^ 64) /\ (0 <= s1 < 2 ^ 64) /\ (0 <= s2 < 2 ^ 64) /\ 
    (0 <= s3 < 2 ^ 64) /\ (0 <= s4 < 2 ^ 64) /\ (0 <= s5 < 2 ^ 64) /\
    (0 <= s6 < 2 ^ 64) /\ (0 <= s7 < 2 ^ 64) /\
    (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 + s4 * D ^ 4 + s5 * D ^ 5 +
        s6 * D ^ 6 + s7 * D ^ 7 = a3 mod 2 ^ 512) /\

    (0 <= s0' < 2 ^ 64) /\ (0 <= s1' < 2 ^ 64) /\ (0 <= s2' < 2 ^ 64) /\
    (0 <= s3' < 2 ^ 64) /\ (0 <= s4' < 2 ^ 64) /\ 
    (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4 =
     (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 +
      38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3)) mod 2 ^ 321) /\

    (0 <= r0' < 2 ^ 64) /\ (0 <= s1'' < 2 ^ 64) /\ (0 <= s2'' < 2 ^ 64) /\
    (0 <= s3'' < 2 ^ 64) /\ (0 <= s4'' < 2) /\ 
    (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4 =
     (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4') mod 2 ^ 321) /\
    
    (0 <= r0 < 2 ^ 64) /\ (0 <= r1 < 2 ^ 64) /\ (0 <= r2 < 2 ^ 64) /\
    (0 <= r3 < 2 ^ 64) /\
    (r0 + r1 * D + r2 * D ^ 2 + r3 * D ^ 3 =
     (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'') mod 2 ^ 257) 
    ->
    ((x0 + x1 * D + x2 * D^2 + x3 * D^3) *
     (y0 + y1 * D + y2 * D^2 + y3 * D^3) ==
     (r0 + r1 * D + r2 * D^2 + r3 * D^3) [ 2^255 - 19 ]) .
Proof .
intros .
decompose [ and ] H; clear H .

cut_eqn (r0 + r1 * D + r2 * D ^ 2 + r3 * D ^ 3 =
         r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'') .

cut_modulo (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4) 
           (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + 38 * s4'') .

cut_eqn (r0' + s1'' * D + s2'' * D ^ 2 + s3'' * D ^ 3 + s4'' * D ^ 4 =
         s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4') .

cut_modulo (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4)
           (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + 38 * s4') .
       
cut_eqn (s0' + s1' * D + s2' * D ^ 2 + s3' * D ^ 3 + s4' * D ^ 4 =
         s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 +
         38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3)) .

cut_modulo (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 + 
            s4 * D ^ 4 + s5 * D ^ 5 + s6 * D ^ 6 + s7 * D ^ 7)
           (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 + 
            38 * (s4 + s5 * D + s6 * D ^ 2 + s7 * D ^ 3)) .

cut_eqn (s0 + s1 * D + s2 * D ^ 2 + s3 * D ^ 3 + s4 * D ^ 4 + s5 * D ^ 5 +
         s6 * D ^ 6 + s7 * D ^ 7 = a3) .

cut_eqn (a3 = x * y3 * D ^ 3 + a2);
[
  idtac
|
  apply (a3eq x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 a3);
  repeat split; trivial
] .

cut_eqn (a2 = x * y2 * D ^ 2 + a1);
[
  idtac
|
  apply (a2eq x0 x1 x2 x3 x y0 y1 y2 a0 a1 a2);
  repeat split; trivial
] .

cut_eqn (a1 = x * y1 * D + a0);
[
  idtac
|
  apply (a1eq x0 x1 x2 x3 x y0 y1 a0 a1);
  repeat split; trivial
] .

cut_eqn (a0 = x * y0);
[
  idtac
|
  apply (a0eq x0 x1 x2 x3 x y0 a0);
  repeat split; trivial
] .

cut ((x0 + x1 * D + x2 * D ^ 2 + x3 * D ^ 3) *
     (y0 + y1 * D + y2 * D ^ 2 + y3 * D ^ 3) =
     x * y3 * D ^ 3 + (x * y2 * D ^ 2 + (x * y1 * D + x * y0))); 
[ intros HH; rewrite HH; apply modulo_refl | rewrite H0; ring ] .

rewrite H22; apply Zmod_small .
rewrite (a3eq x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2 a3);
repeat split; trivial .
apply (a3_lower_bound x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2);
repeat split; trivial .
apply (a3_upper_bound x0 x1 x2 x3 x y0 y1 y2 y3 a0 a1 a2);
repeat split; trivial .

apply (zi_reduce x0 x1 x2 x3 y0 y1 y2 y3 x y s0 s1 s2 s3 s4 s5 s6 s7);
repeat split; trivial .

apply (ri''eq x0 x1 x2 x3 y0 y1 y2 y3 x y s0 s1 s2 s3 s4 s5 s6 s7
       s0' s1' s2' s3' s4'); repeat split; trivial .

apply (ri''modulo s0' s1' s2' s3' s4'); repeat split; trivial .

apply (ri'eq x0 x1 x2 x3 y0 y1 y2 y3 x y
       s0' s1' s2' s3' s4' r0' s1'' s2'' s3'' s4''); repeat split; trivial .

apply (ri'modulo r0' s1'' s2'' s3'' s4''); repeat split; trivial .

apply (rieq x0 x1 x2 x3 y0 y1 y2 y3 x y r0' s1'' s2'' s3'' s4'' r0 r1 r2 r3);
repeat split; trivial .

Qed .

Quit .
