
library ieee;
use ieee.numeric_std.all;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;

use work.nacl_constants.all;

-- ALU core: port interface.
-- The architecture in this file is purely combinational (no clock or reset
-- port exists). Widths are taken from constants in work.nacl_constants.
entity nacl_alu_core is
  port(
    -- Word-sized operand A. Supplies the per-cycle adder-enable bits when
    -- sel_en_mode = '1', and is also input in1 of the multiplexer feeding adder1.
    inA              :  in std_logic_vector(WORD_SIZE-1 downto 0);
    -- Word-sized operand B; connected to the multiplier's data input.
    inB              :  in std_logic_vector(WORD_SIZE-1 downto 0);
    -- Accumulator-wide input; sliced into three fields that feed adder0,
    -- the adder1 multiplexer (in0) and adder2.
    inC              :  in std_logic_vector(ACCU_SIZE+ACCU_EXTENSION-1 downto 0);
    -- Carry-in of adder1.
    cin              :  in std_logic;
    -- Externally supplied adder enables, used when sel_en_mode = '0'.
    en_adder_ext     :  in std_logic_vector(NUM_ADDERS downto 0);
    -- Forwarded to adder1's xor_en / or_en / adder_en ports. en_logic_adder
    -- additionally gates the carry passed from adder1 into adder2.
    en_logic_xor     :  in std_logic;
    en_logic_or      :  in std_logic;
    en_logic_adder   :  in std_logic;
    -- '1': adder enables come from inA (group chosen by mult_counter);
    -- '0': adder enables come from en_adder_ext.
    sel_en_mode      :  in std_logic;
    -- Select input of the multiplexer in front of adder1 (in0 = inC field,
    -- in1 = inA). Polarity is defined by nacl_mux2 -- TODO confirm there.
    sel_add_mode     :  in std_logic;
    -- Index of the (NUM_ADDERS+1)-bit group of the padded inA that is used
    -- as adder enables.
    mult_counter     :  in std_logic_vector(MULT_CYCLES_LD-1 downto 0);
    -- Concatenation adder2_sum & adder1_sum & adder0_sum (MSB to LSB).
    result           : out std_logic_vector(ACCU_SIZE+ACCU_EXTENSION-1 downto 0)
  );
end nacl_alu_core;

-- Combinational datapath of the ALU core.
--
-- inC is split into three fields (LSB to MSB):
--   low  : WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-1 bits -> adder0
--   mid  : WORD_SIZE bits                             -> adder1 (via muxAdder1)
--   high : NUM_ADDERS+4 bits                          -> adder2
-- The multiplier output is added into the mid/high fields; the carry chain
-- runs adder1 -> adder2 -> adder0, and result is the concatenation of the
-- three sums in the same field order.
architecture behaviour of nacl_alu_core is

  -- inA extended to a whole number of (NUM_ADDERS+1)-bit groups.
  signal inA_padded   : std_logic_vector((NUM_ADDERS+1)*MULT_CYCLES-1 downto 0);
  -- Adder enables taken from inA_padded (group selected by mult_counter).
  signal en_adder_int : std_logic_vector(NUM_ADDERS downto 0);
  -- Adder enables actually driven into the multiplier.
  signal en_adder     : std_logic_vector(NUM_ADDERS downto 0);
  -- sel_en_mode (inverted / plain) replicated to vector width for masking.
  signal sel_en_mode0 : std_logic_vector(NUM_ADDERS downto 0);
  signal sel_en_mode1 : std_logic_vector(NUM_ADDERS downto 0);

  signal muxAdder1_out : std_logic_vector(WORD_SIZE-1 downto 0);

  signal mult_result    : std_logic_vector(WORD_SIZE+NUM_ADDERS-1 downto 0);
  signal mult_cout      : std_logic;

  -- adder0: low field of inC.
  signal adder0_a     : std_logic_vector(WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-2 downto 0);
  signal adder0_b     : std_logic_vector(WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-2 downto 0);
  signal adder0_sum   : std_logic_vector(WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-2 downto 0);
  signal adder0_cin   : std_logic;
  -- Connected to the adder's cout port but not read anywhere in this architecture.
  signal adder0_cout  : std_logic;

  -- adder1: mid field (word-sized), with selectable xor/or/add function.
  signal adder1_a     : std_logic_vector(WORD_SIZE-1 downto 0);
  signal adder1_b     : std_logic_vector(WORD_SIZE-1 downto 0);
  signal adder1_sum   : std_logic_vector(WORD_SIZE-1 downto 0);
  signal adder1_cin   : std_logic;
  signal adder1_cout  : std_logic;

  -- adder2: high field of inC plus the upper multiplier bits.
  signal adder2_a     : std_logic_vector(NUM_ADDERS+3 downto 0);
  signal adder2_b     : std_logic_vector(NUM_ADDERS+3 downto 0);
  signal adder2_sum   : std_logic_vector(NUM_ADDERS+3 downto 0);
  signal adder2_cin   : std_logic;
  signal adder2_cout  : std_logic;

begin

  multiplier : entity work.nacl_alu_multiplier
    port map(
      inp      => inB,
      en_adder => en_adder,
      result   => mult_result,
      cout     => mult_cout
    );

  adder0 : entity work.nacl_adder
    generic map(
      WIDTH => WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-1
    )
    port map(
      a    => adder0_a,
      b    => adder0_b,
      sum  => adder0_sum,
      cin  => adder0_cin,
      cout => adder0_cout
    );

  -- Chooses adder1's second operand: the mid field of inC (in0) or inA (in1).
  muxAdder1 : entity work.nacl_mux2
    generic map(
      WIDTH => WORD_SIZE
    )
    port map(
      in0  => inC(2*WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-2 downto WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-1),
      in1  => inA,
      outp => muxAdder1_out,
      sel  => sel_add_mode
    );

  -- Note the crossed naming: port a receives adder1_b (multiplier result) and
  -- port b receives adder1_a (multiplexer output). Kept exactly as wired.
  adder1 : entity work.nacl_adder_dynamic
    generic map(
      WIDTH => WORD_SIZE
    )
    port map(
      a        => adder1_b,
      b        => adder1_a,
      sum      => adder1_sum,
      cin      => adder1_cin,
      cout     => adder1_cout,
      xor_en   => en_logic_xor,
      or_en    => en_logic_or,
      adder_en => en_logic_adder
    );

  adder2 : entity work.nacl_adder
    generic map(
      WIDTH => NUM_ADDERS+4
    )
    port map(
      a    => adder2_a,
      b    => adder2_b,
      sum  => adder2_sum,
      cin  => adder2_cin,
      cout => adder2_cout
    );

  -- Extends inA to (NUM_ADDERS+1)*MULT_CYCLES bits.
  -- If that width equals WORD_SIZE, inA is copied unchanged. Otherwise the
  -- lower MULT_CYCLES-1 groups are copied and the top group is built from the
  -- remaining inA bits with a single '0' appended on the LSB side. The slice
  -- widths of that branch only match when exactly one padding bit is needed,
  -- i.e. (NUM_ADDERS+1)*MULT_CYCLES = WORD_SIZE+1.
  -- NOTE(review): the '0' shifts the remaining inA bits up by one position
  -- inside the top group -- confirm this alignment is intended.
  inAPadding : process(inA) begin
    if (NUM_ADDERS+1)*MULT_CYCLES = WORD_SIZE then
      inA_padded <= inA;
    else
      inA_padded((NUM_ADDERS+1)*(MULT_CYCLES-1)-1 downto 0) <= inA((NUM_ADDERS+1)*(MULT_CYCLES-1)-1 downto 0);
      inA_padded((NUM_ADDERS+1)*MULT_CYCLES-1 downto (NUM_ADDERS+1)*(MULT_CYCLES-1)) <= inA(WORD_SIZE-1 downto (NUM_ADDERS+1)*(MULT_CYCLES-1)) & '0';
    end if;
  end process;

  -- Select group number mult_counter of inA_padded as internal adder enables.
  -- The index is converted with the standard numeric_std function; like the
  -- previous conv_integer call it yields 0 (with a warning) for metavalues.
  -- NOTE(review): a counter value >= MULT_CYCLES indexes outside inA_padded;
  -- presumably the controller never produces one -- verify against caller.
  en_adder_int <= inA_padded(((NUM_ADDERS+1)*(to_integer(unsigned(mult_counter))+1))-1 downto
                              (NUM_ADDERS+1)*to_integer(unsigned(mult_counter)));

  -- Enable source selection by masking: sel_en_mode = '1' passes the
  -- inA-derived enables, sel_en_mode = '0' passes en_adder_ext.
  sel_en_mode0 <= (others => not sel_en_mode);
  sel_en_mode1 <= (others => sel_en_mode);
  en_adder <= (en_adder_int and sel_en_mode1) or (en_adder_ext and sel_en_mode0);

  -- adder0: low inC field plus zero, with adder2's carry-out as carry-in.
  adder0_a <= inC(WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-2 downto 0);
  adder0_b <= (others => '0');
  adder0_cin <= adder2_cout;

  -- adder1: multiplexer output combined with the low WORD_SIZE multiplier bits.
  adder1_a <= muxAdder1_out;
  adder1_b <= mult_result(WORD_SIZE-1 downto 0);
  adder1_cin <= cin;

  -- adder2: high inC field plus (multiplier carry & upper NUM_ADDERS multiplier
  -- bits), zero-extended by three bits to NUM_ADDERS+4. The carry from adder1
  -- is only propagated when the adder function is enabled.
  adder2_a  <= inC(2*WORD_SIZE+ACCU_EXTENSION+2 downto 2*WORD_SIZE+ACCU_EXTENSION-NUM_ADDERS-1);
  adder2_b  <= "000" & mult_cout & mult_result(WORD_SIZE+NUM_ADDERS-1 downto WORD_SIZE);
  adder2_cin <= adder1_cout and en_logic_adder;

  result <= adder2_sum & adder1_sum & adder0_sum;

end;
