# File:   avrnacl-20130514/smallrom/shared/bigint_mul136.S
# Author: Michael Hutter, Peter Schwabe
# Public Domain

.global bigint_mul136
.type bigint_mul136, @function

; ---------------------------------------------------------------------------
; bigint_mul136 -- schoolbook product of two 17-byte little-endian integers.
;
;   in : R25:R24 = address of r (34 result bytes, lowest byte first)
;        R23:R22 = address of a (17 bytes are read)
;        R21:R20 = address of b (17 bytes are read)
;   NOTE(review): this register assignment looks like the avr-gcc argument
;   convention for a call (r, a, b) -- confirm against the C prototype.
;
; Each operand is split into a low part of 9 bytes and a high part of 8
; bytes; four partial products are accumulated column by column:
;   pass 1: a[0..8]  * b[0..8]    written to r[0..17]
;   pass 2: a[0..8]  * b[9..16]   added in from r[9],  extends r to r[25]
;   pass 3: a[9..16] * b[0..8]    added in from r[9],  extends r to r[26]
;   pass 4: a[9..16] * b[9..16]   added in from r[18], extends r to r[33]
;
; r is stored to before all of a and b have been loaded, so r must not
; overlap a or b.
;
; Register roles inside the routine:
;   r2..r10   bytes of a currently in use
;   r11..r19  bytes of b currently in use
;   r20..r22  three-byte column accumulator; the roles (low, middle, top)
;             move on by one register after every stored byte
;   r24       result byte fetched back from r
;   r25       constant zero, used to propagate carries
;   r1:r0     output of mul
; r2..r17 and r28/r29 are saved on the stack and restored; r1 is cleared
; before returning. r0, r18..r27, r30/r31 and the flags are left modified.
; The code is straight-line: there are no branches.
; ---------------------------------------------------------------------------

; Add ra*rb into the accumulator c2:c1:c0 (c0 is the low byte).
.macro m136_add ra, rb, c0, c1, c2
  mul \ra, \rb
  add \c0, r0
  adc \c1, r1
  adc \c2, r25
.endm

; First product of a column: c2 is the register whose value was stored at
; the end of the previous column, so it is cleared before it takes a carry.
.macro m136_open ra, rb, c0, c1, c2
  mul \ra, \rb
  clr \c2
  add \c0, r0
  adc \c1, r1
  adc \c2, r25
.endm

; Last product of a column that has no earlier result byte: add the
; product, then store the finished low byte c0 and advance Z.
.macro m136_emit ra, rb, c0, c1, c2
  m136_add \ra, \rb, \c0, \c1, \c2
  st  Z+, \c0
.endm

; Last product of a column that already has a byte in r: fetch that byte,
; add it and the product, then write the finished low byte back and advance
; Z. The carry of the first add goes into r1; the high byte of an 8x8
; product is at most 0xFE, so r1 cannot wrap.
.macro m136_fold ra, rb, c0, c1, c2
  ld  r24, Z
  mul \ra, \rb
  add \c0, r24
  adc r1, r25
  add \c0, r0
  adc \c1, r1
  adc \c2, r25
  st  Z+, \c0
.endm

; Columns 0..7 of passes 2, 3 and 4. With A[i] = r(2+i) and B[j] = r(11+j),
; column k adds the byte at Z and every A[i]*B[j] with i+j = k, stores the
; low byte and advances Z. On exit the running carry is in r22:r21
; (r21 low) and Z has advanced by 8.
.macro m136_head8
  ; column 0: A0*B0
  ld  r24, Z
  mul r2, r11
  clr r20
  add r24, r0
  adc r20, r1
  st  Z+, r24

  ; column 1: A0*B1 + A1*B0
  mul r2, r12
  clr r21
  clr r22
  add r20, r0
  adc r21, r1
  m136_fold r3, r11,  r20, r21, r22

  ; column 2: A0*B2 + A1*B1 + A2*B0
  m136_open r2, r13,  r21, r22, r20
  m136_add  r3, r12,  r21, r22, r20
  m136_fold r4, r11,  r21, r22, r20

  ; column 3: A0*B3 + ... + A3*B0
  m136_open r2, r14,  r22, r20, r21
  m136_add  r3, r13,  r22, r20, r21
  m136_add  r4, r12,  r22, r20, r21
  m136_fold r5, r11,  r22, r20, r21

  ; column 4: A0*B4 + ... + A4*B0
  m136_open r2, r15,  r20, r21, r22
  m136_add  r3, r14,  r20, r21, r22
  m136_add  r4, r13,  r20, r21, r22
  m136_add  r5, r12,  r20, r21, r22
  m136_fold r6, r11,  r20, r21, r22

  ; column 5: A0*B5 + ... + A5*B0
  m136_open r2, r16,  r21, r22, r20
  m136_add  r3, r15,  r21, r22, r20
  m136_add  r4, r14,  r21, r22, r20
  m136_add  r5, r13,  r21, r22, r20
  m136_add  r6, r12,  r21, r22, r20
  m136_fold r7, r11,  r21, r22, r20

  ; column 6: A0*B6 + ... + A6*B0
  m136_open r2, r17,  r22, r20, r21
  m136_add  r3, r16,  r22, r20, r21
  m136_add  r4, r15,  r22, r20, r21
  m136_add  r5, r14,  r22, r20, r21
  m136_add  r6, r13,  r22, r20, r21
  m136_add  r7, r12,  r22, r20, r21
  m136_fold r8, r11,  r22, r20, r21

  ; column 7: A0*B7 + ... + A7*B0
  m136_open r2, r18,  r20, r21, r22
  m136_add  r3, r17,  r20, r21, r22
  m136_add  r4, r16,  r20, r21, r22
  m136_add  r5, r15,  r20, r21, r22
  m136_add  r6, r14,  r20, r21, r22
  m136_add  r7, r13,  r20, r21, r22
  m136_add  r8, r12,  r20, r21, r22
  m136_fold r9, r11,  r20, r21, r22
.endm

bigint_mul136:

  ; save every register that is restored before returning
  .irp reg, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r28, r29
  push \reg
  .endr

  movw r30, r24                 ; Z = r
  movw r26, r22                 ; X = a
  movw r28, r20                 ; Y = b

  clr  r25                      ; r25 stays zero until the end

  ; a[0..8] -> r2..r10
  .irp reg, r2, r3, r4, r5, r6, r7, r8, r9, r10
  ld  \reg, X+
  .endr

  ; b[0..8] -> r11..r19
  .irp reg, r11, r12, r13, r14, r15, r16, r17, r18, r19
  ld  \reg, Y+
  .endr

  ; ======================================================================
  ; pass 1: a[0..8] * b[0..8]  ->  r[0..17]
  ; ======================================================================

  ; r[0]: a0*b0
  mul r2, r11
  st  Z+, r0
  mov r20, r1

  ; r[1]: a0*b1 + a1*b0
  mul r2, r12
  clr r21
  clr r22
  add r20, r0
  adc r21, r1
  m136_emit r3, r11,  r20, r21, r22

  ; r[2]: a0*b2 + a1*b1 + a2*b0
  ; (the first product is added without a carry into the cleared top byte)
  mul r2, r13
  clr r20
  add r21, r0
  adc r22, r1
  m136_add  r3, r12,  r21, r22, r20
  m136_emit r4, r11,  r21, r22, r20

  ; r[3]: a0*b3 + a1*b2 + a2*b1 + a3*b0
  m136_open r2, r14,  r22, r20, r21
  m136_add  r3, r13,  r22, r20, r21
  m136_add  r4, r12,  r22, r20, r21
  m136_emit r5, r11,  r22, r20, r21

  ; r[4]: a0*b4 + ... + a4*b0
  m136_open r2, r15,  r20, r21, r22
  m136_add  r3, r14,  r20, r21, r22
  m136_add  r4, r13,  r20, r21, r22
  m136_add  r5, r12,  r20, r21, r22
  m136_emit r6, r11,  r20, r21, r22

  ; r[5]: a0*b5 + ... + a5*b0
  m136_open r2, r16,  r21, r22, r20
  m136_add  r3, r15,  r21, r22, r20
  m136_add  r4, r14,  r21, r22, r20
  m136_add  r5, r13,  r21, r22, r20
  m136_add  r6, r12,  r21, r22, r20
  m136_emit r7, r11,  r21, r22, r20

  ; r[6]: a0*b6 + ... + a6*b0
  m136_open r2, r17,  r22, r20, r21
  m136_add  r3, r16,  r22, r20, r21
  m136_add  r4, r15,  r22, r20, r21
  m136_add  r5, r14,  r22, r20, r21
  m136_add  r6, r13,  r22, r20, r21
  m136_add  r7, r12,  r22, r20, r21
  m136_emit r8, r11,  r22, r20, r21

  ; r[7]: a0*b7 + ... + a7*b0
  m136_open r2, r18,  r20, r21, r22
  m136_add  r3, r17,  r20, r21, r22
  m136_add  r4, r16,  r20, r21, r22
  m136_add  r5, r15,  r20, r21, r22
  m136_add  r6, r14,  r20, r21, r22
  m136_add  r7, r13,  r20, r21, r22
  m136_add  r8, r12,  r20, r21, r22
  m136_emit r9, r11,  r20, r21, r22

  ; r[8]: a0*b8 + ... + a8*b0
  m136_open r2, r19,  r21, r22, r20
  m136_add  r3, r18,  r21, r22, r20
  m136_add  r4, r17,  r21, r22, r20
  m136_add  r5, r16,  r21, r22, r20
  m136_add  r6, r15,  r21, r22, r20
  m136_add  r7, r14,  r21, r22, r20
  m136_add  r8, r13,  r21, r22, r20
  m136_add  r9, r12,  r21, r22, r20
  m136_emit r10, r11, r21, r22, r20

  ; r[9]: a1*b8 + ... + a8*b1
  m136_open r3, r19,  r22, r20, r21
  m136_add  r4, r18,  r22, r20, r21
  m136_add  r5, r17,  r22, r20, r21
  m136_add  r6, r16,  r22, r20, r21
  m136_add  r7, r15,  r22, r20, r21
  m136_add  r8, r14,  r22, r20, r21
  m136_add  r9, r13,  r22, r20, r21
  m136_emit r10, r12, r22, r20, r21

  ; r[10]: a2*b8 + ... + a8*b2
  m136_open r4, r19,  r20, r21, r22
  m136_add  r5, r18,  r20, r21, r22
  m136_add  r6, r17,  r20, r21, r22
  m136_add  r7, r16,  r20, r21, r22
  m136_add  r8, r15,  r20, r21, r22
  m136_add  r9, r14,  r20, r21, r22
  m136_emit r10, r13, r20, r21, r22

  ; r[11]: a3*b8 + ... + a8*b3
  m136_open r5, r19,  r21, r22, r20
  m136_add  r6, r18,  r21, r22, r20
  m136_add  r7, r17,  r21, r22, r20
  m136_add  r8, r16,  r21, r22, r20
  m136_add  r9, r15,  r21, r22, r20
  m136_emit r10, r14, r21, r22, r20

  ; r[12]: a4*b8 + ... + a8*b4
  m136_open r6, r19,  r22, r20, r21
  m136_add  r7, r18,  r22, r20, r21
  m136_add  r8, r17,  r22, r20, r21
  m136_add  r9, r16,  r22, r20, r21
  m136_emit r10, r15, r22, r20, r21

  ; r[13]: a5*b8 + a6*b7 + a7*b6 + a8*b5
  m136_open r7, r19,  r20, r21, r22
  m136_add  r8, r18,  r20, r21, r22
  m136_add  r9, r17,  r20, r21, r22
  m136_emit r10, r16, r20, r21, r22

  ; r[14]: a6*b8 + a7*b7 + a8*b6
  m136_open r8, r19,  r21, r22, r20
  m136_add  r9, r18,  r21, r22, r20
  m136_emit r10, r17, r21, r22, r20

  ; r[15]: a7*b8 + a8*b7
  m136_open r9, r19,  r22, r20, r21
  m136_emit r10, r18, r22, r20, r21

  ; r[16], r[17]: a8*b8 plus the remaining carry
  mul r10, r19
  add r20, r0
  adc r21, r1
  st  Z+, r20
  st  Z+, r21

  ; ======================================================================
  ; pass 2: a[0..8] * b[9..16], added in starting at r[9]
  ; ======================================================================
  sbiw r30, 9                   ; Z: r+18 -> r+9

  ; b[9..16] -> r11..r18
  .irp reg, r11, r12, r13, r14, r15, r16, r17, r18
  ld  \reg, Y+
  .endr

  ; r[9]..r[16]: a0*b9 up to a0*b16 + ... + a7*b9
  m136_head8

  ; r[17]: a1*b16 + ... + a8*b9
  m136_open r3, r18,  r21, r22, r20
  m136_add  r4, r17,  r21, r22, r20
  m136_add  r5, r16,  r21, r22, r20
  m136_add  r6, r15,  r21, r22, r20
  m136_add  r7, r14,  r21, r22, r20
  m136_add  r8, r13,  r21, r22, r20
  m136_add  r9, r12,  r21, r22, r20
  m136_fold r10, r11, r21, r22, r20

  ; r[18]: a2*b16 + ... + a8*b10   (first byte not produced by pass 1)
  m136_open r4, r18,  r22, r20, r21
  m136_add  r5, r17,  r22, r20, r21
  m136_add  r6, r16,  r22, r20, r21
  m136_add  r7, r15,  r22, r20, r21
  m136_add  r8, r14,  r22, r20, r21
  m136_add  r9, r13,  r22, r20, r21
  m136_emit r10, r12, r22, r20, r21

  ; r[19]: a3*b16 + ... + a8*b11
  m136_open r5, r18,  r20, r21, r22
  m136_add  r6, r17,  r20, r21, r22
  m136_add  r7, r16,  r20, r21, r22
  m136_add  r8, r15,  r20, r21, r22
  m136_add  r9, r14,  r20, r21, r22
  m136_emit r10, r13, r20, r21, r22

  ; r[20]: a4*b16 + ... + a8*b12
  m136_open r6, r18,  r21, r22, r20
  m136_add  r7, r17,  r21, r22, r20
  m136_add  r8, r16,  r21, r22, r20
  m136_add  r9, r15,  r21, r22, r20
  m136_emit r10, r14, r21, r22, r20

  ; r[21]: a5*b16 + a6*b15 + a7*b14 + a8*b13
  m136_open r7, r18,  r22, r20, r21
  m136_add  r8, r17,  r22, r20, r21
  m136_add  r9, r16,  r22, r20, r21
  m136_emit r10, r15, r22, r20, r21

  ; r[22]: a6*b16 + a7*b15 + a8*b14
  m136_open r8, r18,  r20, r21, r22
  m136_add  r9, r17,  r20, r21, r22
  m136_emit r10, r16, r20, r21, r22

  ; r[23]: a7*b16 + a8*b15
  m136_open r9, r18,  r21, r22, r20
  m136_emit r10, r17, r21, r22, r20

  ; r[24], r[25]: a8*b16 plus the remaining carry
  mul r10, r18
  add r22, r0
  adc r20, r1
  st  Z+, r22
  st  Z+, r20

  ; ======================================================================
  ; pass 3: a[9..16] * b[0..8], added in starting at r[9]
  ; ======================================================================
  sbiw r30, 17                  ; Z: r+26 -> r+9
  sbiw r28, 17                  ; Y: b+17 -> b

  ; a[9..16] -> r2..r9
  .irp reg, r2, r3, r4, r5, r6, r7, r8, r9
  ld  \reg, X+
  .endr

  ; b[0..8] -> r11..r19
  .irp reg, r11, r12, r13, r14, r15, r16, r17, r18, r19
  ld  \reg, Y+
  .endr

  ; r[9]..r[16]: a9*b0 up to a9*b7 + ... + a16*b0
  m136_head8

  ; r[17]: a9*b8 + ... + a16*b1
  m136_open r2, r19,  r21, r22, r20
  m136_add  r3, r18,  r21, r22, r20
  m136_add  r4, r17,  r21, r22, r20
  m136_add  r5, r16,  r21, r22, r20
  m136_add  r6, r15,  r21, r22, r20
  m136_add  r7, r14,  r21, r22, r20
  m136_add  r8, r13,  r21, r22, r20
  m136_fold r9, r12,  r21, r22, r20

  ; r[18]: a10*b8 + ... + a16*b2
  m136_open r3, r19,  r22, r20, r21
  m136_add  r4, r18,  r22, r20, r21
  m136_add  r5, r17,  r22, r20, r21
  m136_add  r6, r16,  r22, r20, r21
  m136_add  r7, r15,  r22, r20, r21
  m136_add  r8, r14,  r22, r20, r21
  m136_fold r9, r13,  r22, r20, r21

  ; r[19]: a11*b8 + ... + a16*b3
  m136_open r4, r19,  r20, r21, r22
  m136_add  r5, r18,  r20, r21, r22
  m136_add  r6, r17,  r20, r21, r22
  m136_add  r7, r16,  r20, r21, r22
  m136_add  r8, r15,  r20, r21, r22
  m136_fold r9, r14,  r20, r21, r22

  ; r[20]: a12*b8 + ... + a16*b4
  m136_open r5, r19,  r21, r22, r20
  m136_add  r6, r18,  r21, r22, r20
  m136_add  r7, r17,  r21, r22, r20
  m136_add  r8, r16,  r21, r22, r20
  m136_fold r9, r15,  r21, r22, r20

  ; r[21]: a13*b8 + a14*b7 + a15*b6 + a16*b5
  m136_open r6, r19,  r22, r20, r21
  m136_add  r7, r18,  r22, r20, r21
  m136_add  r8, r17,  r22, r20, r21
  m136_fold r9, r16,  r22, r20, r21

  ; r[22]: a14*b8 + a15*b7 + a16*b6
  m136_open r7, r19,  r20, r21, r22
  m136_add  r8, r18,  r20, r21, r22
  m136_fold r9, r17,  r20, r21, r22

  ; r[23]: a15*b8 + a16*b7
  m136_open r8, r19,  r21, r22, r20
  m136_fold r9, r18,  r21, r22, r20

  ; r[24]: a16*b8 plus the byte already there
  ld  r24, Z
  mul r9, r19
  clr r21
  add r22, r24
  adc r1, r25
  add r22, r0
  st  Z+, r22                   ; st and ld leave the carry of the add intact

  ; r[25], r[26]: old r[25] + high byte of a16*b8 + pending carries
  ld  r24, Z
  adc r1, r24
  adc r21, r25
  add r20, r1
  adc r21, r25
  st  Z+, r20
  st  Z, r21                    ; Z is left at r+26

  ; ======================================================================
  ; pass 4: a[9..16] * b[9..16], added in starting at r[18]
  ; ======================================================================
  sbiw r30, 8                   ; Z: r+26 -> r+18

  ; b[9..16] -> r11..r18
  .irp reg, r11, r12, r13, r14, r15, r16, r17, r18
  ld  \reg, Y+
  .endr

  ; r[18]..r[25]: a9*b9 up to a9*b16 + ... + a16*b9
  m136_head8

  ; r[26]: a10*b16 + ... + a16*b10
  m136_open r3, r18,  r21, r22, r20
  m136_add  r4, r17,  r21, r22, r20
  m136_add  r5, r16,  r21, r22, r20
  m136_add  r6, r15,  r21, r22, r20
  m136_add  r7, r14,  r21, r22, r20
  m136_add  r8, r13,  r21, r22, r20
  m136_fold r9, r12,  r21, r22, r20

  ; r[27]: a11*b16 + ... + a16*b11   (first byte not written by pass 3)
  m136_open r4, r18,  r22, r20, r21
  m136_add  r5, r17,  r22, r20, r21
  m136_add  r6, r16,  r22, r20, r21
  m136_add  r7, r15,  r22, r20, r21
  m136_add  r8, r14,  r22, r20, r21
  m136_emit r9, r13,  r22, r20, r21

  ; r[28]: a12*b16 + ... + a16*b12
  m136_open r5, r18,  r20, r21, r22
  m136_add  r6, r17,  r20, r21, r22
  m136_add  r7, r16,  r20, r21, r22
  m136_add  r8, r15,  r20, r21, r22
  m136_emit r9, r14,  r20, r21, r22

  ; r[29]: a13*b16 + a14*b15 + a15*b14 + a16*b13
  m136_open r6, r18,  r21, r22, r20
  m136_add  r7, r17,  r21, r22, r20
  m136_add  r8, r16,  r21, r22, r20
  m136_emit r9, r15,  r21, r22, r20

  ; r[30]: a14*b16 + a15*b15 + a16*b14
  m136_open r7, r18,  r22, r20, r21
  m136_add  r8, r17,  r22, r20, r21
  m136_emit r9, r16,  r22, r20, r21

  ; r[31]: a15*b16 + a16*b15
  m136_open r8, r18,  r20, r21, r22
  m136_emit r9, r17,  r20, r21, r22

  ; r[32], r[33]: a16*b16 plus the remaining carry
  mul r9, r18
  add r21, r0
  adc r22, r1
  st  Z+, r21
  st  Z+, r22

  ; r1 was overwritten by mul; leave it zero for the caller
  clr r1

  ; restore the saved registers in reverse order
  .irp reg, r29, r28, r17, r16, r15, r14, r13, r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2
  pop \reg
  .endr

  ret
