#ifndef __POLY1305KEYPOWERSASM__
#define __POLY1305KEYPOWERSASM__

/* field arithmetic used for computing the key powers */

/*
 * fe1305_tau_square()
 *
 * Squares the two-limb value tau = r14 + 2^64*r15 and folds the product
 * back to three 64-bit limbs using 2^130 = 5 (arithmetic modulo 2^130 - 5).
 *
 * In:       %r14 = low limb of tau, %r15 = high limb of tau
 *           %rdi = base address the result is stored relative to
 * Out:      %r8, %r9, %r10 = limbs 0, 1, 2 of the result, also stored at
 *           24(%rdi), 32(%rdi), 40(%rdi)
 * Clobbers: %rax, %rdx, %r11, %r12, %r13, flags
 * Reads:    mask2 and mask2c, which are defined outside this file
 *           (presumably the low two bits and their complement -- confirm
 *           at the definition)
 *
 * Column accumulators: r9:r8 collects the weight 2^0 terms, r13:r12 the
 * weight 2^64 terms and r11:r10 the weight 2^128 terms.
 *
 * Only one folding pass is done and no final conditional subtraction is
 * performed here.
 */
#define fe1305_tau_square()				\
	/* r15*r15: low word is limb 2 (r10).  The high word has */	\
	/* weight 2^192, which folds to 2^64 + 2^62: copy it to */	\
	/* r12 and, shifted left by 62, to r9:r8. */			\
	movq    %r15,%rax;				\
	mulq	%r15;					\
	movq    %rax,%r10;				\
	xorq    %r11,%r11;				\
	movq    %rdx,%r12;				\
	xorq    %r13,%r13;				\
	movq    %rdx,%r8;				\
	xorq    %r9,%r9;				\
	shld    $62,%r8,%r9;				\
	shlq    $62,%r8;				\
	/* r14*r14 (weight 2^0): low word to r8, high word to r12. */	\
	/* r13 is still zero, so adcq r13,r13 only records the carry. */ \
	movq    %r14,%rax;				\
	mulq	%r14;					\
	addq    %rax,%r8;				\
	adcq    $0,%r9;				\
	addq    %rdx,%r12;				\
	adcq    %r13,%r13;				\
	/* 2*r14*r15 (weight 2^64): each word is added twice, */	\
	/* low word to r13:r12 and high word to r11:r10. */		\
	movq    %r14,%rax;				\
	mulq	%r15;					\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	/* merge the weight 2^64 column into r9, carry into r13 */	\
	addq    %r12,%r9;				\
	adcq    $0,%r13;				\
	/* merge that carry word into the weight 2^128 column */	\
	addq    %r13,%r10;				\
	adcq    $0,%r11;				\
	/* split limb 2: r10 keeps the mask2 part, r12 the mask2c part */ \
	movq    %r10,%r12;				\
							\
	andq    mask2(%rip),%r10;			\
	andq    mask2c(%rip),%r12;			\
	/* add r11:r12 as is ... */				\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* ... and once more shifted right by 2; if mask2c clears */	\
	/* the low two bits this totals 5 * (r11:r10 >> 2) */		\
	shrd    $2,%r11,%r12;				\
	shrq    $2,%r11; 				\
							\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* store the three result limbs */			\
	movq    %r8,24(%rdi);				\
	movq    %r9,32(%rdi);				\
	movq    %r10,40(%rdi);				\


/*
 * fe1305_tau_squaren(x)
 *
 * Squares the three-limb value a = a0 + 2^64*a1 + 2^128*a2 held in
 * %r8 (a0), %r9 (a1), %r10 (a2), folds the product back to three limbs
 * using 2^130 = 5 (arithmetic modulo 2^130 - 5) and stores the result at
 * byte offsets 24*(x)+0, 24*(x)+8, 24*(x)+16 from %rdi.
 *
 * x:        slot number; it is parenthesized in the expansion so that an
 *           expression argument is scaled by 24 as a whole.
 * In:       %r8, %r9, %r10 = limbs 0, 1, 2 of the operand; %rdi = base
 * Out:      %r8, %r9, %r10 = limbs 0, 1, 2 of the result (same registers
 *           as the input, so invocations can be chained), also stored
 *           relative to %rdi as described above
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rbp, %r11, %r12, %r13, %r14, flags
 * Stack:    overwrites the 16 bytes at 64(%rsp) and 72(%rsp); the
 *           enclosing code must own that area.
 * Reads:    mask2 and mask2c, which are defined outside this file
 *           (presumably the low two bits and their complement -- confirm
 *           at the definition)
 *
 * NOTE(review): shlq $1 on a2 and the 62-bit shld steps drop high bits of
 * their inputs, so the computation is exact only while limb 2 and the
 * carry words are small -- confirm the input bounds with the callers.
 *
 * Column accumulators in the second half: r9:r8 collects the weight 2^0
 * terms, r13:r12 the weight 2^64 terms and r11:r10 the weight 2^128 terms.
 * Only one folding pass is done; no final conditional subtraction here.
 */
#define fe1305_tau_squaren(x)				\
	/* a0 = rbx, a1 = rbp, a2 = rcx */			\
	movq	%r8,%rbx;				\
	movq	%r9,%rbp;				\
	movq	%r10,%rcx;				\
	/* r14 = 2*a2, used for the doubled cross terms */	\
	movq	%rcx,%r14;				\
	shlq	$1,%r14;				\
	/* a0 * 2a2 (weight 2^128): low word parked at 64(%rsp), */	\
	/* high word (weight 2^192) starts r13:r12 */			\
	movq    %rbx,%rax;				\
	mulq	%r14;					\
	movq    %rax,64(%rsp);				\
	movq    %rdx,%r12;				\
	xorq    %r13,%r13;				\
	/* a1 * 2a2 (weight 2^192): low word to r13:r12, */		\
	/* high word (weight 2^256) starts r11:r10 */			\
	movq    %rbp,%rax;				\
	mulq	%r14;					\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	movq    %rdx,%r10;				\
	xorq    %r11,%r11;				\
	/* a2 * a2 (weight 2^256): added to r11:r10 */		\
	movq    %rcx,%rax;				\
	mulq	%rcx;					\
	addq    %rax,%r10;				\
	adcq    %rdx,%r11;				\
	/* a1 * a1 (weight 2^128): low word parked at 72(%rsp), */	\
	/* high word (weight 2^192) to r13:r12 */			\
	movq    %rbp,%rax;				\
	mulq	%rbp;					\
	movq    %rax,72(%rsp);				\
	addq    %rdx,%r12;				\
	adcq    $0,%r13;				\
	/* r13:r12 is the weight 2^192 sum, which folds to */		\
	/* 2^64 + 2^62: r9:r8 = sum << 62, and r13:r12 is reused */	\
	/* unchanged as the weight 2^64 column */			\
	movq    %r12,%r8;				\
	movq    %r13,%r9;				\
	shld    $62,%r8,%r9;				\
	shlq    $62,%r8;				\
	/* a0 * a0 (weight 2^0): low word to r9:r8, high to r13:r12 */	\
	movq    %rbx,%rax;				\
	mulq	%rbx;					\
	addq    %rax,%r8;				\
	adcq    $0,%r9;				\
	addq    %rdx,%r12;				\
	adcq    $0,%r13;				\
	/* r11:r10 is the weight 2^256 sum, which folds to */		\
	/* 2^128 + 2^126: add sum << 62 to r13:r12, and reuse */	\
	/* r11:r10 unchanged as the weight 2^128 column */		\
	movq    %r10,%rax;				\
	movq    %r11,%rdx;				\
	shld    $62,%rax,%rdx;				\
	shlq    $62,%rax;				\
	addq    %rax,%r12;				\
	adcq    %rdx,%r13;				\
	/* 2*a0*a1 (weight 2^64): each word is added twice, */		\
	/* low word to r13:r12 and high word to r11:r10 */		\
	movq    %rbx,%rax;				\
	mulq	%rbp;					\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	/* add the two parked weight 2^128 low words */		\
	addq    64(%rsp),%r10;				\
	adcq    $0,%r11;				\
	addq    72(%rsp),%r10;				\
	adcq    $0,%r11;				\
	/* merge the weight 2^64 column into r9, carry into r13 */	\
	addq    %r12,%r9;				\
	adcq    $0,%r13;				\
	/* merge that carry word into the weight 2^128 column */	\
	addq    %r13,%r10;				\
	adcq    $0,%r11;				\
	/* split limb 2: r10 keeps the mask2 part, r12 the mask2c part */ \
	movq    %r10,%r12;				\
							\
	andq    mask2(%rip),%r10;			\
	andq    mask2c(%rip),%r12;			\
	/* add r11:r12 as is ... */				\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* ... and once more shifted right by 2; if mask2c clears */	\
	/* the low two bits this totals 5 * (r11:r10 >> 2) */		\
	shrd    $2,%r11,%r12;				\
	shrq    $2,%r11; 				\
							\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* store the three result limbs in slot x */		\
	movq    %r8,24*(x)+0(%rdi);			\
	movq    %r9,24*(x)+8(%rdi);			\
	movq    %r10,24*(x)+16(%rdi);			\
	
	
/*
 * fe1305_mul_tau_taun(x,y)
 *
 * Multiplies the three-limb value b = b0 + 2^64*b1 + 2^128*b2 stored in
 * slot x (byte offsets 24*(x)+0, +8, +16 from %rdi) by the two-limb value
 * t = t0 + 2^64*t1 stored at 0(%rdi) and 8(%rdi), folds the product back
 * to three limbs using 2^130 = 5 (arithmetic modulo 2^130 - 5) and stores
 * the result in slot y (byte offsets 24*(y)+0, +8, +16 from %rdi).
 * Going by the macro name, t is tau and slot x holds a power of tau --
 * confirm with the callers.
 *
 * x, y:     slot numbers; both are parenthesized in the expansion so that
 *           an expression argument is scaled by 24 as a whole.
 * In:       %rdi = base address of the table
 * Out:      %r8, %r9, %r10 = limbs 0, 1, 2 of the result, also stored in
 *           slot y
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rbp, %r11, %r12, %r13, flags
 * Reads:    mask2 and mask2c, which are defined outside this file
 *           (presumably the low two bits and their complement -- confirm
 *           at the definition)
 *
 * Column accumulators: r9:r8 first collects the weight 2^192 words that
 * will be shifted left by 62, then the weight 2^0 terms; r13:r12 collects
 * the weight 2^64 terms and r11:r10 the weight 2^128 terms.
 * Only one folding pass is done; no final conditional subtraction here.
 */
#define fe1305_mul_tau_taun(x,y)			\
	/* b0 = rbx, b1 = rbp, b2 = rcx */			\
	movq	24*(x)+0(%rdi),%rbx;			\
	movq	24*(x)+8(%rdi),%rbp;			\
	movq	24*(x)+16(%rdi),%rcx;			\
	/* b2 * t1 (weight 2^192, folds to 2^64 + 2^62): low word */	\
	/* to r8 (shifted by 62 later) and to r12.  High word */	\
	/* (weight 2^256, folds to 2^128 + 2^126) to r10 and, */	\
	/* shifted left by 62, to r13:r12. */				\
	movq    %rcx,%rax;				\
	mulq	8(%rdi);				\
	movq    %rax,%r8;				\
	xorq    %r9,%r9;				\
	movq    %rax,%r12;				\
	xorq    %r13,%r13;				\
	movq    %rdx,%r10;				\
	xorq    %r11,%r11;				\
	xorq    %rax,%rax;				\
	shld    $62,%rdx,%rax;				\
	shlq    $62,%rdx;				\
	addq    %rdx,%r12;				\
	adcq    %rax,%r13;				\
	/* b1 * t1 (weight 2^128): low word to r11:r10, high word */	\
	/* (weight 2^192) to r9:r8 and r13:r12 */			\
	movq    %rbp,%rax;				\
	mulq	8(%rdi);				\
	addq    %rax,%r10;				\
	adcq    $0,%r11;				\
	addq    %rdx,%r8;				\
	adcq    $0,%r9;				\
	addq    %rdx,%r12;				\
	adcq    $0,%r13;				\
	/* b2 * t0 (weight 2^128): handled like b1 * t1 */	\
	movq    %rcx,%rax;				\
	mulq	0(%rdi);				\
	addq    %rax,%r10;				\
	adcq    $0,%r11;				\
	addq    %rdx,%r8;				\
	adcq    $0,%r9;				\
	addq    %rdx,%r12;				\
	adcq    $0,%r13;				\
	/* apply the 2^62 factor to the collected 2^192 words */	\
	shld    $62,%r8,%r9;				\
	shlq    $62,%r8;				\
	/* b0 * t0 (weight 2^0): low word to r9:r8, high to r13:r12 */	\
	movq    %rbx,%rax;				\
	mulq	0(%rdi);				\
	addq    %rax,%r8;				\
	adcq    $0,%r9;				\
	addq    %rdx,%r12;				\
	adcq    $0,%r13;				\
	/* b0 * t1 (weight 2^64): low to r13:r12, high to r11:r10 */	\
	movq    %rbx,%rax;				\
	mulq	8(%rdi);				\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	/* b1 * t0 (weight 2^64): low to r13:r12, high to r11:r10 */	\
	movq    %rbp,%rax;				\
	mulq	0(%rdi);				\
	addq    %rax,%r12;				\
	adcq    $0,%r13;				\
	addq    %rdx,%r10;				\
	adcq    $0,%r11;				\
	/* merge the weight 2^64 column into r9, carry into r13 */	\
	addq    %r12,%r9;				\
	adcq    $0,%r13;				\
	/* merge that carry word into the weight 2^128 column */	\
	addq    %r13,%r10;				\
	adcq    $0,%r11;				\
	/* split limb 2: r10 keeps the mask2 part, r12 the mask2c part */ \
	movq    %r10,%r12;				\
							\
	andq    mask2(%rip),%r10;			\
	andq    mask2c(%rip),%r12;			\
	/* add r11:r12 as is ... */				\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* ... and once more shifted right by 2; if mask2c clears */	\
	/* the low two bits this totals 5 * (r11:r10 >> 2) */		\
	shrd    $2,%r11,%r12;				\
	shrq    $2,%r11; 				\
							\
	addq    %r12,%r8;				\
	adcq    %r11,%r9;				\
	adcq    $0,%r10;				\
	/* store the three result limbs in slot y */		\
	movq    %r8,24*(y)+0(%rdi);			\
	movq    %r9,24*(y)+8(%rdi);			\
	movq    %r10,24*(y)+16(%rdi);			\
	
#endif
	
