PageRenderTime 23ms CodeModel.GetById 1ms app.highlight 15ms RepoModel.GetById 2ms app.codeStats 0ms

/arch/i386/math-emu/polynom_Xsig.S

https://bitbucket.org/evzijst/gittest
Assembly | 135 lines | 115 code | 20 blank | 0 comment | 0 complexity | 9ba6d435028f436e986d9da8172d3659 MD5 | raw file
  1/*---------------------------------------------------------------------------+
  2 |  polynomial_Xsig.S                                                        |
  3 |                                                                           |
  4 | Fixed point arithmetic polynomial evaluation.                             |
  5 |                                                                           |
  6 | Copyright (C) 1992,1993,1994,1995                                         |
  7 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
  8 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
  9 |                                                                           |
 10 | Call from C as:                                                           |
  11 |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
 12 |                        unsigned long long terms[], int n)                 |
 13 |                                                                           |
 14 | Computes:                                                                 |
  15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x    |
 16 | and adds the result to the 12 byte Xsig.                                  |
 17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 18 | precision.                                                                |
 19 |                                                                           |
 20 | This function must be used carefully: most overflow of intermediate       |
 21 | results is controlled, but overflow of the result is not.                 |
 22 |                                                                           |
 23 +---------------------------------------------------------------------------*/
 24	.file	"polynomial_Xsig.S"
 25
 26#include "fpu_emu.h"
 27
 28
 29#define	TERM_SIZE	$8	/* bytes per terms[] entry, as an immediate operand */
/*
 * Local variables, addressed relative to %ebp.  They occupy -4 .. -28(%ebp),
 * inside the 32 bytes that polynomial_Xsig reserves with "subl $32,%esp".
 * SUM_*   : the 12-byte running sum (ms / middle / ls longs).
 * ACCUM_* : the 12-byte product built up on each pass of the loop.
 */
 30#define	SUM_MS		-20(%ebp)	/* sum ms long */
 31#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
 32#define	SUM_LS		-28(%ebp)	/* sum ls long */
 33#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
 34#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
 35#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
 36#define OVERFLOWED      -16(%ebp)	/* addition overflow flag; only accessed as a byte (movb/testb) */
 37
 38.text
/*
 * polynomial_Xsig(accum, x, terms, n)
 *
 * Arguments are read through the PARAM1..PARAM4 macros, which are not
 * defined in this file (presumably they come from fpu_emu.h).  x is used
 * as a pointer: its ls long is read from (%esi) and its ms long from
 * 4(%esi).
 *
 * The running sum starts as terms[n].  Each pass of L_accum_loop replaces
 * it with (sum * x) + the next lower term, with %edi walking down from
 * terms[n-1] to terms[0].  The final sum is then added into the three
 * longs at accum.
 *
 * %esi, %edi and %ebx are saved and restored (%ebx is not otherwise
 * referenced in this routine); %eax and %edx are used as scratch.
 * PARAM4 is decremented in place and serves as the loop counter.
 */
 39ENTRY(polynomial_Xsig)
 40	pushl	%ebp
 41	movl	%esp,%ebp
 42	subl	$32,%esp		/* room for the SUM_*, ACCUM_* and OVERFLOWED locals */
 43	pushl	%esi
 44	pushl	%edi
 45	pushl	%ebx
 46
 47	movl	PARAM2,%esi		/* x */
 48	movl	PARAM3,%edi		/* terms */
 49
/* %edi = terms + n * 8, i.e. the address of terms[n] */
 50	movl	TERM_SIZE,%eax
 51	mull	PARAM4			/* n */
 52	addl	%eax,%edi
 53
/* Initial sum = terms[n] in the ms and middle longs, ls long zero */
 54	movl	4(%edi),%edx		/* terms[n], ms long */
 55	movl	%edx,SUM_MS
 56	movl	(%edi),%edx		/* terms[n], ls long */
 57	movl	%edx,SUM_MIDDLE
 58	xor	%eax,%eax
 59	movl	%eax,SUM_LS
 60	movb	%al,OVERFLOWED		/* no carry pending yet */
 61
 62	subl	TERM_SIZE,%edi		/* step down to terms[n-1] */
 63	decl	PARAM4
 64	js	L_accum_done		/* counter went negative (e.g. n == 0): skip the loop */
 65
 66L_accum_loop:
/*
 * ACCUM = (SUM_MS:SUM_MIDDLE) * x, keeping the upper 96 bits of the
 * 128-bit product.  SUM_LS does not take part in the multiplication.
 */
 67	xor	%eax,%eax
 68	movl	%eax,ACCUM_MS
 69	movl	%eax,ACCUM_MIDDLE
 70
 71	movl	SUM_MIDDLE,%eax
 72	mull	(%esi)			/* x ls long */
 73	movl	%edx,ACCUM_LS		/* only the high half is kept; %eax is discarded */
 74
 75	movl	SUM_MIDDLE,%eax
 76	mull	4(%esi)			/* x ms long */
 77	addl	%eax,ACCUM_LS
 78	adcl	%edx,ACCUM_MIDDLE
 79	adcl	$0,ACCUM_MS
 80
 81	movl	SUM_MS,%eax
 82	mull	(%esi)			/* x ls long */
 83	addl	%eax,ACCUM_LS
 84	adcl	%edx,ACCUM_MIDDLE
 85	adcl	$0,ACCUM_MS
 86
 87	movl	SUM_MS,%eax
 88	mull	4(%esi)			/* x ms long */
 89	addl	%eax,ACCUM_MIDDLE
 90	adcl	%edx,ACCUM_MS
 91
/*
 * If the previous addition carried out of SUM_MS, the lost bit is worth
 * one extra copy of x at the MS:MIDDLE position, so add x in here.
 */
 92	testb	$0xff,OVERFLOWED
 93	jz	L_no_overflow
 94
 95	movl	(%esi),%eax
 96	addl	%eax,ACCUM_MIDDLE
 97	movl	4(%esi),%eax
 98	adcl	%eax,ACCUM_MS		/* This could overflow too */
 99
100L_no_overflow:
101
102/*
103 * Now put the sum of next term and the accumulator
104 * into the sum register
105 */
/*
 * The movl instructions between the addl/adcl steps leave the flags
 * untouched, so the carry propagates LS -> MIDDLE -> MS.
 * NOTE(review): the term's ls long, (%edi), is added into both SUM_LS and
 * SUM_MIDDLE below -- confirm that the addition into SUM_LS is intended.
 */
106	movl	ACCUM_LS,%eax
107	addl	(%edi),%eax		/* term ls long */
108	movl	%eax,SUM_LS
109	movl	ACCUM_MIDDLE,%eax
110	adcl	(%edi),%eax		/* term ls long */
111	movl	%eax,SUM_MIDDLE
112	movl	ACCUM_MS,%eax
113	adcl	4(%edi),%eax		/* term ms long */
114	movl	%eax,SUM_MS
115	sbbb	%al,%al			/* %al = 0xff if the last adcl carried, else 0 */
116	movb	%al,OVERFLOWED		/* Used in the next iteration */
117
118	subl	TERM_SIZE,%edi		/* move to the next lower term */
119	decl	PARAM4
120	jns	L_accum_loop		/* repeat until the counter goes negative */
121
122L_accum_done:
/*
 * Add the 12-byte sum into the three longs at accum (ls at offset 0,
 * ms at offset 8).  A carry out of the top long is not recorded.
 */
123	movl	PARAM1,%edi		/* accum */
124	movl	SUM_LS,%eax
125	addl	%eax,(%edi)
126	movl	SUM_MIDDLE,%eax
127	adcl	%eax,4(%edi)
128	movl	SUM_MS,%eax
129	adcl	%eax,8(%edi)
130
131	popl	%ebx
132	popl	%edi
133	popl	%esi
134	leave
135	ret