From ac0e2db600593d5b30550453b78874bfa0611751 Mon Sep 17 00:00:00 2001 From: wkj Date: Wed, 21 Apr 2004 04:45:31 +0000 Subject: Add basic libmp support for the x86. --- src/libmp/386/mkfile | 19 ++++++++++ src/libmp/386/mpdigdiv.s | 39 +++++++++++++++++++++ src/libmp/386/mpvecadd.s | 73 +++++++++++++++++++++++++++++++++++++++ src/libmp/386/mpvecdigmuladd.s | 69 +++++++++++++++++++++++++++++++++++++ src/libmp/386/mpvecdigmulsub.s | 78 ++++++++++++++++++++++++++++++++++++++++++ src/libmp/386/mpvecsub.s | 62 +++++++++++++++++++++++++++++++++ src/libmp/mkfile | 2 +- src/libmp/port/mkfile | 8 +++-- src/libmp/port/reduce | 14 ++------ 9 files changed, 349 insertions(+), 15 deletions(-) create mode 100644 src/libmp/386/mkfile create mode 100644 src/libmp/386/mpdigdiv.s create mode 100644 src/libmp/386/mpvecadd.s create mode 100644 src/libmp/386/mpvecdigmuladd.s create mode 100644 src/libmp/386/mpvecdigmulsub.s create mode 100644 src/libmp/386/mpvecsub.s (limited to 'src/libmp') diff --git a/src/libmp/386/mkfile b/src/libmp/386/mkfile new file mode 100644 index 00000000..06a0a988 --- /dev/null +++ b/src/libmp/386/mkfile @@ -0,0 +1,19 @@ +<$PLAN9/src/mkhdr + +LIB=libmp.a +SFILES=\ + mpdigdiv.s\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ + +HFILES=$PLAN9/include/mp.h ../port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +<$PLAN9/src/mksyslib diff --git a/src/libmp/386/mpdigdiv.s b/src/libmp/386/mpdigdiv.s new file mode 100644 index 00000000..f02e1616 --- /dev/null +++ b/src/libmp/386/mpdigdiv.s @@ -0,0 +1,39 @@ +.text + +.p2align 2,0x90 +.globl mpdigdiv + .type mpdigdiv, @function +mpdigdiv: /* after the pushl: 8(%esp) -> dividend {lo, hi}, 12(%esp) = divisor, 16(%esp) -> quotient */ + /* Prelude */ + pushl %ebp + movl %ebx, -4(%esp) /* save in the word just below %esp */ + + movl 8(%esp), %ebx /* BX = pointer to the 2-word dividend */ + movl (%ebx), %eax /* AX = low word */ + movl 4(%ebx), %edx /* DX = high word */ + + movl 12(%esp), %ebx /* BX = divisor */ + movl 16(%esp), %ebp /* BP = where the quotient is stored */ + xorl %ecx, %ecx /* CX = 0 */ + cmpl %ebx, %edx /* dividend >= 2^32 * divisor */ + jae divovfl + cmpl %ecx, %ebx /* divisor == 0; the jae above already catches this */ + je divovfl + divl %ebx /* AX = 
DX:AX/BX */ + movl %eax, (%ebp) + jmp done + + /* return all 1's */ +divovfl: + notl %ecx + movl %ecx, (%ebp) + +done: + /* Postlude */ + movl -4(%esp), %ebx /* restore from stack */ + movl %esp, %ebp + leave /* SP = BP, then pop the saved BP */ + ret + +.endmpdigdiv: + .size mpdigdiv,.endmpdigdiv-mpdigdiv diff --git a/src/libmp/386/mpvecadd.s b/src/libmp/386/mpvecadd.s new file mode 100644 index 00000000..1f23dcd7 --- /dev/null +++ b/src/libmp/386/mpvecadd.s @@ -0,0 +1,73 @@ +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + */ + +.text + +.p2align 2,0x90 +.globl mpvecadd + .type mpvecadd, @function +mpvecadd: + /* Prelude */ + pushl %ebp + movl %ebx, -4(%esp) /* save on stack */ + movl %esi, -8(%esp) + movl %edi, -12(%esp) + + movl 12(%esp), %edx /* alen */ + movl 20(%esp), %ecx /* blen */ + movl 8(%esp), %esi /* a */ + movl 16(%esp), %ebx /* b */ + subl %ecx, %edx /* DX = alen - blen */ + movl 24(%esp), %edi /* sum */ + xorl %ebp, %ebp /* this also sets carry to 0 */ + + /* skip addition if blen is zero */ + testl %ecx,%ecx + je _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + movl (%esi, %ebp, 4), %eax + adcl (%ebx, %ebp, 4), %eax + movl %eax, (%edi, %ebp, 4) + incl %ebp /* incl and loop leave the carry flag alone */ + loop _addloop1 + +_add1: + /* jump if alen > blen */ + incl %edx + movl %edx, %ecx + loop _addloop2 + + /* sum[alen] = carry */ +_addend: + jb _addcarry /* carry is set */ + movl $0, (%edi, %ebp, 4) + jmp done + +_addcarry: + movl $1, (%edi, %ebp, 4) + jmp done + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */ +_addloop2: + movl (%esi, %ebp, 4),%eax + adcl $0, %eax + movl %eax, (%edi, %ebp, 4) + incl %ebp + loop _addloop2 + jmp _addend + +done: + /* Postlude */ + movl -4(%esp), %ebx /* restore from stack */ + movl -8(%esp), %esi + movl -12(%esp), %edi + movl %esp, %ebp + leave /* SP = BP, then pop the saved BP */ + ret diff --git a/src/libmp/386/mpvecdigmuladd.s b/src/libmp/386/mpvecdigmuladd.s new file mode 100644 index 00000000..987c6330 --- 
/dev/null +++ b/src/libmp/386/mpvecdigmuladd.s @@ -0,0 +1,69 @@ +/* + * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b+n = SI - can't be BP + * p+n = DI - can't be BP + * i-n = BP + * m = BX + * oldhi = CX + * + */ +.text + +.p2align 2,0x90 +.globl mpvecdigmuladd + .type mpvecdigmuladd, @function +mpvecdigmuladd: + /* Prelude */ + pushl %ebp + movl %ebx, -4(%esp) /* save on stack */ + movl %esi, -8(%esp) + movl %edi, -12(%esp) + + movl 8(%esp), %esi /* b */ + movl 12(%esp), %ecx /* n */ + movl 16(%esp), %ebx /* m */ + movl 20(%esp), %edi /* p */ + movl %ecx, %ebp + negl %ebp /* BP = -n */ + shll $2, %ecx /* CX = 4*n, the byte length of n digits */ + addl %ecx, %esi /* SI = b + n */ + addl %ecx, %edi /* DI = p + n */ + xorl %ecx, %ecx /* oldhi = 0 */ +_muladdloop: + movl (%esi, %ebp, 4), %eax /* lo = b[i] */ + mull %ebx /* hi, lo = b[i] * m */ + addl %ecx,%eax /* lo += oldhi */ + jae _muladdnocarry1 + incl %edx /* hi += carry */ +_muladdnocarry1: + addl %eax, (%edi, %ebp, 4) /* p[i] += lo */ + jae _muladdnocarry2 + incl %edx /* hi += carry */ +_muladdnocarry2: + movl %edx, %ecx /* oldhi = hi */ + incl %ebp /* i++ */ + jnz _muladdloop /* until i-n reaches 0; assumes n > 0 on entry */ + xorl %eax, %eax + addl %ecx, (%edi, %ebp, 4) /* p[n] += oldhi */ + adcl %eax, %eax /* return carry out of p[n] */ + + /* Postlude */ + movl -4(%esp), %ebx /* restore from stack */ + movl -8(%esp), %esi + movl -12(%esp), %edi + movl %esp, %ebp + leave /* SP = BP, then pop the saved BP */ + ret diff --git a/src/libmp/386/mpvecdigmulsub.s b/src/libmp/386/mpvecdigmulsub.s new file mode 100644 index 00000000..47089c87 --- /dev/null +++ b/src/libmp/386/mpvecdigmulsub.s @@ -0,0 +1,78 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] -= lo + * oldhi = hi + * + * 
the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b = SI - can't be BP + * p = DI - can't be BP + * i = BP + * n = CX - constrained by LOOP instr + * m = BX + * oldhi = -4(%esp) - stack slot, not a register + * + */ +.text + +/* XXX: had to use "-4(%esp)" kludge to get around inability to + * push/pop without first adjusting %esp. This may not be + * as fast as using push/pop (and accessing pushed element + * with "(%esp)".) + */ + +.p2align 2,0x90 +.globl mpvecdigmulsub + .type mpvecdigmulsub, @function +mpvecdigmulsub: + /* Prelude */ + pushl %ebp + movl %ebx, -8(%esp) /* save on stack */ + movl %esi, -12(%esp) + movl %edi, -16(%esp) + + movl 8(%esp), %esi /* b */ + movl 12(%esp), %ecx /* n */ + movl 16(%esp), %ebx /* m */ + movl 20(%esp), %edi /* p */ + xorl %ebp, %ebp /* i = 0 */ + movl %ebp, -4(%esp) /* oldhi = 0 */ +_mulsubloop: + movl (%esi, %ebp, 4),%eax /* lo = b[i] */ + mull %ebx /* hi, lo = b[i] * m */ + addl -4(%esp), %eax /* lo += oldhi */ + jae _mulsubnocarry1 + incl %edx /* hi += carry */ +_mulsubnocarry1: + subl %eax, (%edi, %ebp, 4) /* p[i] -= lo */ + jae _mulsubnocarry2 + incl %edx /* hi += borrow */ +_mulsubnocarry2: + movl %edx, -4(%esp) /* oldhi = hi */ + incl %ebp /* i++ */ + loop _mulsubloop /* while(--n != 0); assumes n > 0 on entry */ + movl -4(%esp), %eax + subl %eax, (%edi, %ebp, 4) /* p[n] -= oldhi */ + jae _mulsubnocarry3 + movl $-1, %eax /* borrow out of p[n]: return -1 */ + jmp done + +_mulsubnocarry3: + movl $1, %eax /* no borrow: return 1 */ + +done: + /* Postlude */ + movl -8(%esp), %ebx /* restore from stack */ + movl -12(%esp), %esi + movl -16(%esp), %edi + movl %esp, %ebp + leave /* SP = BP, then pop the saved BP */ + ret diff --git a/src/libmp/386/mpvecsub.s b/src/libmp/386/mpvecsub.s new file mode 100644 index 00000000..a56b4968 --- /dev/null +++ b/src/libmp/386/mpvecsub.s @@ -0,0 +1,62 @@ +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + */ +.text + +.p2align 2,0x90 +.globl mpvecsub + .type mpvecsub, @function +mpvecsub: + /* Prelude */ + pushl %ebp + movl %ebx, -4(%esp) /* save on stack */ + movl %esi, 
-8(%esp) + movl %edi, -12(%esp) + + movl 8(%esp), %esi /* a */ + movl 16(%esp), %ebx /* b */ + movl 12(%esp), %edx /* alen */ + movl 20(%esp), %ecx /* blen */ + movl 24(%esp), %edi /* diff */ + subl %ecx,%edx /* DX = alen - blen */ + xorl %ebp,%ebp /* this also sets carry to 0 */ + + /* skip subtraction if blen is zero */ + testl %ecx,%ecx + jz _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + movl (%esi, %ebp, 4), %eax + sbbl (%ebx, %ebp, 4), %eax + movl %eax, (%edi, %ebp, 4) + incl %ebp /* incl and loop leave the borrow (carry flag) alone */ + loop _subloop1 + +_sub1: + incl %edx + movl %edx,%ecx + loop _subloop2 /* jump if alen > blen */ + jmp done + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 */ +_subloop2: + movl (%esi, %ebp, 4), %eax + sbbl $0, %eax + movl %eax, (%edi, %ebp, 4) + INCL %ebp + LOOP _subloop2 + +done: + /* Postlude */ + movl -4(%esp), %ebx /* restore from stack */ + movl -8(%esp), %esi + movl -12(%esp), %edi + movl %esp, %ebp + leave /* SP = BP, then pop the saved BP */ + ret + diff --git a/src/libmp/mkfile b/src/libmp/mkfile index aab57462..417804df 100644 --- a/src/libmp/mkfile +++ b/src/libmp/mkfile @@ -2,6 +2,6 @@ DIRS=\ port\ -# $systype-$objtype\ + $objtype\ <$PLAN9/src/mkdirs diff --git a/src/libmp/port/mkfile b/src/libmp/port/mkfile index a9176dd3..682c6763 100644 --- a/src/libmp/port/mkfile +++ b/src/libmp/port/mkfile @@ -33,10 +33,9 @@ FILES=\ mptov\ mptouv\ -OFILES=${FILES:%=%.$O} - +ALLOFILES=${FILES:%=%.$O} # cull things in the per-machine directories from this list -# OFILES= `{sh ./reduce $O $objtype $ALLOFILES} +OFILES= `{sh ./reduce $O $objtype $ALLOFILES} HFILES=\ $PLAN9/include/lib9.h\ @@ -44,3 +43,6 @@ HFILES=\ dat.h\ <$PLAN9/src/mksyslib + +poot: + echo $OFILES diff --git a/src/libmp/port/reduce b/src/libmp/port/reduce index a857a28c..b3980756 100644 --- a/src/libmp/port/reduce +++ b/src/libmp/port/reduce @@ -3,14 +3,6 @@ shift objtype=$1 shift -ls -p ../$objtype/*.[cs] >[2]/dev/null | sed 's/..$//' > /tmp/reduce.$pid -# -# if empty directory, just return the input files -# -if (! 
~ $status '|') { - echo $* - rm /tmp/reduce.$pid - exit 0 -} -echo $* | tr ' ' \012 | grep -v -f /tmp/reduce.$pid | tr \012 ' ' -rm /tmp/reduce.$pid +reduce="("`ls -p ../$objtype/*.[cs] 2>/dev/null | sed 's/..$//' | sed 's/^.*\///' | tr '\012' '|' | sed 's/.$//'`")" + +echo $* | tr ' ' '\012' | egrep -v $reduce | tr '\012' ' ' -- cgit v1.2.3