path: root/src/libmp/386/mpvecdigmulsub.s
Diffstat (limited to 'src/libmp/386/mpvecdigmulsub.s')
-rw-r--r--	src/libmp/386/mpvecdigmulsub.s	78
1 file changed, 78 insertions, 0 deletions
diff --git a/src/libmp/386/mpvecdigmulsub.s b/src/libmp/386/mpvecdigmulsub.s
new file mode 100644
index 00000000..47089c87
--- /dev/null
+++ b/src/libmp/386/mpvecdigmulsub.s
@@ -0,0 +1,78 @@
+/*
+ * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ * p -= b*m
+ *
+ * each step looks like:
+ * hi,lo = m*b[i]
+ * lo += oldhi + carry
+ * hi += carry
+ * p[i] += lo
+ * oldhi = hi
+ *
+ * the registers are:
+ * hi = DX - constrained by hardware
+ * lo = AX - constrained by hardware
+ * b = SI - can't be BP
+ * p = DI - can't be BP
+ * i = BP
+ * n = CX - constrained by LOOP instr
+ * m = BX
+ *	oldhi = -4(%esp) - no register free; kept in a stack slot (see XXX below)
+ *
+ */
+.text
+
+/* XXX: had to use the "-4(%esp)" kludge to get around the inability
+ * to push/pop without first adjusting %esp.  This may not be as
+ * fast as using push/pop (and accessing the pushed element
+ * with "(%esp)").
+ */
+
+.p2align 2,0x90
+.globl mpvecdigmulsub
+ .type mpvecdigmulsub, @function
+mpvecdigmulsub:
+ /* Prelude */
+ pushl %ebp
+	movl %ebx, -8(%esp)	/* save callee-saved regs on stack, below %esp (the XXX kludge) */
+ movl %esi, -12(%esp)
+ movl %edi, -16(%esp)
+
+ movl 8(%esp), %esi /* b */
+ movl 12(%esp), %ecx /* n */
+ movl 16(%esp), %ebx /* m */
+ movl 20(%esp), %edi /* p */
+	xorl %ebp, %ebp		/* i = 0 */
+	movl %ebp, -4(%esp)	/* oldhi = 0 */
+_mulsubloop:
+ movl (%esi, %ebp, 4),%eax /* lo = b[i] */
+ mull %ebx /* hi, lo = b[i] * m */
+ addl -4(%esp), %eax /* lo += oldhi */
+	jae _mulsubnocarry1	/* CF clear: no carry from add */
+ incl %edx /* hi += carry */
+_mulsubnocarry1:
+	subl %eax, (%edi, %ebp, 4)	/* p[i] -= lo */
+	jae _mulsubnocarry2	/* CF clear: no borrow */
+ incl %edx /* hi += carry */
+_mulsubnocarry2:
+	movl %edx, -4(%esp)	/* oldhi = hi */
+	incl %ebp		/* i++ */
+	loop _mulsubloop	/* repeat while --n != 0 (assumes n >= 1) */
+	movl -4(%esp), %eax	/* hi */
+	subl %eax, (%edi, %ebp, 4)	/* p[n] -= hi */
+	jae _mulsubnocarry3	/* CF clear: no final borrow */
+	movl $-1, %eax		/* borrow: result went negative */
+	jmp done
+
+_mulsubnocarry3:
+	movl $1, %eax		/* no borrow: result is non-negative */
+
+done:
+ /* Postlude */
+ movl -8(%esp), %ebx /* restore from stack */
+ movl -12(%esp), %esi
+ movl -16(%esp), %edi
+	movl %esp, %ebp		/* so leave restores the pushed %ebp */
+	leave			/* %esp = %ebp, then pop saved %ebp */
+ ret
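
For reference, the following is a portable C sketch of the loop described in the
file's header comment. It is an illustration, not the libmp source: it assumes
mpdigit is a 32-bit unsigned type (as on 386), that a 64-bit type is available
for the double-width product, and that p has room for n+1 digits, since the
final digit p[n] absorbs the last borrow. The name mpvecdigmulsub_ref is made
up for this sketch.

#include <stdint.h>

typedef uint32_t mpdigit;	/* assumption: 32-bit digit, as on 386 */

/* p[0..n] -= b[0..n-1] * m; returns -1 if a borrow propagates out of
 * p[n] (the result went negative), +1 otherwise - matching the %eax
 * values in the assembly above. */
int
mpvecdigmulsub_ref(mpdigit *b, int n, mpdigit m, mpdigit *p)
{
	int i;
	mpdigit oldhi, hi, lo;
	uint64_t t;

	oldhi = 0;
	for(i = 0; i < n; i++){
		t = (uint64_t)b[i] * m;		/* hi,lo = m*b[i] */
		lo = (mpdigit)t;
		hi = (mpdigit)(t >> 32);
		lo += oldhi;			/* lo += oldhi */
		if(lo < oldhi)
			hi++;			/* hi += carry from add */
		if(p[i] < lo)
			hi++;			/* hi += borrow from subtract */
		p[i] -= lo;			/* p[i] -= lo */
		oldhi = hi;			/* oldhi = hi */
	}
	if(p[n] < oldhi){			/* final borrow out of p[n] */
		p[n] -= oldhi;
		return -1;
	}
	p[n] -= oldhi;
	return 1;
}

The two hi++ fix-ups correspond to the two conditional incl %edx instructions
in the assembly. hi itself cannot overflow: the amount subtracted at each step
is at most b[i]*m + oldhi + 1 <= (2^32-1)^2 + (2^32-1) + 1 = 2^64 - 2^32 + 1,
which still fits in the hi,lo pair.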