/*
 *	int mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
 *
 *	p -= b*m
 *
 *	Syntax: GNU as, AT&T operand order.  Target: 32-bit x86, cdecl
 *	(arguments on the stack, result in %eax; %ebx, %esi, %edi and
 *	%ebp are callee-saved and are preserved here).
 *
 *	b points at n 32-bit digits; p must have room for n+1 digits,
 *	because the last high word is subtracted from p[n].
 *
 *	each step looks like:
 *		hi,lo = m * b[i]
 *		lo += oldhi
 *		hi += carry		(from the addition above)
 *		p[i] -= lo
 *		hi += borrow		(from the subtraction above)
 *		oldhi = hi
 *
 *	after the last step:
 *		p[n] -= oldhi
 *
 *	returns  1 if that final subtraction did not borrow,
 *		-1 if it did.
 *
 *	If n <= 0 the multiply loop is skipped, oldhi stays 0, p[0] is
 *	rewritten with its own value and 1 is returned.
 *
 *	the registers are:
 *		hi    = DX	- constrained by hardware (MUL)
 *		lo    = AX	- constrained by hardware (MUL)
 *		b     = SI	- advanced by one digit per step
 *		p     = DI	- advanced by one digit per step
 *		n     = CX	- digits remaining
 *		m     = BX
 *		oldhi = BP
 *
 *	clobbers: AX, CX, DX, flags
 */
.text

/*
 * Callee-saved registers are saved with real push/pop.  The 32-bit ABI
 * has no red zone, so nothing may be stored below %esp: a signal handler
 * running on this stack would be free to overwrite it.
 */

/* Four saved registers plus the return address sit below the arguments. */
.equ	ARG_B, 20
.equ	ARG_N, 24
.equ	ARG_M, 28
.equ	ARG_P, 32

.p2align 2,0x90
.globl mpvecdigmulsub
	.type mpvecdigmulsub, @function
mpvecdigmulsub:
	/* Prelude */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	ARG_B(%esp), %esi	/* b */
	movl	ARG_N(%esp), %ecx	/* n */
	movl	ARG_M(%esp), %ebx	/* m */
	movl	ARG_P(%esp), %edi	/* p */
	xorl	%ebp, %ebp		/* oldhi = 0 */

	testl	%ecx, %ecx
	jle	.Lmulsub_tail		/* n <= 0: no digits to process */

.Lmulsub_loop:
	movl	(%esi), %eax		/* lo = *b */
	mull	%ebx			/* hi,lo = *b * m */
	addl	%ebp, %eax		/* lo += oldhi */
	adcl	$0, %edx		/* hi += carry */
	subl	%eax, (%edi)		/* *p -= lo */
	adcl	$0, %edx		/* hi += borrow */
	movl	%edx, %ebp		/* oldhi = hi */
	addl	$4, %esi		/* b++ */
	addl	$4, %edi		/* p++ */
	decl	%ecx
	jnz	.Lmulsub_loop

.Lmulsub_tail:
	subl	%ebp, (%edi)		/* p[n] -= oldhi */
	movl	$1, %eax		/* mov leaves CF from the subl intact */
	jae	.Lmulsub_done		/* no borrow: return 1 */
	movl	$-1, %eax		/* borrow out of the top digit */

.Lmulsub_done:
	/* Postlude */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
	.size mpvecdigmulsub, .-mpvecdigmulsub