Here is unsigned long division pseudocode for any size operand from wikipedia:
if D == 0 then
error(DivisionByZeroException) end
Q := 0 -- initialize quotient and remainder to zero
R := 0
for i = n-1...0 do -- where n is number of bits in N
R := R << 1 -- left-shift R by 1 bit
R(0) := N(i) -- set the least-significant bit of R equal to bit i of the numerator
if R >= D then
R := R - D
Q(i) := 1
end
end
This can be extended to include signed division as well (rounding the result toward zero, not negative infinity):
if D == 0 then
error(DivisionByZeroException) end
Q := 0 -- initialize quotient and remainder to zero
R := 0
SaveN := N -- save numerator
SaveD := D -- save denominator
if N < 0 then N = -N -- invert numerator if negative
if D < 0 then D = -D -- invert denominator if negative
for i = n-1...0 do -- where n is number of bits in N
R := R << 1 -- left-shift R by 1 bit
R(0) := N(i) -- set the least-significant bit of R equal to bit i of the numerator
if R >= D then
R := R - D
Q(i) := 1
end
end
if SaveN < 0 then
R = -R -- numerator was negative, negative remainder
if (SaveN < 0 and SaveD >= 0) or (SaveN >= 0 and SaveD < 0) then
Q = -Q -- differing signs of inputs, result is negative
Here is a relatively simple, unoptimized, untested implementation in x86 ASM (NASM syntax) that should be easy to understand:
; function div_192_96
; parameters:
; 24 bytes: numerator, high words are stored after low words
; 24 bytes: denominator, high words are stored after low words (only low 12 bytes are used)
; 4 bytes: address to store 12 byte remainder in (must not be NULL)
; 4 bytes: address to store 12 byte quotient in (must not be NULL)
; return value: none
; error checking: none
GLOBAL div_192_96
div_192_96:
pushl ebp ; set up stack frame
movl ebp, esp
pushl 0 ; high word of remainder
pushl 0 ; middle word of remainder
pushl 0 ; low word of remainder
pushl 0 ; high word of quotient
pushl 0 ; middle word of quotient
pushl 0 ; low word of quotient
movl ecx, 96
.div_loop:
jecxz .div_loop_done
decl ecx
; remainder = remainder << 1
movl eax, [ebp-8] ; load middle word of remainder
shld [ebp-4], eax, 1 ; shift high word of remainder left by 1
movl eax, [ebp-12] ; load low word of remainder
shld [ebp-8], eax, 1 ; shift middle word of remainder left by 1
shll [ebp-12], 1 ; shift low word of remainder left by 1
; quotient = quotient << 1
movl eax, [ebp-20] ; load middle word of remainder
shld [ebp-16], eax, 1; shift high word of remainder left by 1
movl eax, [ebp-24] ; load low word of remainder
shld [ebp-20], eax, 1; shift middle word of remainder left by 1
shll [ebp-24], 1 ; shift low word of remainder left by 1
; remainder(0) = numerator(127)
movl eax, [ebp+28] ; load high word of numerator
shrl eax, 31 ; get top bit in bit 0
orl [ebp-12], eax ; OR into low word of remainder
; numerator = numerator << 1
movl eax, [ebp+24] ; load 5th word of numerator
shld [ebp+28], eax, 1; shift 6th word of numerator left by 1
movl eax, [ebp+20] ; load 4th word of numerator
shld [ebp+24], eax, 1; shift 5th word of numerator left by 1
movl eax, [ebp+16] ; load 3rd word of numerator
shld [ebp+20], eax, 1; shift 4th word of numerator left by 1
movl eax, [ebp+12] ; load 2nd word of numerator
shld [ebp+16], eax, 1; shift 3rd word of numerator left by 1
movl eax, [ebp+8] ; load 1st word of numerator
shld [ebp+12], eax, 1; shift 2nd word of numerator left by 1
shll [ebp+8], 1 ; shift 1st word of numerator left by 1
; if (remainder >= denominator)
movl eax, [ebp+40] ; compare high word of denominator
cmpl eax, [ebp-4] ; with high word of remainder
jb .div_loop
ja .div_subtract
movl eax, [ebp+36] ; compare middle word of denominator
cmpl eax, [ebp-8] ; with middle word of remainder
jb .div_loop
ja .div_subtract
movl eax, [ebp+32] ; compare low word of denominator
cmpl eax, [ebp-12] ; with low word of remainder
jb .div_loop
.div_subtract:
; remainder = remainder - denominator
movl eax, [ebp+32] ; load low word of denominator
subl [ebp-12], eax ; and subtract from low word of remainder
movl eax, [ebp+36] ; load middle word of denominator
sbbl [ebp-8], eax ; and subtract from middle word of remainder (with borrow)
movl eax, [ebp+40] ; load high word of denominator
sbbl [ebp-4], eax ; and subtract from high word of remainder (with borrow)
; quotient(0) = 1
orl [ebp-24], 1 ; OR 1 into low word of quotient
jmp .div_loop
.div_loop_done:
movl eax, [ebp+56] ; load remainder storage pointer
movl edx, [ebp-12] ; load low word of remainder
movl [eax+0], edx ; store low word of remainder
movl edx, [ebp-8] ; load middle word of remainder
movl [eax+4], edx ; store middle word of remainder
movl edx, [ebp-4] ; load high word of remainder
movl [eax+8], edx ; store high word of remainder
movl eax, [ebp+60] ; load quotient storage pointer
movl edx, [ebp-24] ; load low word of quotient
movl [eax+0], edx ; store low word of quotient
movl edx, [ebp-20] ; load middle word of quotient
movl [eax+4], edx ; store middle word of quotient
movl edx, [ebp-16] ; load high word of quotient
movl [eax+8], edx ; store high word of quotient
addl esp, 24
popl ebp
ret
Please note that this is just to give you a general idea, and has not been tested. BTW, it's probably easier to calculate the quotient of two numbers of equal size in assembly than to try to work around overflow issues (which are completely unhandled in above code).