This is my fifth solution (it returns a 32-bit value; I have removed the mistaken/unoptimized third and fourth solutions). It uses only one IMUL. Note that abs(X) and abs(Y) must each be no greater than (1<<14)-1, and abs(Z) must be < (1<<29):
__asm
{
    /* Computes -2*x*y - 3*z in 32-bit arithmetic and stores it in RESULT. */
    /* In:      EAX = x                                                    */
    /*          EDX = y                                                    */
    /*          EBX = z                                                    */
    /* Scratch: EDX (overwritten with 3*z)                                 */
    /* Out:     EAX = -2*x*y - 3*z                                         */

    NEG  EAX                    /* EAX = -x */
    IMUL EDX                    /* EDX:EAX = (-x)*y; only the low half in EAX is used below */

    LEA  EDX,[EBX+EBX*2]        /* EDX = 3*z in a single address computation */
    SHL  EAX,1                  /* EAX = -2*x*y */
    SUB  EAX,EDX                /* EAX = -2*x*y - 3*z */

    MOV  [RESULT],EAX
}
This is my second solution (it returns a 64-bit value); it uses only one IMUL and is now optimized. Note that abs(X) and abs(Y) must each be no greater than (1<<30)-1. I have assumed that RESULT, in this case, is a 64-bit integer:
__asm
{
    /* Computes -(2*x*y + 3*z) as a signed 64-bit value and stores it in RESULT. */
    /* In:      EAX = x                                                          */
    /*          EDX = y                                                          */
    /*          EBX = z                                                          */
    /* Scratch: ESI, EDI                                                         */
    /* Out:     EDX:EAX = -(2*x*y + 3*z)                                         */

    /* --- 2*x*y in EDX:EAX --- */
    IMUL EDX                    /* EDX:EAX = x*y (signed 64-bit product) */
    SHLD EDX,EAX,1              /* high half doubled, bit 31 of EAX shifted in */
    SHL  EAX,1                  /* low half doubled */

    /* --- 3*z in EDI:ESI --- */
    MOV  ESI,EBX                /* ESI = z */
    MOV  EDI,EBX
    SAR  EDI,31                 /* EDI = 0 or -1: sign fill of z (also the high half of 2*z) */
    ADD  ESI,ESI                /* ESI = low half of 2*z */
    ADD  ESI,EBX                /* ESI = low half of 3*z, CF = carry out */
    ADC  EDI,EDI                /* sign fill of 2*z + sign fill of z + carry = high half of 3*z */

    /* --- sum, then 64-bit two's-complement negate --- */
    ADD  EAX,ESI
    ADC  EDX,EDI                /* EDX:EAX = 2*x*y + 3*z */
    NEG  EDX
    NEG  EAX                    /* CF = 1 when the low half was non-zero */
    SBB  EDX,0                  /* EDX:EAX = -(2*x*y + 3*z) */

    /* --- store both halves, low dword first --- */
    LEA  ESI,[RESULT]
    MOV  [ESI],EAX
    MOV  [ESI+4],EDX
}
But remember that each of the two products can have a result of more than 64 bits; in assembly (returning a 96-bit value):
; Computes -2xy-3z as -(2*xy+3*z), carried out on three 32-bit limbs
; (96 bits; registers are listed most significant first).
; INPUT:  EAX= X
;         EDX= Y
;         ECX= Z
; TEMP:   EDI, ESI, EBP
; OUTPUT: EBX:EDX:EAX= -(2*xy+3*z)
IMUL EDX ;EDX:EAX=x*y (signed 64-bit product)
ADD EAX,EAX ;double the low limb; CF=bit shifted out of EAX
RCL EDX,1 ;double the middle limb, pulling CF in; CF=old top bit of EDX
SBB EBX,EBX ;EBX=0 or -1 from CF, giving EBX:EDX:EAX=2*xy
MOV ESI,ECX ;ESI=z
ADD ESI,ESI ;ESI=low limb of 2*z; CF=top (sign) bit of z
SBB EDI,EDI ;EDI=0 or -1 from CF (sign fill)
MOV EBP,EDI ;EBP:EDI:ESI=z*2
ADD ESI,ECX ;add z into the low limb; CF=carry out
ADC EDI,EDI ;EDI equals z's sign fill here, so EDI+EDI+CF adds z's upper limb plus the carry
ADC EBP,EBP ;same trick on the top limb: EBP:EDI:ESI=z*3
ADD EAX,ESI ;96-bit add, low limb first so the carry propagates upward
ADC EDX,EDI
ADC EBX,EBP ;EBX:EDX:EAX=2*xy+z*3
NOT EAX ;two's-complement negate: invert all three limbs...
NOT EDX
NOT EBX
ADD EAX,1 ;...then add 1, rippling the carry through the upper limbs
ADC EDX,0
ADC EBX,0 ;EBX:EDX:EAX=-(2*xy+z*3)