//	ver	021004

//	Alpha-blend one 32-bit pixel `front` over one 32-bit pixel `back`:
//		result = (front * A + back * (256 - A)) >> 8	in every byte lane
//	where A is the low byte of `front` (lanes written B,G,R,A, A lowest).
//	In the lane pictures one character stands for one byte of the register.
//	NOTE(review): `ezx` looks like a placeholder for a general-purpose
//	register and the digit in column 0 like a stage tag - confirm.
//	In :	front, back
//	Out:	mm0 low dword = blended pixel, high dword = 0
//	Uses:	ezx, mm0-mm4, flags
//
//	Why the weights are plain words multiplied with pmullw:
//	- punpcklbw only consumes the low four bytes of its source, so
//	  unpacking 0A0A0A0A against zero gives 00A000A0, not A0A0A0A0;
//	  two lanes would be multiplied by 0.
//	- pmulhw is a signed multiply, so an A0 weight word with A >= 80h
//	  would count as negative.
//	- an 8-bit `neg` of A = 0 is 0, which would blend a fully transparent
//	  front to black; 256-A is therefore formed in 32 bits.

	mov		ezx,front	//BGRA
	movd		mm0,ezx		//0000BGRA
	movd		mm1,back

	pxor		mm4,mm4		//00000000

1	and		ezx,255		//000A		keep alpha only
1	movd		mm2,ezx		//0000000A
1	punpcklwd	mm2,mm2		//00000A0A
1	punpckldq	mm2,mm2		//0A0A0A0A	every word = A

2	neg		ezx		//		-A
2	add		ezx,256		//		256-A, range 1..256 (may need 9 bits)
2	movd		mm3,ezx		//		word0 = 256-A
2	punpcklwd	mm3,mm3		//		word0,word1 = 256-A
2	punpckldq	mm3,mm3		//		every word = 256-A

3	punpcklbw	mm0,mm4		//0B0G0R0A
3	punpcklbw	mm1,mm4		//0B0G0R0A

4	pmullw		mm0,mm2		//front x A		<= 255*255, fits a word
4	pmullw		mm1,mm3		//back  x (256-A)	<= 255*256, fits a word

5	paddusw		mm0,mm1		//sum <= 255*256, so it never saturates
5	psrlw		mm0,8		//0B0G0R0A	result = sum / 256
6	packuswb	mm0,mm4		//0000BGRA




//	ver	021004	interleave code

//	Same blend as the straight-line form, with the stage 1 (front weight)
//	and stage 2 (back weight) instructions interleaved:
//		result = (front * A + back * (256 - A)) >> 8	in every byte lane
//	where A is the low byte of `front` (lanes written B,G,R,A, A lowest).
//	In the lane pictures one character stands for one byte of the register.
//	NOTE(review): `ezx` looks like a placeholder for a general-purpose
//	register and the digit in column 0 like a stage tag - confirm.
//	In :	front, back
//	Out:	mm0 low dword = blended pixel, high dword = 0
//	Uses:	ezx, mm0-mm4, flags
//
//	Why the weights are plain words multiplied with pmullw:
//	- punpcklbw only consumes the low four bytes of its source, so
//	  unpacking 0A0A0A0A against zero gives 00A000A0, not A0A0A0A0;
//	  two lanes would be multiplied by 0.
//	- pmulhw is a signed multiply, so an A0 weight word with A >= 80h
//	  would count as negative.
//	- an 8-bit `neg` of A = 0 is 0, which would blend a fully transparent
//	  front to black; 256-A is therefore formed in 32 bits.
//
//	Ordering constraint: `movd mm2,ezx` must stay ahead of `neg ezx`,
//	because stage 2 overwrites the alpha value that stage 1 reads.

	mov		ezx,front	//BGRA
	movd		mm0,ezx		//0000BGRA
	movd		mm1,back

1	and		ezx,255		//000A		keep alpha only
	pxor		mm4,mm4		//00000000
1	movd		mm2,ezx		//0000000A
2	neg		ezx		//		-A
2	add		ezx,256		//		256-A, range 1..256 (may need 9 bits)
1	punpcklwd	mm2,mm2		//00000A0A
2	movd		mm3,ezx		//		word0 = 256-A
1	punpckldq	mm2,mm2		//0A0A0A0A	every word = A
2	punpcklwd	mm3,mm3		//		word0,word1 = 256-A
2	punpckldq	mm3,mm3		//		every word = 256-A

3	punpcklbw	mm0,mm4		//0B0G0R0A
3	punpcklbw	mm1,mm4		//0B0G0R0A

4	pmullw		mm0,mm2		//front x A		<= 255*255, fits a word
4	pmullw		mm1,mm3		//back  x (256-A)	<= 255*256, fits a word

5	paddusw		mm0,mm1		//sum <= 255*256, so it never saturates
5	psrlw		mm0,8		//0B0G0R0A	result = sum / 256
6	packuswb	mm0,mm4		//0000BGRA




