;
; eagle.asm
;
; Eagle version 0.20 for NASM
;
; Written and Copyright 1998 by Dirk Stevens
;
; compile with : nasm -f coff eagle.asm for use with DJGPP to create eagle.o
;
; Only distribute this file together with the complete eagle
; package
;
; History : 
; Date		Version		Comments
; 15-March-1998	0.20			-First NASM version
;						-Added parameter for Eagle_Lines
;						-Added support for MMX
;						-Optimised further for non-MMX
;						-Autodetect MMX
; To Do:
; - Support for 16 bit color
; - Investigate color interpolation
;
;
;------------------------------------------------------------

	  BITS 32
	  GLOBAL _eagle_lines

	  SECTION .text


;------------------------------------------------------------
; void eagle_lines( unsigned long *lb,
;                   unsigned long *lb2,
;                   int width,
;                   int *address,
;                   int *address2 )
;
; ABI   : 32-bit C calling convention, arguments on the stack.
; In    : lb, lb2          - two source buffers, read one dword at a time
;         width            - source length in bytes; width/4 dwords are
;                            processed, a remainder of 1..3 bytes is ignored
;         address,address2 - two destination buffers; 2 bytes are written
;                            to each for every source byte
;         NOTE(review): lb/lb2 are presumably two adjacent scanlines of
;         8-bit pixels and address/address2 the two doubled output rows -
;         confirm against the C caller.
; Out   : nothing meaningful in eax
; Clobb : eax, ecx, edx, flags.  ebx, esi, edi and ebp are preserved.
;
; If CPUID reports MMX the call is handed over to _eagle_lines_mmx with
; the stack exactly as it was on entry.
; NOTE(review): CPUID itself is assumed to be available - TODO confirm
; for the oldest CPUs that must be supported.
;
; Register roles inside the loop:
;   esi      read pointer into lb  (advanced by lodsd)
;   ebp      read pointer into lb2
;   edi      write pointer into address
;   ecx      write pointer into address2 (the dword count is parked on
;            the stack; between iterations eax/ecx are swapped back)
;   eax,ebx  current dword from lb / lb2
;   dl,dh    last byte of the previous dword from lb / lb2 (0 at start)
;------------------------------------------------------------

CPUID_EDX_MMX	equ	0x00800000	; CPUID leaf 1: EDX bit 23 = MMX present

_eagle_lines:

	; test for mmx
	push ebx			; cpuid overwrites ebx, which the caller expects preserved
	mov eax,1
	cpuid
	pop ebx				; stack is back to its entry state before dispatching
	test edx, CPUID_EDX_MMX
	jnz near _eagle_lines_mmx

	push ebp
	mov ebp, esp
	push ebx			; callee-saved registers used as scratch below
	push esi
	push edi

	mov esi,[ebp+8]			; esi = lb
	mov ebx,[ebp+12]		; ebx = lb2 (moved into ebp below)
	mov ecx,[ebp+16]		; ecx = width
	mov edi,[ebp+20]		; edi = address
	mov eax,[ebp+24]		; eax = address2

	mov ebp, ebx			; ebp = lb2 read pointer (frame pointer no longer needed)

	shr ecx,2			; ecx = number of dwords to process
	jz near .L333			; width < 4: nothing to do (loop would otherwise wrap)

	xor dx,dx			; no previous bytes yet

.L0:

	push ecx			; park the dword count
	xchg eax,ecx			; ecx = address2 write pointer, eax free for data

	lodsd				; eax = next dword from lb
	mov ebx,[ebp]			; ebx = matching dword from lb2
	add ebp,4
	push ebp			; slow path at .L999 expects [count][ebp] on the stack

	; Fast path: both dwords identical, all four bytes equal, and equal
	; to both previous bytes -> plain copy, dl/dh stay valid.
	cmp eax,ebx
	jne near .L999

	rol eax, 8			; eax == rol(eax,8) only if all four bytes match
	cmp eax,ebx
	jne .L998
	jmp .L997

.L998:
	ror eax,8			; undo the rotate before taking the slow path
	jmp .L999

.L997:
	ror eax,8
	cmp dl, dh
	jne near .L999
	cmp dl, al
	jne near .L999

	mov [edi],eax
	mov [ecx],ebx
	mov [edi+4],eax
	mov [ecx+4],ebx
	add edi,8
	add ecx,8

	pop ebp				; drop the lb2 pointer pushed above

	pop eax				; eax = dword count
	xchg eax,ecx			; ecx = count, eax = address2 write pointer

	dec ecx
	jz near .L333

	; Second fast path: the next dword pair is compared directly against
	; the uniform value still held in ebx.
	push ecx
	xchg eax,ecx

	lodsd

	cmp ebx, eax
	jne .L888

	mov ebx,[ebp]
	add ebp,4

	cmp ebx, eax
	jne .L889

	mov [edi],eax
	mov [ecx],ebx
	mov [edi+4],eax
	mov [ecx+4],ebx
	add edi,8
	add ecx,8

	pop eax
	xchg eax,ecx

	dec ecx
	jnz near .L0

	jmp .L333

.L888:
	mov ebx,[ebp]			; lb2 dword was not loaded yet on this path
	add ebp, 4
.L889:
	push ebp			; match the stack layout the slow path expects

	; Slow path, step 1 of 4: previous bytes (dl,dh) against byte 0 (al,bl).
.L999:

	cmp dh, bl
	jne .L6001
.L1001:
	cmp bl, al
	jne .L3001
.L2001:
	mov [edi],al
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi,2
	add ecx,2
	jmp .L11
.L3001:
	cmp dh, dl
	jne .L5001
.L4001:
	mov [edi], dx
	mov [ecx], dx
	add edi,2
	add ecx,2
	jmp .L11
.L5001:
	mov [edi],dl
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi,2
	add ecx,2
	jmp .L11
.L6001:
	cmp dl, al
	jne .L12001
.L7001:
	cmp dl, dh
	jne .L9001
.L8001:
	mov [edi], dx
	mov [ecx], dx
	add edi,2
	add ecx,2
	jmp .L11
.L9001:
	cmp al,bl
	jne .L11001
.L10001:
	mov [edi],dl
	mov [ecx],bl
	mov [edi+1],al
	mov [ecx+1],bl
	add edi,2
	add ecx,2
	jmp .L11
.L11001:
.L12001:
	mov [edi],dl
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi,2
	add ecx,2

	; Step 2 of 4: bytes 0 and 1 of the current dwords.
.L11:
	cmp bl, bh
	jne .L6002
.L1002:
	cmp bh, ah
	jne .L3002
.L2002:
	mov [edi],bx
	mov [ecx],bx
	add ecx,2
	add edi,2
	jmp .L12
.L3002:
	cmp bl, al
	jne .L5002
.L4002:
	mov [edi],bx
	mov [ecx],bx
	add ecx, 2
	add edi, 2
	jmp .L12
.L5002:
	mov [edi],ax
	mov [ecx],bx
	add edi, 2
	add ecx, 2
	jmp .L12
.L6002:
	cmp al, ah
	jne .L12002
.L7002:
	cmp al, bl
	jne .L9002
.L8002:
	mov [ecx],ax
	mov [edi],ax
	add ecx, 2
	add edi, 2
	jmp .L12
.L9002:
	cmp ah,bh
	jne .L11002
.L10002:
	mov [ecx],ax
	mov [edi],ax
	add ecx, 2
	add edi, 2
	jmp .L12
.L11002:
.L12002:
	mov [edi], ax
	mov [ecx], bx
	add edi, 2
	add ecx, 2

	; Step 3 of 4: rotate so that bytes 1 and 2 sit in the low halves.
.L12:
	ror ebx, 8
	ror eax, 8

	cmp bl, bh
	jne .L6003
.L1003:
	cmp bh, ah
	jne .L3003
.L2003:
	mov [edi],bx
	mov [ecx],bx
	add edi, 2
	add ecx, 2
	jmp .L13
.L3003:
	cmp bl, al
	jne .L5003
.L4003:
	mov [edi],bx
	mov [ecx],bx
	add edi, 2
	add ecx, 2
	jmp .L13
.L5003:
	mov [edi], ax
	mov [ecx], bx
	add edi, 2
	add ecx, 2
	jmp .L13
.L6003:
	cmp al, ah
	jne .L12003
.L7003:
	cmp al, bl
	jne .L9003
.L8003:
	mov [ecx], ax
	mov [edi], ax
	add ecx, 2
	add edi, 2
	jmp .L13
.L9003:
	cmp ah,bh
	jne .L11003
.L10003:
	mov [ecx], ax
	mov [edi], ax
	add ecx, 2
	add edi, 2
	jmp .L13
.L11003:
.L12003:
	mov [edi],ax
	mov [ecx],bx
	add edi, 2
	add ecx, 2

	; Step 4 of 4: rotate again so that bytes 2 and 3 sit in the low halves.
.L13:
	ror ebx, 8
	ror eax, 8

	cmp bl, bh
	jne .L6004
.L1004:
	cmp bh, ah
	jne .L3004
.L2004:
	mov [edi], bx
	mov [ecx], bx
	add edi, 2
	add ecx, 2
	jmp .L14
.L3004:
	cmp bl, al
	jne .L5004
.L4004:
	mov [edi], bx
	mov [ecx], bx
	add edi, 2
	add ecx, 2
	jmp .L14
.L5004:
	mov [edi], ax
	mov [ecx], bx
	add edi, 2
	add ecx, 2
	jmp .L14
.L6004:
	cmp al, ah
	jne .L12004
.L7004:
	cmp al, bl
	jne .L9004
.L8004:
	mov [ecx], ax
	mov [edi], ax
	add ecx, 2
	add edi, 2
	jmp .L14
.L9004:
	cmp ah,bh
	jne .L11004
.L10004:
	mov [ecx], ax
	mov [edi], ax
	add ecx, 2
	add edi, 2
	jmp .L14
.L11004:
.L12004:
	mov [edi], ax
	mov [ecx], bx
	add edi, 2
	add ecx, 2

.L14:
	mov dl, ah			; remember byte 3 of each dword for the next iteration
	mov dh, bh

	pop ebp				; drop the saved lb2 pointer (ebp is unchanged since the push)

	pop eax				; eax = dword count
	xchg eax,ecx			; ecx = count, eax = address2 write pointer

	dec ecx
	jnz near .L0

.L333:
	pop edi				; restore callee-saved registers
	pop esi
	pop ebx
	pop ebp
	ret


;------------------------------------------------------------
; void eagle_lines_mmx( unsigned long *lb,
;                       unsigned long *lb2,
;                       int width,
;                       int *address,
;                       int *address2 )
;
; MMX variant.  It is reached by a jump from _eagle_lines with the stack
; as it was on entry there, and reads the same five stack arguments.
;
; ABI   : 32-bit C calling convention, arguments on the stack.
; In    : lb, lb2          - two source buffers, read 8 bytes at a time
;         width            - source length in bytes; width/8 quadwords are
;                            processed, a remainder of 1..7 bytes is ignored
;         address,address2 - two destination buffers
; Out   : nothing meaningful in eax
; Clobb : eax, ecx, edx, mm0-mm7, flags.  ebx, esi, edi and ebp are
;         preserved; emms is executed before returning.
;
; NOTE: every iteration stores 2 + 16 bytes to each destination but only
; advances the write pointers by 16, so the final iteration writes 2
; bytes beyond 2*width bytes in each destination buffer.
;
; Register roles inside the loop:
;   esi      read pointer into lb
;   ebp      read pointer into lb2
;   edi      write pointer into address
;   ecx      write pointer into address2 (the quadword count is parked
;            on the stack; between iterations eax/ecx are swapped back)
;   mm0,mm1  current 8 bytes from lb / lb2
;   eax,ebx  low dwords of mm0 / mm1 for the scalar first step
;   dl,dh    last byte of the previous quadword from lb / lb2 (0 at start)
;------------------------------------------------------------
_eagle_lines_mmx:

;       .align 4

	push ebp
	mov ebp, esp
	push ebx			; callee-saved registers used as scratch below
	push esi
	push edi

	mov esi,[ebp+8]			; esi = lb
	mov ebx,[ebp+12]		; ebx = lb2 (moved into ebp below)
	mov ecx,[ebp+16]		; ecx = width
	mov edi,[ebp+20]		; edi = address
	mov eax,[ebp+24]		; eax = address2

	mov ebp, ebx			; ebp = lb2 read pointer (frame pointer no longer needed)

	shr ecx, 3			; divide by eight because mmx registers are 8 bytes
	jz near .L333			; width < 8: nothing to do (loop would otherwise wrap)

	xor dx,dx			; no previous bytes yet

.L0:
	push ecx			; park the quadword count
	xchg eax,ecx			; ecx = address2 write pointer, eax free for data

	movq mm0,[esi]			; mm0 = next 8 bytes from lb
	movq mm1,[ebp]			; mm1 = matching 8 bytes from lb2
	add esi,8
	add ebp,8

	movd eax, mm0			; low dwords for the scalar step below
	movd ebx, mm1

	; Scalar step: previous bytes (dl,dh) against byte 0 (al,bl);
	; writes 2 bytes to each destination.
	cmp dh, bl
	jne .L6001
.L1001:
	cmp bl, al
	jne .L3001
.L2001:
	mov [edi],al
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi, 2
	add ecx, 2
	jmp .L11
.L3001:
	cmp dh, dl
	jne .L5001
.L4001:
	mov [edi],dx
	mov [ecx],dx
	add edi, 2
	add ecx, 2
	jmp .L11
.L5001:
	mov [edi],dl
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi, 2
	add ecx, 2
	jmp .L11
.L6001:
	cmp dl, al
	jne .L12001
.L7001:
	cmp dl, dh
	jne .L9001
.L8001:
	mov [edi],dx
	mov [ecx],dx
	add edi, 2
	add ecx, 2
	jmp .L11
.L9001:
	cmp al,bl
	jne .L11001
.L10001:
	mov [edi],dl
	mov [ecx],bl
	mov [edi+1],al
	mov [ecx+1],bl
	add edi, 2
	add ecx, 2
	jmp .L11
.L11001:
.L12001:
	mov [edi],dl
	mov [ecx],dh
	mov [edi+1],al
	mov [ecx+1],bl
	add edi, 2
	add ecx, 2
.L11:

; bottom line (stored through ecx): selection driven by mm0

	movq mm4, mm1

	movq mm2, mm0

	pcmpeqd mm7, mm7		; set mm7 to FFFFFFFFFFFFFFFF (used as NOT mask)

	pcmpeqb mm4, mm0		; mm4 = byte mask: mm1 byte == mm0 byte

	psllq mm2, 8			; mm2 = mm0 shifted left one byte

	movq mm3, mm0

	movq mm5, mm4			; mm5 = copy of the mm0/mm1 equality mask

	pcmpeqb mm2, mm0		; mm2 = byte mask: shifted mm0 byte == mm0 byte

	psrlq mm3, 8			; mm3 = mm0 shifted right one byte

	pand mm2, mm5			; mm2 = both masks combined

	movq mm6, mm1

	movq mm5, mm2			; mm5 = NOT mm2 (completed by the pxor below)

	pxor mm5, mm7

	psllq mm6, 8			; mm6 = mm1 shifted left one byte

	pand mm2, mm0			; take mm0 bytes where the mask is set

	pand mm5, mm6			; take shifted mm1 bytes elsewhere

	por mm2, mm5			; mm2 = first byte stream for the bottom line

	pcmpeqb mm3, mm0		; mm3 = byte mask: shifted mm0 byte == mm0 byte

	movq mm5, mm4			; (value unused: mm5 is reloaded from mm3 below)

	pand mm3, mm4			; combine with the mm0/mm1 equality mask

	movq mm5, mm3			; mm5 = NOT mm3 (completed by the pxor below)

	movq mm6, mm1

	pxor mm5, mm7

	pand mm3, mm0			; take mm0 bytes where the mask is set

	psrlq mm6, 8			; mm6 = mm1 shifted right one byte

	pand mm5, mm6			; take shifted mm1 bytes elsewhere

	psrlq mm2, 8			; drop byte 0 of mm2 before interleaving

	por mm3, mm5			; mm3 = second byte stream for the bottom line

; now write the 16 bytes of the bottom line

	movq mm4, mm2

	movq mm6, mm2

	punpcklbw mm4, mm3		; interleave the low four bytes of mm2 and mm3

	punpckhbw mm6, mm3		; interleave the high four bytes of mm2 and mm3

	movq [ecx],mm4

	movq [ecx+8],mm6


; start with top line (stored through edi): same steps with mm0 and mm1 swapped

	movq mm2, mm1
	movq mm3, mm1
	movq mm4, mm0

	pcmpeqd mm7, mm7		; set mm7 to FFFFFFFFFFFFFFFF

	pcmpeqb mm4, mm1		; mm4 = byte mask: mm0 byte == mm1 byte

	psllq mm2, 8			; mm2 = mm1 shifted left one byte

	psrlq mm3, 8			; mm3 = mm1 shifted right one byte

	movq mm5, mm4			; mm5 = copy of the mm0/mm1 equality mask

	pcmpeqb mm2, mm1		; mm2 = byte mask: shifted mm1 byte == mm1 byte

	pand mm2, mm5			; mm2 = both masks combined

	movq mm5, mm2			; mm5 = NOT mm2 (completed by the pxor below)
	pxor mm5, mm7

	pand mm2, mm1			; take mm1 bytes where the mask is set

	movq mm6, mm0			; mm6 = mm0 shifted left one byte
	psllq mm6, 8
	pand mm5, mm6			; take shifted mm0 bytes elsewhere

	por mm2, mm5			; mm2 = first byte stream for the top line


	movq mm5, mm4			; mm5 = mm0/mm1 equality mask

	pcmpeqb mm3, mm1		; mm3 = byte mask: shifted mm1 byte == mm1 byte

	pand mm3, mm5			; combine with the equality mask

	movq mm5, mm3			; mm5 = NOT mm3 (completed by the pxor below)

	pand mm3, mm1			; take mm1 bytes where the mask is set

	movq mm6, mm0

	pxor mm5, mm7

	psrlq mm6, 8			; mm6 = mm0 shifted right one byte

	pand mm5, mm6			; take shifted mm0 bytes elsewhere

	por mm3, mm5			; mm3 = second byte stream for the top line

; now write the 16 bytes of the top line

	psrlq mm2, 8			; drop byte 0 of mm2 before interleaving

	movq mm4, mm2

	movq mm6, mm2

	punpcklbw mm4, mm3		; interleave the low four bytes of mm2 and mm3

	punpckhbw mm6, mm3		; interleave the high four bytes of mm2 and mm3

	movq [edi],mm4

	movq [edi+8],mm6

	psrlq mm0, 56			; keep only the last byte of each source quadword
	psrlq mm1, 56

	add ecx, 14			; 2 (scalar step) + 14 = 16 bytes advanced per iteration
	add edi, 14

	movd edx, mm0			; dl = last byte from lb
	movd ebx, mm1
	mov  dh, bl			; dh = last byte from lb2

	pop eax				; eax = quadword count
	xchg eax,ecx			; ecx = count, eax = address2 write pointer

	dec ecx
	jnz near .L0

.L333:
	emms				; leave MMX state so the FPU is usable again

	pop edi				; restore callee-saved registers
	pop esi
	pop ebx
	pop ebp

	ret


