
;	for new GOGO-no-coda (1999/09)
;	Copyright (C) 1999 shigeo

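;	Measures the execution time of the code enclosed between clkbegin() and
;	clkend() (see the end of musui.c).
;	NOTE: the original comment was mis-encoded; it appears to warn that these
;	routines fault on CPUs where the rdtsc instruction is not available.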

%include "nasm.h"

; for DOS, OS/2, Win-console
; by sava
; SHRINK_NULLCHAR enables the code in write_time that removes the NUL bytes
; from the report buffer before it is handed to write().
; In this file the switch is turned on only when WIN32 is defined.
%ifdef WIN32
%define SHRINK_NULLCHAR
%endif

		globaldef	clkbegin		; start of a timed region
		globaldef	clkend			; end of a timed region
		globaldef	CLKcount		; exported counter (see below)
		globaldef	CLKclock		; exported accumulator (see below)
		globaldef	write_time		; progress report writer
		externdef	write			; called by write_time as write(fd, buf, len)

		; NOTE(review): segment_bss is immediately followed by segment_data, so
		; the three variables below are emitted after segment_data - presumably
		; into the initialized data segment; confirm against nasm.h.
		segment_bss
		segment_data
CLKsave		dd	0		; clkbegin: low 32 bits of the TSC; clkend turns it into (start - end)
CLKcount	dd	0		; incremented once per clkend call
CLKclock	dd	0		; running sum of elapsed TSC counts, stored as a 32-bit float

		segment_code

		segment_code

		align	16
; clkbegin
;	Records the start of a timed region: the low 32 bits of the time-stamp
;	counter are stored in CLKsave. eax and edx are saved and restored.
;	When __tos__ is defined, interrupts are disabled here (cli) and re-enabled
;	by clkend.
clkbegin:
		push	eax
		push	edx
%ifdef __tos__
		cli
%endif
		rdtsc						; edx:eax = time-stamp counter
		mov		[CLKsave],eax		; only the low 32 bits are kept
		pop		edx
		pop		eax
		ret

		align	16
; clkend
;	Closes a timed region opened by clkbegin: adds the elapsed TSC count
;	(low 32 bits only) to CLKclock and increments CLKcount.
;	eax and edx are saved and restored; the flags are modified and one x87
;	stack slot is used temporarily.
clkend:
		push	eax
		push	edx
		rdtsc						; edx:eax = time-stamp counter, only eax is used
%ifdef __tos__
		sti
%endif
		sub		[CLKsave],eax		; CLKsave = start - end, i.e. the negated elapsed count
		inc		dword [CLKcount]
		fld		dword [CLKclock]	; CLKclock is kept as a 32-bit float
		fisub	dword [CLKsave]		; subtracting (start - end) adds the elapsed count
		fstp	dword [CLKclock]
		pop	edx
		pop	eax
		ret

;	"[%02d:%02d:%02d.%02d]"
;   min: [00:00:00.00]
;   max: [99:59:59.99]
; sprint_time
;	Writes a time value as the 13 characters "[hh:mm:ss.cc]" into freshly
;	reserved stack space, using integer arithmetic only.
;	input:	eax = time in 10 ms units
;			edi = 0FFFFFFFh (mask that keeps the 28 fraction bits)
;	output:	esp -= 13, text at [esp+0]..[esp+12]
;	destroy: eax, edx, ecx, flags
;	The two macro parameters are accepted but not referenced.
;
;	Method: the value is scaled so that edx becomes a 4.28 fixed-point number
;	whose integer nibble is the tens-of-hours digit. Each following digit is
;	obtained by clearing the nibble and multiplying the fraction by 10
;	(or by 6 for the tens of minutes / tens of seconds).

; sprint_time_next: drop the digit just emitted, multiply the fraction by
; 2*(1+%1) (%1 = 4 gives x10, %1 = 2 gives x6) and leave the new digit in eax.
%macro	sprint_time_next	1
		and		edx,edi
		lea		edx,[edx+edx*%1]
		add		edx,edx
		mov		eax,edx
		shr		eax,cl
%endmacro

%macro	sprint_time	2
		sub		esp,13
		mov		cl,28				; shift count that isolates the digit nibble
		mov		byte [esp+0],'['
		mov		edx,95217CAFh		; approx. (2^53)/(10*60*60*100)
		stc							; shifted in below as a rounding bias
		rcl		eax,6				; time * 2^6, plus the bias bit
		mul		edx					; edx:eax is 5.59 fixed point
		shl		eax,1
		rcl		edx,1				; edx is now 4.28 fixed point

		mov		eax,edx
		shr		eax,cl
		or		al,30h
		mov		[esp+1],al			; tens of hours

		sprint_time_next	4
		or		ax,'0:'
		mov		[esp+2],ax			; hours, then ':'

		sprint_time_next	2
		or		al,30h
		mov		[esp+4],al			; tens of minutes

		sprint_time_next	4
		or		ax,'0:'
		mov		[esp+5],ax			; minutes, then ':'

		sprint_time_next	2
		or		al,30h
		mov		[esp+7],al			; tens of seconds

		sprint_time_next	4
		or		ax,'0.'
		mov		[esp+8],ax			; seconds, then '.'

		sprint_time_next	4
		or		al,30h
		mov		[esp+10],al			; tenths of a second

		sprint_time_next	4
		or		ax,'0]'
		mov		[esp+11],ax			; hundredths of a second, then ']'
%endmacro

;	"(%4d.%02dx) "
;   min: "(   0.00x)"
;   max: "(9999.99x)"
; sprint_speedup
;	Writes a ratio as the 11 characters "(dddd.ddx) " into freshly reserved
;	stack space; leading zeros of the integer part are shown as blanks.
;	input:	eax = ratio * 100
;			edi = 0FFFFFFFh (mask that keeps the 28 fraction bits)
;	output:	esp -= 11, text at [esp+0]..[esp+10]
;	destroy: eax, edx, ecx, flags
;	The two macro parameters are accepted but not referenced.

; sprint_speedup_next: drop the digit just emitted, multiply the fraction by
; 10 and leave the next digit in eax (flags reflect the shr result).
%macro	sprint_speedup_next	0
		and		edx,edi
		lea		edx,[edx+edx*4]
		add		edx,edx
		mov		eax,edx
		shr		eax,cl
%endmacro

; sprint_speedup_pad: store the digit in eax at [esp+%1]. ch holds 20h until
; the first non-zero digit is seen and 30h afterwards, so leading zeros
; come out as spaces. Must directly follow the shr that produced the digit.
%macro	sprint_speedup_pad	1
		jz		%%leading
		mov		ch,30h
%%leading:
		or		al,ch
		mov		[esp+%1],al
%endmacro

%macro	sprint_speedup	2
		sub		esp,11
		mov		cl,28				; shift count that isolates the digit nibble
		mov		ch,20h				; pad character until a non-zero digit appears
		mov		byte [esp+0],'('
		mov		edx,0A7C5AC47h		; approx. (2^48)/100000
		stc							; shifted in below as a rounding bias
		rcl		eax,12				; value * 2^12, plus the bias bit
		mul		edx					; edx:eax is 4.60 fixed point

		mov		eax,edx
		shr		eax,cl
		sprint_speedup_pad	1		; thousands

		sprint_speedup_next
		sprint_speedup_pad	2		; hundreds

		sprint_speedup_next
		sprint_speedup_pad	3		; tens

		sprint_speedup_next
		or		ax,'0.'
		mov		[esp+4],ax			; units, then '.'

		sprint_speedup_next
		or		al,30h
		mov		[esp+6],al			; tenths

		sprint_speedup_next
		or		eax,'0x) '
		mov		[esp+7],eax			; hundredths, then "x) "
%endmacro

;	"%3d.%1d%%"
;   min: "  0.0%"
;   max: "100.0%"
; sprint_percentage
;	Writes a percentage as the 6 characters "ddd.d%" into freshly reserved
;	stack space; leading zeros of the integer part are shown as blanks.
;	input:	eax = percentage * 10
;			edi = 0FFFFFFFh (mask that keeps the 28 fraction bits)
;	output:	esp -= 6, text at [esp+0]..[esp+5]
;	destroy: eax, edx, ecx, flags
;	The two macro parameters are accepted but not referenced.

; sprint_percentage_next: drop the digit just emitted, multiply the fraction
; by 10 and leave the next digit in eax (flags reflect the shr result).
%macro	sprint_percentage_next	0
		and		edx,edi
		lea		edx,[edx+edx*4]
		add		edx,edx
		mov		eax,edx
		shr		eax,cl
%endmacro

; sprint_percentage_pad: store the digit in eax at [esp+%1], as a space while
; only zeros have been seen (ch = 20h) and as a digit afterwards (ch = 30h).
; Must directly follow the shr that produced the digit.
%macro	sprint_percentage_pad	1
		jz		%%leading
		mov		ch,30h
%%leading:
		or		al,ch
		mov		[esp+%1],al
%endmacro

%macro	sprint_percentage	2
		sub		esp,6
		mov		cl,28				; shift count that isolates the digit nibble
		mov		ch,20h				; pad character until a non-zero digit appears
		mov		edx,83126E97h		; approx. (2^41)/1000
		stc							; shifted in below as a rounding bias
		rcl		eax,19				; value * 2^19, plus the bias bit
		mul		edx					; edx:eax is 4.60 fixed point

		mov		eax,edx
		shr		eax,cl
		sprint_percentage_pad	0	; hundreds

		sprint_percentage_next
		sprint_percentage_pad	1	; tens

		sprint_percentage_next
		or		ax,'0.'
		mov		[esp+2],ax			; units, then '.'

		sprint_percentage_next
		or		ax,'0%'
		mov		[esp+4],ax			; tenths, then '%'
%endmacro

;	"%7d"
;   min: "      0"
;   max: "9999999"
; sprint_int
;	Writes an unsigned integer as 7 right-aligned, blank-padded decimal
;	characters into freshly reserved stack space.
;	input:	eax = value
;			edi = 0FFFFFFFh (mask that keeps the 28 fraction bits)
;	output:	esp -= 7, text at [esp+0]..[esp+6]
;	destroy: eax, edx, ecx, flags
;	The two macro parameters are accepted but not referenced.

; sprint_int_pad_digit: emit the digit held in the top nibble of edx at
; [esp+%1] (a space while only zeros have been seen, ch = 20h; a digit once
; ch = 30h), then clear that nibble and multiply the fraction by 10 so the
; next digit moves into place.
%macro	sprint_int_pad_digit	1
		mov		eax,edx
		shr		eax,cl
		jz		%%leading
		mov		ch,30h
%%leading:
		or		al,ch
		mov		[esp+%1],al
		and		edx,edi
		lea		edx,[edx+edx*4]
		add		edx,edx
%endmacro

%macro	sprint_int	2
		sub		esp,7
		mov		cl,28				; shift count that isolates the digit nibble
		mov		ch,20h				; pad character until a non-zero digit appears
		mov		edx,08637BD02h		; approx. (2^51)/1000000
		stc							; shifted in below as a rounding bias
		rcl		eax,8				; value * 2^8, plus the bias bit
		mul		edx					; edx:eax is 5.59 fixed point
		shl		eax,1
		rcl		edx,1				; edx is now 4.28 fixed point

		sprint_int_pad_digit	0	; millions
		sprint_int_pad_digit	1	; hundred thousands
		sprint_int_pad_digit	2	; ten thousands
		sprint_int_pad_digit	3	; thousands
		sprint_int_pad_digit	4	; hundreds
		sprint_int_pad_digit	5	; tens

		mov		eax,edx
		shr		eax,cl
		or		al,30h				; the units digit is always printed
		mov		[esp+6],al
%endmacro

;
; void write_time(int fd, long elapsed_sec, long elapsed_usec, unsigned frameNum,
;	unsigned total_frame, unsigned sps, unsigned spf);
;
;	This routine prints the progress report without floating point operations.
;	This routine can be called from the signal handler.
;
;	Arguments are read from the stack (see the %defines below):
;	  fd            passed unchanged as the first argument of write()
;	  elapsed_sec,
;	  elapsed_usec  elapsed time; combined into 10 ms units
;	  frameNum      frames processed so far
;	  total_frame   total number of frames
;	  sps, spf      used as speedup = (frameNum * spf) / (sps * elapsed)
;
;	The text is assembled backwards on the stack and written with one call:
;	  '{' frameNum '/' total_frame '}' percentage speedup
;	  " re:" remaining-time " to:" total-time CR
;	Every "push byte" contributes one character plus three NUL bytes; the NULs
;	are squeezed out first when SHRINK_NULLCHAR is defined, otherwise they are
;	written as part of the buffer.
;
;	Nothing is written when frameNum or total_frame is 0, when the elapsed
;	time is negative, or when one of the divisions below cannot be carried out
;	(zero divisor, or a quotient that does not fit in 32 bits). The last case
;	includes an elapsed time shorter than 10 ms. Without these checks the div
;	instruction raises a divide error.
;
;	NOTE: the sprint_* macros do not range-check their input; values above the
;	maxima listed in their headers are not formatted correctly.
;
;	ebp, esi, ebx and edi are preserved; eax, ecx and edx are not.
;
;	2000/04/19	Initial version by K.SAKAI
;	2000/04/21	Don't use printf.
;
		align	16
write_time:
		push	ebp
		push	esi
		push	ebx
		push	edi
		mov		ebp,esp		; ebp is a frame pointer; .exit restores esp from it
%define	stream			[ebp+16+ 4]
%define	elapsed_sec		[ebp+16+ 8]
%define	elapsed_usec	[ebp+16+12]
%define	frameNum		[ebp+16+16]
%define	total_frame		[ebp+16+20]
%define	sps				[ebp+16+24]
%define	spf				[ebp+16+28]
		mov		eax,frameNum
		test	eax,eax
		jz		near .exit
		mov		edi,0FFFFFFFh	; fraction mask required by the sprint_* macros

.get_elapsed:
		mov		eax,elapsed_usec
		cdq
		mov		ecx,10000		; 10ms
		idiv	ecx
		imul	esi,elapsed_sec,100
		add		esi,eax			; = elapsed time in 10ms unit
		js		near .exit

; total = elapsed * total_frame / frameNum
.get_total:
		mov		eax,total_frame
		test	eax,eax
		jz		near .exit
		push	byte 13			; CR (last character of the report)
		mul		esi				; edx:eax = total_frame * elapsed
		cmp		edx,frameNum
		jae		near .exit		; quotient would overflow 32 bits: skip the report
		div		dword frameNum
		mov		ebx,eax			; eax = total time in 10ms unit
		sprint_time	eax,edx
		push	dword ' to:'

; rest = total - elapsed
.get_rest:
		mov		eax,ebx
		sub		eax,esi			; eax = rest time in 10ms unit
		cmc						; CF = 1 when no borrow occurred
		sbb		edx,edx			; edx = -1 if total >= elapsed, else 0
		and		eax,edx			; clamp a negative rest time to 0
		sprint_time	eax,edx
		push	dword ' re:'

;speedup = (frameNum * spf) / (sps * elapsed)
.get_speedup:
		mov		eax,sps
		mul		esi				; edx:eax = sps * elapsed
		bsr		ecx,edx			; ZF = 1 when the product fits in 32 bits
		setnz	ch
		neg		ch				; ch = 0FFh if edx != 0, else 0
		inc		cl
		and		cl,ch			; cl = number of significant bits in edx (or 0)
		shrd	eax,edx,cl		; eax:mantissa, cl:exponent
		mov		ebx,eax
		mov		eax,100*100
		mul		dword spf
		mul		dword frameNum
		cmp		edx,ebx
		jae		near .exit		; divisor is 0 (elapsed < 10ms or sps == 0) or quotient overflows
		div		ebx
		shr		eax,cl			; eax = speed up * 100
		sprint_speedup	eax,edx
		push	byte ' '

; percentage = 100.* frameNum / total_frame
.get_percentage:
		mov		eax,1000
		mul		dword frameNum
		mov		ebx,total_frame
		cmp		edx,ebx
		jae		near .exit		; quotient would overflow 32 bits: skip the report
		div		ebx				; eax = frame_per * 10
		sprint_percentage	eax,edx
		push	byte ' '

		push	byte '}'
		mov		eax,ebx
		sprint_int	eax,edx
		push	byte '/'
		mov		eax,frameNum
		sprint_int	eax,edx
		push	byte '{'

		mov		eax,ebp
		sub		eax,esp			; length of strings
		mov		edx,esp			; start of the text
		; Thanks to sava 00/08/10
%ifdef SHRINK_NULLCHAR
		; Compact the buffer in place by dropping every NUL byte.
		; The count is tested before it is decremented so that all bytes,
		; including the last one, are examined.
		mov		esi,esp			; read pointer
		mov		edi,esp			; write pointer
		mov		ecx,eax			; bytes left to examine
.shr_lp:
		test	ecx,ecx
		jz		short .shr_brk
		dec		ecx
		mov		al,byte [esi]
		inc		esi
		test	al,al
		jz		short .shr_lp	; skip NUL padding
		mov		byte [edi],al
		inc		edi
		jmp		short .shr_lp
.shr_brk:
		mov		eax,edi
		sub		eax,esp			; length after compaction
%endif
		and		esp,0FFFFFFFCh	; align 4
		push	eax				; length
		push	edx				; buffer
		push	dword stream	; fd
		call	write

.exit:
		mov		esp,ebp			; discards the text buffer and the call arguments
		pop		edi
		pop		ebx
		pop		esi
		pop		ebp
		ret

		end
