/************************************************************************\
 * File Version Information
 * $Header: /Altair32v3/srcdbg.c 25    12/20/13 9:55p Racini $
 ************************************************************************/
/************************************************************************\
  MITS Altair Emulator
  Source-level debugging support
  
  Source-level debugger support for the Solace Sol-20 emulator
  Copyright (c) Jim Battle, 2000-2002

  Modifications for the Altair32 Emulator
  Copyright (c) 2001-2016 Richard A. Cini


Change Log:
  2002/05/17  RAC -- Initial changes
  2002/07/07  RAC -- RELEASE MARKER -- v2.3
  2002/08/23  RAC -- RELEASE MARKER -- v2.30.10
  2002/11/15  RAC -- RELEASE MARKER -- v2.40.2100
  2003/04/26  RAC -- RELEASE MARKER -- v2.50.2045
  2004/07/30  RAC -- RELEASE MARKER -- v3.00.0135
  2006/05/12  RAC -- RELEASE MARKER -- v3.10.0200
  2006/06/13  RAC -- Change to OvlSortOrder to sync with Solace 3.2
  2006/11/15  RAC -- RELEASE MARKER -- v3.20.0400
  2011/09/17  RAC -- RELEASE MARKER -- v3.30.0800
  2013/02/03  RAC -- RELEASE MARKER -- v3.32.1100
  2013/12/31  RAC -- RELEASE MARKER -- v3.33.2100
  2016/02/20  RAC -- RELEASE MARKER -- v3.34.0900
\************************************************************************/

/*
 Solace -- Sol Anachronistic Computer Emulation
 A Win32 emulator for the Sol-20 computer.

 Copyright (c) Jim Battle, 2002

 this module provides services for supporting source-level debugging.
 it maintains a list of assembly language listing files and information
 about how they are to be used; it parses the contents of those files,
 and it can respond with the source lines that match a given address.

 Design notes:

>>>>>>>> user interface <<<<<<<<
(what the user of Solace sees)

On start up, there are no source overlays present.  The existing
debugging commands operate as they currently do.

The general idea is to let the user specify the source code that matches
whatever is in memory so that the disassembler window can show the original
source code, comments and all, while debugging.

These are called "source overlays".  More than one can be resident.
There are commands to load and unload them, and to refresh them.
They can also be made active or inactive.

Because there are comment lines, blank lines, pagefeed characters
interspersed between "real" source lines, the question arises of which
lines should be associated with a given address.  The decision is all
source lines up to and including the next active "op" (either instruction
or assembler pseudo-op) shall be associated with that op.  The assumption
is that comments most often precede the instruction, and only rarely
trail the operation.  Thus:

1  ; these comments are "bound" to
1  ; the next line, the "inr b".
1   LOOP:  INR B   ; on this line
2          XRA A
3  ; this comment belongs to the next op, the "JP"
3          JP LOOP
4  ; this is associated with the next...

The numbers in the left column show how lines are grouped


Operation:

When a file overlay is sourced, the debugger reads in the file specified
from the host PC filesystem (not CP/M, unfortunately, although this could
be done via an escape mechanism like import and export are done...)  They
are read in and preprocessed into an internal structure, one per file.

Any time the disassembler window displays a range of addresses, the overlays
are consulted to decide whether to do a raw disassembly or to display the
overlay source (or both).  If there is an overlap between overlays, the
overlay which was defined later takes priority.

Later feature: autoloading.  It is convenient for the system to automatically
check for an overlay whenever a binary is loaded.  These have the special
flag "autoload" set in the overlay structure.  These are 2nd class citizens
in that the system can decide to remove them without the user asking for it
(in the same way they got loaded without an explicit user request).  The
decision can be made based on either or both static and dynamic grounds.
If the overlay is covered by more recently loaded overlays, we might toss
it out if too little of it is "visible."  Dynamically, if too much of the
overlay is not meaningful, we might toss the rest too.  Ovl*PruneOverlays()
make this decision.

>>>>>>>> implementation <<<<<<<<

-- parsing --

We parse a file in one pass, and we just use one 64K entry lookup table
to look up which source line corresponds to which address.  Because some
things in the data structure are dynamically allocated, we allocated a
chunk of memory and fill it up as we go; if it proves to be too small,
we realloc it and continue.  The memory for each text line could have
been allocated a line at a time, but instead it is one big block too to
cut down on fragmentation.

The file parsed must conform to the output of ASM or MAC.  By inspection,
the format is fixed field, like this:

[header][source]

[header] is always the first 16 chars of the line, and is one of:
  [16 spaces]                           (comment lines; non-code-emitting)
  [FLAG|space][AAAA][11 spaces]         (used by ORG and DS)
  [FLAG|space][AAAA][space]=[9 spaces]  (used by SET and EQU)
  [FLAG|space][AAAA][gap][2*N hexchars][10-2*N spaces]

[FLAG] is a single character used to mark warnings and errors.
Be careful about some flagged lines -- they may look kind of like a
valid op, but since they don't emit code, they should be treated
like a comment line.

[AAAA] is usually the address associated with this line of code,
       but in the case of SET and EQU, it is the value of the label.

[gap] is <space> normally, but is "+" if the line is the result of
      a macro expansion.

[source] is
    [whitespace][;<comment...>]
    [whitespace]<op>[whitespace][args][;<comment...>]
    <LABEL[:]>[whitespace]<op>[whitespace][args][;<comment...>]
    others?

we really don't care much about the format of [source], just [header].

MAC can prepend a ^L to a line to indicate formfeed.  These are
simply stripped out and not considered.

^Z marks the end of file, but it isn't required.
*/

#define _CRT_SECURE_NO_WARNINGS			// BAD thing to do
#include <windows.h>
#include <stdio.h>		// C-lib - I/O
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

#include "altair32.h"
#include "i8080.h"		//for Di80 macro
#include "srcdbg.h"

#define MAX_OVERLAYS 10	// maximum number of overlays that can be resident

// debugging aids
#define DEBUG1 (0)	// dump overlay struct while parsing
#define DEBUG2 (0)	// implement a main() for stand-alone testing


// type declarations

// classification of a listing line, as produced by parse_optype().
// anything that is not one of the recognized pseudo-ops is OPTYPE_OTHER.
enum tagOPTYPE {
	OPTYPE_ORG,	// "ORG"
	OPTYPE_DS,	// "DS"
	OPTYPE_DB,	// "DB" and "DW"
    OPTYPE_EQU,	// "EQU" and "SET"
	OPTYPE_END,	// "END"
	OPTYPE_OTHER };	// everything else (treated as an executable op by OvlOpType)

// this represents one "op" -- normally an 8080 instruction,
// but it may also be a pseudo-op, like DW, DS, DB, etc.
// MAX_QLEN is the capacity of the byte[] array below.
#define MAX_QLEN (5)
typedef struct tagOPT{
    uint16 addr;	// the address of this op
    uint16 len;		// how many bytes are covered by this op
			// must be 16b because of DS ops
    uint32 textoff;	// offset into the owning overlay's text block
    uint8  lines;	// number of lines in text; 0 means must be computed
			// (overlay_build stores 0 when the count is >= 256)
    uint8  overlay;	// which overlay contains this op; overlay_build
			// stores the overlay's sernum here (truncated to 8 bits)
    uint8  optype;	// stores operation type (an enum tagOPTYPE value)
    uint8  qlen;	// how many qualifying bytes are meaningful
    uint8  byte[MAX_QLEN];	// bytes that should be associated with this op
} op_t;


// this struct describes an overlay, which describes each "op" in the
// file.  an "op" consists of all the text trailing the preceding op
// and including the op line.  if there are multiple lines in the op,
// they are separated by '\n' and the group are null terminated; there
// is no '\n' on the final line.  there is also a pointer to the list
// of ops that are used to quickly index the text for a given op.
//
// filename, text and op are heap blocks released by overlay_free().
typedef struct tagOVRT{
    int   sernum;	// creation serial number, starting with 1.
    int   order;	// refresh serial number (bumped on every OvlParse)
    int   autoload;	// was the overlay automatically loaded?
    int   minaddr;	// lowest address seen
    int   maxaddr;	// greatest address seen
    int   maxlinelen;	// longest line in overlay (16-char header + source)
    char *filename;	// fully specified drive:/path/filename.extension
    ovr_stat_t status;	// both, on, off
    char *text;		// slightly massaged source file
    int   num_ops;	// number of ops in the list
    op_t *op;		// pointer to array of op_t
} ovr_t;


// this table contains all the overlays.  they are sorted in the
// order they were defined, and are packed so there are no gaps
// between overlays.  num_overlays is the number of current overlays.
static ovr_t overlay[MAX_OVERLAYS];
static int num_overlays;
static int next_sernum;		// next value handed out as ovr_t.sernum
static int refresh_sernum;	// next value handed out as ovr_t.order

// brute force -- for every byte of memory, point at the op_t
// that covers the byte.  NULL means no op_t covers the byte.
// with only a 16b address space, this is acceptable.
// the table is rebuilt from scratch by OvlHitInitBuild(); the pointers
// refer to entries inside the overlays' op arrays.
static op_t *opmap[0x10000];

// forward declarations of file-local helpers that are used before
// their definitions appear further down in this file.
static void overlay_free(ovr_t *overlay);
static int  overlay_file2index(char *filename);
static int  parse_header_info(char *p, op_t *op, int *lineaddr);
static int  parse_optype(char *p);
static void detab_text(char *dst, char *src, int bufsize);
static int  overlay_id2index(int id);
static int  overlay_build(char *filename, ovr_t *pov);
static void OvlStaticPruneOverlays(int lower_limit);


// -- code --

// one-time start-up: the overlay table is marked empty and both
// serial-number counters (creation and refresh) restart at 1.
void
OvlInit(void)
{
    refresh_sernum = 1;
    next_sernum    = 1;
    num_overlays   = 0;
}


// called at the apocalypse
void
OvlCleanup(void)
{
    int i;
    for(i=0; i<num_overlays; i++)
	overlay_free(&overlay[i]);
    num_overlays = 0;
}


// this reads in a source file and parses it into an overlay.
// if the file already exists in the overlay table, replace
// the existing entry (keeping its id, autoload flag and status);
// otherwise, create a new entry.
//
// filename   -- path of the listing file; may point at the filename stored
//               in the table (e.g. the result of OvlID2File) because a
//               private copy is taken before the old entry is released.
// autoloaded -- non-zero if the overlay was loaded without a user request;
//               only recorded when a new entry is created.
//
// returns one of OVLERR_OK, OVLERR_MALLOC, OVLERR_TOOMANY,
// or OVLERR_FILEOPEN.  if building the overlay fails, the entry is
// removed from the table so that no half-initialized slot (with a NULL
// filename) is left behind for later lookups to trip over.
int
OvlParse(char *filename, int autoloaded)
{
    char *name;
    int idx;
    int stat;
    int i;

    ASSERT(filename != NULL);

    // first, see if the file even exists on the disk
    {
        FILE *fp = fopen(filename, "r");
        if (fp == NULL)
            return OVLERR_FILEOPEN;
        fclose(fp);
    }

    // work from a private copy: on a refresh the stored filename is
    // freed below, and the caller's pointer may be that very string.
    name = _strdup(filename);
    if (name == NULL)
        return OVLERR_MALLOC;

    idx = overlay_file2index(name);
    if (idx < 0) {
        // this file isn't already in the table
        if (num_overlays >= MAX_OVERLAYS) {
            free(name);
            return OVLERR_TOOMANY;
        }
        idx = num_overlays++;
        overlay[idx].sernum   = next_sernum++;
        overlay[idx].autoload = autoloaded;
        overlay[idx].status   = OVERLAY_ON;	// default
        // overlay_build() installs its own copy of the name, so nothing
        // is duplicated here (doing so would leak one copy).
        overlay[idx].filename = (char*)NULL;
        overlay[idx].text     = (char*)NULL;
        overlay[idx].op       = (op_t*)NULL;
    } else {
        // if it exists, clean it out
        overlay_free(&overlay[idx]);
    }

    // start from a clean slate for both new and refreshed entries
    overlay[idx].minaddr    = 0x10000;
    overlay[idx].maxaddr    = 0x0000;
    overlay[idx].maxlinelen = 0;
    overlay[idx].num_ops    = 0;

    // we use this information to keep track of which overlays are more recent
    overlay[idx].order = refresh_sernum++;

    // build overlay
    stat = overlay_build(name, &overlay[idx]);
    free(name);

    if (stat != OVLERR_OK) {
        // the build released whatever it had allocated; make sure of it
        // and then squeeze the dead slot out of the table.
        overlay_free(&overlay[idx]);
        for(i=idx; i<num_overlays-1; i++)
            overlay[i] = overlay[i+1];
        num_overlays--;
    }

    return stat;
}


// look up the filename stored for the overlay with the given id.
// yields NULL when no overlay carries that id.
char *
OvlID2File(int id)
{
    const int slot = overlay_id2index(id);

    return (slot >= 0) ? overlay[slot].filename : NULL;
}


// look up the id (serial number) of the overlay built from the given
// filename.  yields OVLERR_BADID (a value <0 per the original contract)
// when the file is not in the table.
int
OvlFile2ID(char *filename)
{
    const int slot = overlay_file2index(filename);

    return (slot < 0) ? OVLERR_BADID : overlay[slot].sernum;
}


// change the both/on/off display mode of an existing overlay.
// yields OVLERR_OK on success, OVLERR_BADID if the id is unknown.
int
OvlSetMode(int id, ovr_stat_t status)
{
    const int slot = overlay_id2index(id);

    if (slot >= 0) {
        overlay[slot].status = status;
        return OVLERR_OK;
    }
    return OVLERR_BADID;
}


// drop the overlay with the given id: its storage is released and the
// entries behind it are shifted down so the table stays packed.
// yields OVLERR_OK on success, OVLERR_BADID if the id is unknown.
int
OvlRemove(int id)
{
    const int victim = overlay_id2index(id);
    int slot;

    if (victim < 0)
        return OVLERR_BADID;

    overlay_free(&overlay[victim]);

    // close the gap left by the victim
    num_overlays--;
    for (slot = victim; slot < num_overlays; slot++)
        overlay[slot] = overlay[slot+1];

    ASSERT(num_overlays >= 0);

    return OVLERR_OK;
}


// given an overlay table index (not an id), figure out what percentage
// of the overlay is still valid, i.e. how many of its ops have listing
// bytes that equal what Di80() reads at the op's address.
// only DB-type and OTHER-type ops are counted; DS, EQU, ORG and END
// are ignored.  returns 0..100, or 0 if there is nothing to count.
//
// the caller must pass a valid index; no range check is done here.
static int
OvlMatchingPercentage(int idx)
{
    int total_ops    = 0;
    int matching_ops = 0;
    int num_ops, i;
    op_t *op;

    num_ops = overlay[idx].num_ops;
    op      = overlay[idx].op;

    // op walks the array in step with i; always dereference op itself.
    // (indexing op[i] while also advancing op would skip ahead by 2*i
    // and run off the end of the array.)
    for(i=0; i<num_ops; i++, op++) {
        if ((op->optype == OPTYPE_DB) || (op->optype == OPTYPE_OTHER)) {
            int bytes = op->qlen;
            int match = 1;
            int addr  = op->addr;
            int n;
            for(n=0; n<bytes; n++, addr++)
                match &= (op->byte[n] == Di80((word)addr));
            total_ops++;
            matching_ops += match;
        }
    }

    if (total_ops == 0)
        return 0;

    return (100*matching_ops)/total_ops;
}


// figure out what is the longest source line in all active overlays
int
OvlMaxLineLen(void)
{
    int i;
    int maxlen = 50;	// set a reasonable lower bound

    // scan through each overlay
    for(i=0; i<num_overlays; i++) {
	if ((overlay[i].status != OVERLAY_OFF) &&
	    (overlay[i].maxlinelen > maxlen))
	    maxlen = overlay[i].maxlinelen;
    }
    return maxlen;
}


// each parse stamps the overlay with the next "order" number, so a
// smaller order means an older (less recently refreshed) overlay.
// this fills ovl_order[] with overlay table indices arranged from
// oldest to most recent by that stamp (not by creation order).
// it is a simple selection pass: for every output position, pick the
// smallest order that is larger than the one emitted just before.
static int ovl_order[MAX_OVERLAYS];
static void
OvlSortOrder(void)
{
    int pick      = -1;	// table index chosen by the latest scan
    int seen_upto = -1;	// order value of the entry emitted last
    int rank;

    for (rank = 0; rank < num_overlays; rank++) {
        int best = (1<<30);	// smallest unseen order found so far
        int cand = 0;

        while (cand < num_overlays) {
            const int ord = overlay[cand].order;
            if ((ord > seen_upto) && (ord < best)) {
                best = ord;
                pick = cand;
            }
            cand++;
        }

        ASSERT(pick >= 0);
        ovl_order[rank] = pick;
        seen_upto = best;
    }
}


// build the table of which ops cover which bytes in memory.
// opmap[] is cleared, then every op of every overlay is painted into
// it, overlays being visited from oldest to most recently parsed so
// that a more recent overlay clobbers any earlier mapping.
//
// an op whose extent would run past 0xFFFF is clipped at the top of
// memory rather than indexing beyond the end of opmap[].
static void
OvlHitInitBuild(void)
{
    int i;

    for(i=0; i<=0xFFFF; i++)
        opmap[i] = NULL;

    OvlSortOrder();

    // scan through each overlay and update the opmap
    for(i=0; i<num_overlays; i++) {
        int n = ovl_order[i];	// get the next most recent
        int j;
        for(j=0; j<overlay[n].num_ops; j++) {
            op_t *op = &(overlay[n].op[j]);
            int addr = op->addr;
            int end  = addr + op->len;	// one past the last covered byte

            if (end > 0x10000)
                end = 0x10000;		// don't run off the end of opmap[]

            for(; addr<end; addr++)
                opmap[addr] = op;	// will clobber any earlier mapping
        }
    }
}


// rebuild the address-to-op lookup table, pruning autoloaded overlays
// that no longer look useful along the way.
void
OvlHitInit(void)
{
    // step 1: discard overlays whose listing bytes no longer agree
    // with what is in memory.
    OvlDynamicPruneOverlays(10);

    // step 2: discard overlays that have become "invisible" because
    // later overlays cover them.  visibility is judged from the map,
    // so the map is built, the pruning is done, and then the map is
    // built once more to reflect the survivors.
    // FIXME: this is really inefficient, but simple
    OvlHitInitBuild();
    OvlStaticPruneOverlays(15);
    OvlHitInitBuild();
}


// given an overlay table index (not an id), figure out what percentage
// of the overlay is not hidden by subsequent overlays.  an op counts as
// visible when opmap[] at the op's own start address still points at
// that op.  returns 0..100, or 0 if the overlay has no ops.
//
// the caller must pass a valid index, and opmap[] must be current
// (see OvlHitInitBuild).
static int
OvlVisiblePercentage(int idx)
{
    int visible_ops = 0;
    int num_ops, i;
    op_t *op;

    num_ops = overlay[idx].num_ops;
    op      = overlay[idx].op;

    if (num_ops <= 0)
        return 0;

    // plain indexing only: advancing the pointer as well as the index
    // would step through the array two entries at a time and read
    // beyond its end.
    for(i=0; i<num_ops; i++) {
        if (opmap[op[i].addr] == &op[i])
            visible_ops++;
    }

    return (100*visible_ops)/num_ops;
}


// classify the given address against the overlay map:
//   OVLHIT_NO     -- no op covers this address
//   OVLHIT_FLAWED -- an op covers it, but either the op does not start
//                    here or its listing bytes differ from memory
//   OVLHIT_YES    -- an op starts here and all its listing bytes match
int
OvlHit(int addr)
{
    const op_t *hit;
    int n;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    hit = opmap[addr];
    if (hit == NULL)
        return OVLHIT_NO;

    // the address must be the first byte of the op
    if (hit->addr != addr)
        return OVLHIT_FLAWED;

    // every byte recorded in the listing must equal what is in memory
    n = 0;
    while (n < hit->qlen) {
        if (hit->byte[n] != Di80((word)(addr+n)))
            return OVLHIT_FLAWED;
        n++;
    }

    return OVLHIT_YES;	// perfect match
}


// returns a pointer to the multi-line string for the source
// code corresponding to this address.   returns NULL on error:
// no op covers the address, the op's owning overlay can no longer
// be found in the table, or that overlay has no text.
char *
OvlSrc(int addr)
{
    op_t *op;
    int id, idx;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    op = opmap[addr];
    if (op == NULL)
        return NULL;

    // map the op's owner (a serial number) back to a table index
    id  = op->overlay;
    idx = overlay_id2index(id);
// FIXME: this is slow.  perhaps we should fixup all the op->overlay
//        indices or somehow have a mapping table instead of a serial
//        number that must be searched for.

    // never index the table with the "not found" value
    if (idx < 0 || overlay[idx].text == NULL)
        return NULL;

    // pass back a pointer to the source
    return &(overlay[idx].text[op->textoff]);
}


// returns what overlay mode applies to the op at the specified address.
// OVERLAY_OFF is returned when no op covers the address or when the
// op's owning overlay can no longer be found in the table.
int
OvlMode(int addr)
{
    op_t *op;
    int id, idx;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    op = opmap[addr];
    if (op == NULL)
        return OVERLAY_OFF;

    // map the op's owner (a serial number) back to a table index
    id  = op->overlay;
    idx = overlay_id2index(id);
// FIXME: this is slow.  perhaps we should fixup all the op->overlay
//        indices or somehow have a mapping table instead of a serial
//        number that must be searched for.

    // never index the table with the "not found" value
    if (idx < 0)
        return OVERLAY_OFF;

    return overlay[idx].status;
}


// report the number of source lines recorded for the op covering the
// given address; an address not covered by any op counts as one line.
int
OvlLines(int addr)
{
    const op_t *hit;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    hit = opmap[addr];
    if (hit != NULL)
        return hit->lines;

    return 1;
}


// tell whether the given address holds an executable op.
// yields 1 when no op covers the address (it is assumed executable)
// or when the covering op is of type OPTYPE_OTHER; yields 0 for
// everything else (DB, DS, etc).
int
OvlOpType(int addr)
{
    const op_t *hit;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    hit = opmap[addr];
    return ((hit == NULL) || (hit->optype == OPTYPE_OTHER)) ? 1 : 0;
}


// report how many bytes the op at the given address spans.  when an
// overlay op covers the address its recorded length is used; otherwise
// the length comes from OpLen().
int
OvlOpLen(int addr)
{
    const op_t *hit;

    ASSERT(addr >=0 && addr <= 0xFFFF);

    hit = opmap[addr];
    if (hit != NULL)
        return hit->len;

    return OpLen((word)addr);
}


// return the address of the first instruction found preceding
// the one specified.  this routine has a number of heuristics that
// apply in different cases.
//
// 1) in the best case, we simply decrement addr and see if that byte
//    is covered by an entry in an overlay.  if it is, and the op's
//    listing bytes match memory, the op's starting address is the answer.
//
// 2) otherwise, we need to guess, but we can try and make a good guess.
//    this is tricky in that the previous op might start at pc-1, pc-2,
//    or pc-3.  We can't always tell which as the byte at pc-1 might be
//    a one byte op, or it might be an immediate operand to the 2-byte op
//    at pc-2, or they both might be immediate operands to an op at pc-3.
//
//    the heuristic is to move the PC back many bytes and roll forward.
//    if we hit the current PC, we assume it is OK, but if the known good
//    PC falls in the middle of an instruction, we assume we guessed a
//    bad starting point and try another.
//
// 3) if neither of those works, check if the opcode 1, 2, and 3 bytes back
//    are of a consistent length (1, 2, or 3 bytes long, respectively).
//    pick the first one that matches.
//
// 4) if none of those works, just go back one byte
//
// address 0x0000 is returned unchanged, and none of the heuristics is
// allowed to look below address 0x0000 (no wrap-around).
word
OvlPreviousOp(word addr)
{
    op_t *prev;
    int guess, guess1;
    int i;

    if (addr == 0x0000)
        return 0x0000;	// don't wrap around

    // try case #1
    prev = opmap[addr-1];
    if (prev != NULL) {
        int mismatch = 0;
        int a = prev->addr;
        // compare the bytes of the op we actually found; the map entry
        // at its start address may belong to a different overlay's op.
        for(i=0; i<prev->qlen; i++) {
            if (prev->byte[i] != Di80((word)(a+i))) {
                mismatch = 1;
                break;
            }
        }
        if (!mismatch)
            return (word)a;
        // else fall through to case #2
    }


    // try case #2
    guess1 = addr - 30;  // go back 10 max_length instructions
    if (guess1 < 0)
        guess1 = 0;	// near the bottom of memory: start at 0 instead
    // try three different starting phases, all strictly below addr
    for(guess=guess1; (guess<guess1+3) && (guess<addr); guess++) {

        int a = guess;
        int prev_addr = 0x0000;

        for(i=0; i<11*3; i++) {	// rolled back (n+10)*3 bytes, which might be all 1 byte or all 3 byte or a mix

            if (a == addr)
                return (word)prev_addr;
            if (a > addr)
                break;	// we blew past the target

            prev_addr = a;
            a += OpLen((word)a);
        }
    }


    // try case #3
    for(i=1; (i<4) && (i<=addr); i++)
        if (OpLen((word)(addr-i)) == i)
            return (word)(addr-i);


    // case #4
    return (word)(addr-1);
}


// OvlNumOverlays() and OvlInfo() let a caller enumerate the resident
// overlays.  unlike the other external (Ovl*) interfaces, they work
// with table indexes (0 .. OvlNumOverlays()-1), not with id's.
//
// report how many overlays are currently resident.
int
OvlNumOverlays(void)
{
    const int count = num_overlays;

    return count;
}

// copy out selected properties of the overlay at table index idx.
// every output pointer is optional: pass NULL for anything that is
// not wanted.  *fname receives the table's own filename pointer,
// not a copy.
void
OvlInfo(int idx, char **fname, int *id, int *autoloaded,
	ovr_stat_t *status, int *minaddr, int *maxaddr)
{
    const ovr_t *pov;

    ASSERT(idx < num_overlays);

    pov = &overlay[idx];

    if (fname != NULL)      *fname      = pov->filename;
    if (id != NULL)         *id         = pov->sernum;
    if (autoloaded != NULL) *autoloaded = pov->autoload;
    if (status != NULL)     *status     = pov->status;
    if (minaddr != NULL)    *minaddr    = pov->minaddr;
    if (maxaddr != NULL)    *maxaddr    = pov->maxaddr;
}


// drop autoloaded overlays that are mostly hidden behind other overlays.
// an autoloaded overlay is removed when its visible percentage (see
// OvlVisiblePercentage) is below lower_limit.  overlays that were not
// autoloaded are never touched.
static void
OvlStaticPruneOverlays(int lower_limit)
{
    int doomed[MAX_OVERLAYS];	// ids collected first, removed afterwards,
    int ndoomed = 0;		// since removal reshuffles the table
    int slot, k;

    for (slot = 0; slot < num_overlays; slot++) {
        if (!overlay[slot].autoload)
            continue;
        if (OvlVisiblePercentage(slot) >= lower_limit)
            continue;
        doomed[ndoomed++] = overlay[slot].sernum;
    }

    for (k = 0; k < ndoomed; k++)
        OvlRemove(doomed[k]);
}

// drop autoloaded overlays whose contents no longer agree with memory.
// an autoloaded overlay is removed when its matching percentage (see
// OvlMatchingPercentage) is below lower_limit.  overlays that were not
// autoloaded are never touched.
void
OvlDynamicPruneOverlays(int lower_limit)
{
    int doomed[MAX_OVERLAYS];	// ids collected first, removed afterwards,
    int ndoomed = 0;		// since removal reshuffles the table
    int slot, k;

    for (slot = 0; slot < num_overlays; slot++) {
        if (!overlay[slot].autoload)
            continue;
        if (OvlMatchingPercentage(slot) >= lower_limit)
            continue;
        doomed[ndoomed++] = overlay[slot].sernum;
    }

    for (k = 0; k < ndoomed; k++)
        OvlRemove(doomed[k]);
}


// ==================== local helper procedures ====================

// returns the overlay index from the fully qualified filename.
// the comparison is an exact, case-sensitive strcmp().
// a return of -1 means error (no entry carries that filename).
// table entries whose filename pointer is NULL are skipped rather
// than handed to strcmp().
static int
overlay_file2index(char *filename)
{
    int i;

    ASSERT(filename != NULL);

    for(i=0; i<num_overlays; i++) {
        if ((overlay[i].filename != NULL) &&
            (strcmp(overlay[i].filename, filename) == 0))
            return i;
    }

    return -1;
}


// translate an overlay id (serial number) into its position in the
// overlay[] table.  yields -1 when no entry carries that id.
static int
overlay_id2index(int id)
{
    int slot = 0;

    while (slot < num_overlays) {
        if (overlay[slot].sernum == id)
            return slot;
        slot++;
    }

    return -1;
}


// release the three heap blocks owned by an overlay (filename, text,
// op array) and reset the pointers to NULL so the call can safely be
// repeated.  the other members are left alone.
static void
overlay_free(ovr_t *overlay)
{
    // free(NULL) is a no-op, so no guards are needed

    free(overlay->filename);
    overlay->filename = NULL;

    free(overlay->text);
    overlay->text = NULL;

    free(overlay->op);
    overlay->op = NULL;
}


// this reads in a source file and parses it into the overlay
// structure passed in.
//
// on entry, pov->filename, pov->text and pov->op must each be either
// NULL or a heap block owned by the overlay: a previous filename is
// released here, and on any error overlay_free(pov) is applied.
// pov->sernum must already be set (it is copied into every op).
//
// we initially allocate a block of memory for each of pov->text
// and pov->op.  as we parse the file we copy things into these
// blocks.  if a block ever runs out, we realloc() the block to
// be twice as big and continue.
//
// lines are grouped into ops: comment/blank lines accumulate (joined
// with '\n') until a line that emits code, or a DS line that carries
// an address, closes the group.  lines left over after the last op
// are not attached to anything.
//
// return OVLERR_OK (0) on success; OVLERR_MALLOC or OVLERR_FILEOPEN
// on error, in which case the overlay's heap blocks have been freed.
#define K *1024
#define INIT_TEXT_SIZE (64 K)	// in bytes  \__ 16 chars/line avg.
#define INIT_NUM_OPS   ( 4 K)	// in ops    /

// this is the number of data bytes which may appear in the
// header field of the .PRN file.
#define NUM_DATA_BYTES (5)

static int
overlay_build(char *filename, ovr_t *pov)
{
    FILE *fp;			// file handle
    char linebuf[1024];		// holds next line read in
    char procbuf[1024];		// processed line
    char *newname;		// our copy of the filename
    char *lb;			// start of the line after an optional ^L
    op_t *cur;			// the op currently being assembled
    int valid_op   = 0;		// are we appending to an op?
    int max_ops    = INIT_NUM_OPS;	// # of ops before we must reallocate
    int textmax    = INIT_TEXT_SIZE;	// # of bytes of text before we must reallocate
    int textoff    = 0;		// # of bytes of text currently used
    int lines      = 0;		// # of lines of text for this op
    int rv         = OVLERR_OK;	// return value, until proven otherwise
    int prev_idx   = 0;		// index of the op whose length is unknown;
				// an index (not a pointer) so that it
				// survives a realloc of the op array
    int prev_ambig = 0;		// unknown length for previous op
    int lineaddr   = -1;	// address field of the current line, or -1
    int optype, newop, textlen;

    ASSERT(filename != NULL);
    ASSERT(strlen(filename) < 300);

    // init relevant overlay members

    // copy the new name before releasing the old one: the caller may
    // have handed us the overlay's own filename pointer.
    newname = _strdup(filename);
    if (newname == NULL)
        return OVLERR_MALLOC;
    free(pov->filename);
    pov->filename = newname;

    pov->text = malloc(textmax);
    if (pov->text == NULL) {
        overlay_free(pov);
        return OVLERR_MALLOC;
    }

    pov->op = (op_t*)malloc(max_ops*sizeof(op_t));
    if (pov->op == NULL) {
        overlay_free(pov);
        return OVLERR_MALLOC;
    }

    pov->num_ops    = 0;
    pov->maxlinelen = 0;	// max length for raw disassembly
    pov->minaddr    = 0x10000;	// so a refresh doesn't inherit old bounds
    pov->maxaddr    = 0x0000;


    // open file
    fp = fopen(filename, "r");
    if (fp == NULL) {
        overlay_free(pov);
        return OVLERR_FILEOPEN;
    }


    // parse file

    for(;;) {

        // get next line.  testing the fgets() result (rather than feof()
        // afterwards) keeps a final line that lacks a newline, and also
        // stops on a read error instead of reprocessing a stale buffer.
        if (fgets(linebuf, sizeof(linebuf), fp) == NULL)
            break;	// drop out of enclosing for()

        // make space to hold this op
        if (!valid_op) {
            if (pov->num_ops >= max_ops) {
                // realloc keeps the ops gathered so far
                op_t *newops = (op_t*)realloc(pov->op, 2*max_ops*sizeof(op_t));
                if (newops == NULL) {
                    rv = OVLERR_MALLOC;
                    break;
                }
                pov->op = newops;
                max_ops *= 2;
            }
            cur = &(pov->op[pov->num_ops]);
            cur->addr    = 0;
            cur->len     = 0;
            cur->qlen    = 0;
            cur->overlay = pov->sernum;
            cur->textoff = textoff;
            cur->lines   = 0;	// placeholder
            lines    = 0;
            valid_op = 1;
        }
        // the op array can only move in the section above, so this
        // pointer is good for the remainder of the iteration
        cur = &(pov->op[pov->num_ops]);

        // skip leading ^L if it exists
        lb = linebuf;	// line input buffer
        if (*lb == ('L'&31))
            lb++;

        if (strlen(lb) < 16) {
            // too short to carry a header: treat as a comment line
            newop = 0;
            procbuf[0] = '\0';
            textlen = 0;
            optype = OPTYPE_OTHER;
            lineaddr = -1;	// no address on this line
        } else {
            newop = parse_header_info(lb, cur, &lineaddr);
            detab_text(procbuf, lb+16, sizeof(procbuf));
            textlen = (int)strlen(procbuf);
            optype = parse_optype(procbuf);
        }


        // parse_header_info can't always tell how many bytes a given
        // op covers: DS doesn't give away how many bytes it has set
        // aside; DB and DW may define more than 4 bytes of data,
        // therefore just looking at the byte field doesn't indicate
        // how many bytes were allocated.  We can also lose out when
        // the user puts multiple ops on one line, eg "lxi h,0 ! lxi d,0".
        // In these cases, we make a guess and set a flag indicating that
        // the true op length isn't known.  When we parse subsequent lines,
        // when we get the address of the next op, we can fill in the
        // correct length for the previous op.  This can be defeated,
        // though, if an ORG comes along after a DS (say), so in that case
        // we clear the flag and let the old lower bound guess stand.

        // resolve the length of the previous op if we can
        if (prev_ambig) {
            op_t *prev_op = &(pov->op[prev_idx]);
            switch (optype) {
                case OPTYPE_EQU:
                    // the "address" field of SET/EQU is a value, not an address
                    break;
                case OPTYPE_ORG:
                    prev_ambig = 0;	// can't figure it out
                    if (DEBUG1)
                        printf(">>> previous op length remains ambiguous\n");
                    break;
                default:
                    if (lineaddr >= 0) {
                        prev_ambig = 0;	// resolved
                        if (lineaddr < prev_op->addr) {
                            // next address lies below the previous op:
                            // keep the placeholder length
                            printf("Warning: bad stuff at %d\n", __LINE__);
                        } else {
                            prev_op->len = (uint16)(lineaddr - prev_op->addr);
                            if (DEBUG1)
                                printf(">>> patchup: op at 0x%04x has length %d\n",
                                    prev_op->addr, prev_op->len);
                        }
                    }
                    break;
            } // switch
        }

        cur->optype = optype;

        // see if we have a new prev_ambig situation
        if ((optype == OPTYPE_DS) && (lineaddr >= 0)) {
            // a DS without a parsable address is left as a plain text
            // line; otherwise the op would carry a meaningless address
            prev_idx   = pov->num_ops;
            prev_ambig = 1;
            cur->len   = 1;	// conservative placeholder
            newop = 1;		// terminate this op, start a new one
        } else if (newop && (cur->len == NUM_DATA_BYTES)) {
            // yes, it is ambiguous
            prev_idx   = pov->num_ops;
            prev_ambig = 1;
            cur->len   = NUM_DATA_BYTES; // conservative placeholder
        }


        // append line to current op.  room is needed for the text plus
        // two more bytes (a '\n' and a '\0' in the continuation case).
        // a line is shorter than the initial block, so doubling once
        // is always enough.
        if (textoff + textlen + 2 > textmax) {
            char *newtext = realloc(pov->text, textmax*2);
            if (newtext == NULL) {
                rv = OVLERR_MALLOC;
                break;
            }
            pov->text = newtext;
            textmax *= 2;
        }
        strcpy( &(pov->text[textoff]), procbuf);
        textoff += textlen;
        if (textlen+16 > pov->maxlinelen)	// header+source
            pov->maxlinelen = textlen+16;
        lines++;

        if (!newop) {
            // add trailing '\n' to cause continuation of this line
            pov->text[textoff++] = '\n';
            pov->text[textoff] = '\0';
        } else {
            textoff++;		// leave trailing null
            valid_op = 0;	// this op is done
            // newop implies the line carried a valid address
            if (lineaddr < pov->minaddr)
                pov->minaddr = lineaddr;
            if (lineaddr > pov->maxaddr)
                pov->maxaddr = lineaddr;
            if (DEBUG1) {
                printf(">>>>>>>>>>>>>>> op[%d]: addr=0x%04x, len=%d <<<<<<<<<<<<<<<<<<<<<<\n%s\n",
                    pov->num_ops, cur->addr, cur->len, &(pov->text[cur->textoff]));
            }
            // 0 means count is too big to fit
            cur->lines = (lines < 256) ? lines : 0;
            pov->num_ops++;
        }

    } // for()


    if (rv != OVLERR_OK)
        overlay_free(pov);	// error

    fclose(fp);
    return rv;
}


// decode a single hexadecimal digit (0-9, A-F or a-f).
// on success the digit's value (0..15) is stored through v and 1 is
// returned; otherwise 0 is returned and *v is left untouched.
static int
hexvalue(char c, int *v)
{
    int digit;

    if ('0' <= c && c <= '9')
        digit = c - '0';
    else if ('A' <= c && c <= 'F')
        digit = c - 'A' + 10;
    else if ('a' <= c && c <= 'f')
        digit = c - 'a' + 10;
    else
        return 0;	// not a hex digit

    *v = digit;
    return 1;
}

// decode the two hex digits at p[0] and p[1] into an 8-bit value.
// on success the value is stored through v and 1 is returned;
// otherwise 0 is returned and *v is left untouched.  p[1] is only
// examined when p[0] is a valid digit.
static int
hexbyte(char *p, int *v)
{
    int hi, lo;

    if (!hexvalue(p[0], &hi))
        return 0;
    if (!hexvalue(p[1], &lo))
        return 0;

    *v = (hi << 4) + lo;
    return 1;
}

// decode the four hex digits starting at p into a 16-bit value
// (first pair is the high byte).  on success the value is stored
// through v and 1 is returned; otherwise 0 is returned and *v is
// left untouched.  the second pair is only examined when the first
// pair is valid.
static int
hexword(char *p, int *v)
{
    int high, low;

    if (!hexbyte(p, &high))
        return 0;
    if (!hexbyte(p+2, &low))
        return 0;

    *v = (high << 8) + low;
    return 1;
}

// disabled helper (compiled out by the "#if 0"): would report whether
// c is one of 0-9, A-F or a-f.
#if 0
static int
is_hexdigit(char c)
{
    return (c >= '0' && c <= '9') ||
	   (c >= 'A' && c <= 'F') ||
	   (c >= 'a' && c <= 'f');
}
#endif


// decode the fixed 16-character header of a listing line.
//
// p        -- the line; it must be at least 16 characters long
// op       -- receives addr, qlen, len and the data bytes, but only
//             when the line carries a valid 4-digit hex address
// lineaddr -- receives that address, or -1 when there is none
//             (for SET and EQU lines the field holds the symbol's
//             value rather than a real address)
//
// return 1 if the line emits at least one data byte (a new op),
// otherwise return 0.
static int
parse_header_info(char *p, op_t *op, int *lineaddr)
{
    char *cursor = p + 1;	// step over the flag column
    int address;
    int count = 0;

    *lineaddr = -1;	// nothing found yet

    // header must be at least 16 bytes long
    ASSERT(strlen(p) >= 16);

    // columns 1..4 must hold the address
    if (!hexword(cursor, &address))
        return 0;

    // pass back the address of this op
    *lineaddr = address;

    cursor += 5;	// step over the address and the [ |+] gap column

    // gather up to NUM_DATA_BYTES hex pairs; the first non-hex pair ends it
    while (count < NUM_DATA_BYTES) {
        int value;
        if (!hexbyte(cursor, &value))
            break;
        op->byte[count++] = value;
        cursor += 2;
    }

    op->addr = address;
    op->qlen = count;
    op->len  = count;	// may be overridden later

    // no data bytes: not an op (or maybe DS)
    return (count != 0) ? 1 : 0;
}


// figure out what type of op is on this line.
//
// p is the source part of a listing line (header already removed).
// an identifier starting in the first column is taken to be a label
// and is skipped, together with an optional trailing ':'.  the next
// identifier after white space is the op name, which is compared
// case-insensitively with the pseudo-ops in the table below.
//
// returns the matching OPTYPE_* value, or OPTYPE_OTHER when the op
// name is missing or is not one of the listed pseudo-ops.
//
// characters are examined as unsigned char, as the <ctype.h> functions
// require; a plain char holding a value above 127 may be negative.
static int
parse_optype(char *p)
{
    typedef struct {
        const char *label;
        int   labeltype;
    } optable_t;

    static const optable_t optable[] = {
        { "DS",  OPTYPE_DS  },
        { "DB",  OPTYPE_DB  },
        { "DW",  OPTYPE_DB  },
        { "EQU", OPTYPE_EQU },
        { "SET", OPTYPE_EQU },
        { "ORG", OPTYPE_ORG },
        { "END", OPTYPE_END },
    };

    const int optable_size = (int)(sizeof(optable) / sizeof(optable[0]));

    const unsigned char *s = (const unsigned char *)p;
    char opname[16];
    int opnamelen = 0;
    int i;

    // if there is something in the first column, it is the label.
    // we don't need its text for now, so it is just stepped over.
    if (isalpha(*s)) {
        // scan up to first non-alpha-numeric character
        while (isalnum(*s))
            s++;
        // skip optional trailing ':'
        if (*s == ':')
            s++;
    }

    // skip white space
    while (isspace(*s))
        s++;

    // next look for the opcode; overlong names are truncated
    if (isalpha(*s)) {
        // scan up to first non-alpha-numeric character
        while (isalnum(*s)) {
            if (opnamelen < (int)sizeof(opname)-1)
                opname[opnamelen++] = (char)toupper(*s);
            s++;
        }
    }
    opname[opnamelen] = '\0';

    // search the table for a match
    for(i=0; i<optable_size; i++) {
        if (strcmp(opname, optable[i].label) == 0)
            return optable[i].labeltype;
    }

    return OPTYPE_OTHER;
}


// convert tabs to spaces and limit the length of the string.
//
// dst     -- output buffer of bufsize bytes; always NUL terminated
//            when bufsize > 0
// src     -- NUL terminated input
// bufsize -- capacity of dst in bytes; nothing is written if <= 0
//
// '\r' and '\n' are dropped.  a tab is expanded with spaces up to the
// next multiple-of-8 column of the output.  at most bufsize-1
// characters are stored; tab padding is cut short rather than being
// allowed to run past the end of the buffer.
static void
detab_text(char *dst, char *src, int bufsize)
{
    const int limit = bufsize - 1;	// leave room for the terminator
    int stuff = 0;
    char *sp;

    if (bufsize <= 0)
        return;

    for(sp=src; *sp && (stuff < limit); sp++) {
        switch (*sp) {
            case '\r':
            case '\n':
                // swallow it
                break;
            case '\t':
                // pad to next tabstop, but never beyond the buffer
                do {
                    dst[stuff++] = ' ';
                } while ((stuff & 7) && (stuff < limit));
                break;
            default:
                // just copy it
                dst[stuff++] = *sp;
                break;
        }
    }
    dst[stuff] = '\0';
}

// ========================================================================

#if DEBUG2
// stand-alone test harness, compiled only when DEBUG2 is non-zero.
// it loads ./PC.PRN as an overlay and then looks up addresses typed
// on stdin until input ends.
//
// stubs for the emulator services this module calls.
// NOTE(review): the memory-read stub is named DiZ80 while the code in
// this file calls Di80 -- confirm which name the harness needs.
int   OpLen(word addr) { return 1; }
uint8 DiZ80(word addr) { return (uint8)0x00; }

int
main(void)
{
    int stat;
    int id;

    OvlInit();

    stat = OvlParse("./PC.PRN", 0);
    printf("OvlParse returned %d\n", stat);
    id = OvlFile2ID("./PC.PRN");

    stat = OvlSetMode(id, OVERLAY_ON);
    printf("OvlSetMode returned %d\n", stat);

    OvlHitInit();

    while(1) {

	int addr;
	char input[100];
	char *text;

	printf("Enter an address:");
	fflush(stdout);
	// the field width keeps scanf from writing past input[]
	if (scanf("%99s", input) != 1)
	    break;
	addr = strtol(input, (char**)NULL, 0);

	// OvlHit/OvlSrc only accept addresses inside the 64K space
	if (addr < 0 || addr > 0xFFFF) {
	    printf("--> address out of range\n");
	    continue;
	}

	stat = OvlHit(addr);
	text = OvlSrc(addr);
	if (stat == OVLHIT_NO)
	    printf("--> no hit\n");
	else if (stat == OVLHIT_YES)
	    printf("--> %s\n", text);
	else if (stat == OVLHIT_FLAWED)
	    printf("!!> %s\n", text);
    }
    printf("\n");

    OvlCleanup();

    return 0;
}
#endif
/* end of file: srcdbg.c */
