/*
 *	fhist - file history and comparison tools
 *	Copyright (C) 1991, 1992, 1993, 1994, 1995, 1998, 1999 Peter Miller;
 *	All rights reserved.
 *
 *	Derived from a work
 *	Copyright (C) 1990 David I. Bell.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 * MANIFEST: functions to compare text files
 *
 * This program is based on the algorithm in:
 *	An O(ND) Difference Algorithm and Its Variations
 *	Eugene W. Myers
 *	(TR 85-6, April 10, 1985)
 *	Department of Computer Science
 *	The University of Arizona
 *	Tuscon, Arizona 85721
 *
 * Also see:
 *	A File Comparison Program
 *	Webb Miller and Eugene W. Myers
 *	Software Practice and Experience
 *	(Volume 15, No. 11, November 1985)
 *
 * Actual implementation and features by David I. Bell.
 * Enhancements and bug fixes by Peter Miller.
 */

#include <ac/ctype.h>
#include <ac/stdio.h>
#include <ac/string.h>
#include <ac/libintl.h>

#include <compare.h>
#include <cmalloc.h>
#include <error.h>
#include <error_intl.h>
#include <fileio.h>
#include <str.h>


#define SNAKEALLOCSIZE (size_t)500	/* chunk size for snake allocation */
#define CHARALLOCSIZE (size_t)10240	/* allocation size for file data */
#define UNIQUEALLOCSIZE (size_t)512	/* unique handling size for allocation */
#define LINEALLOCSIZE (size_t)2000	/* number of lines to allocate */
#define HASHSIZE 2003			/* size of hash table */
#define CHANGEBLABINTERVAL 100		/* how often to talk about changes */
#define READBLABINTERVAL 1000		/* how often to talk about reading */
#define SNAKEBLABINTERVAL 1000		/* how often to talk about snakes */
#define	TABSIZE 8			/* tab size */
#define	DETABALLOCSIZE (size_t)128	/* buffer allocation size for detab */

static	size_t	tablesize;	/* needed table size */
static	size_t	snakecount;	/* number of snake structures used */
static	int	availchars;	/* number of available characters */
static	char	*nextchar;	/* next allocable character storage */
static	long	*V1;		/* the row containing the last d */
static	long	*V2;		/* another row */
static	SNAKE	*nextsnake;	/* next allocable snake structure */
static	SNAKE	*lastsnake;	/* end of allocable snake structures */
static	SNAKE	*endsnake;	/* last snake structure in list */
static	LINE	**hashtable;	/* hash table for lines */
static modifyline_type modifyline;

FCOMP_DATA fc;			/* externally referencable data */


/*
 * NAME
 *	canonical
 *
 * SYNOPSIS
 *	char *canonical(char *s);
 *
 * DESCRIPTION
 *	The canonical function is used to apply the -Upcase and -Spaces
 *	options to produce a canonical line for comparison purposes.
 *
 * ARGUMENTS
 *	s	- string to canonicalize
 *
 * RETURNS
 *	A pointer to the canonical line is returned.  This will be valid
 *	until the next call to this function.  If the canonical string
 *	would be the same as the input string, the input string is returned.
 */

static char *canonical _((char *));

static char *
canonical(s)
	char		*s;
{
	static size_t	bufsiz;
	static char	*buf;
	size_t		len;
	int		changed;
	int		space;
	char		*cp1;
	char		*cp2;

	/*
	 * if no modifiers are in effect,
	 * no alterations will be required
	 */
	if (!fc.upcaseflag && !fc.spaceflag)
		return s;

	/*
	 * make sure the buffer is large enough
	 */
	len = strlen(s) + 1;
	if (len > bufsiz)
	{
		if (buf)
		{
			bufsiz = len;
			buf = r_realloc_and_check(buf, bufsiz);
		}
		else
		{
			bufsiz = len;
			buf = r_alloc_and_check(bufsiz);
		}
	}

	/*
	 * simple upcase is easier to handle
	 */
	if (fc.upcaseflag)
	{
		changed = 0;
		cp1 = s;
		cp2 = buf;
		while (*cp1)
		{
			if (islower((unsigned char)*cp1))
			{
				*cp2++ = toupper((unsigned char)*cp1);
				changed = 1;
			}
			else
				*cp2++ = *cp1;
			++cp1;
		}
		if (!changed)
			return s;
		*cp2 = 0;
		return buf;
	}

	/*
	 * spaces are rather harder to handle
	 *
	 * keep track of whether a run of spaces has been seen.
	 * replace this run of spaces with a single space
	 * provided it is not at either end of a line.
	 */
	cp1 = s;
	cp2 = buf;
	space = 0;
	changed = 0;
	while (*cp1)
	{
		int	c;

		c = (unsigned char)*cp1++;
		if (fc.upcaseflag && islower(c))
		{
			c = toupper(c);
			changed = 1;
		}
		if (c == '\n')
		{
			if (space)
				changed = 1;
			*cp2++ = c;
			space = 0;
		}
		else if (isspace(c))
		{
			if (space || c != ' ')
				changed = 1;
			space = 1;
		}
		else
		{
			if (space)
			{
				if (cp2 != buf)
					*cp2++ = ' ';
				else
					changed = 1;
			}
			*cp2++ = c;
			space = 0;
		}
	}
	if (!changed)
		return s;
	*cp2 = 0;
	return buf;
}


/*
 * Allocate a string of a certain size.  This routine is used instead of
 * simply calling malloc in order to reduce the overhead.  The allocated
 * strings cannot be freed or realloced.
 */

char *
allocstr(len)
	long	len;
{
	char	*cp;

	len = (len + 3) & ~3;		/* round to multiple of 4 */
	if (len >= UNIQUEALLOCSIZE)
		return cm_alloc_and_check(len);
	cp = nextchar;
	if (len > availchars)
	{
		cp = cm_alloc_and_check(CHARALLOCSIZE);
		nextchar = cp;
		availchars = CHARALLOCSIZE;
	}
	nextchar += len;
	availchars -= len;
	return cp;
}


/*
 * Subroutine to allocate a line structure of the appropriate size and
 * add it to the hash table.  If it matches an already existing line, then
 * the old structure will be returned.
 */

static LINE *addline _((char *));

static LINE *
addline(cp)
	char	*cp;		/* string to be added */
{
	LINE	*lp;		/* current line element */
	char	*curcp;		/* current character of string */
	LINE	**hashentry;	/* hash entry to use */
	long	hash;		/* hash value */
	long	len;		/* line length */
	int	ch;		/* current char */
	char	*canon;

	/*
	 * Compute line length and hash value.
	 * This has to take into account the space and uppercase flags.
	 */
	canon = canonical(cp);
	hash = 0;
	for (curcp = canon; *curcp; curcp++)
	{
		ch = *curcp;
		hash += ((hash * 101) + ch);
		if (hash < 0)
			hash = (hash + 1) & INFINITY;
	}
	len = curcp - canon;

	/*
	 * Search proper hash chain for already existing line.
	 * If the hash accidentally matches, modify it and try again.
	 *
	 * Search on the real data, not the canonical form,
	 * that way fcomp -w -s can report more natural looking results.
	 */
again:
	hashentry = &hashtable[hash % HASHSIZE];
	for (lp = *hashentry; lp; lp = lp->l_next)
	{
		if (lp->l_hash != hash)
			continue;
		if (strcmp(cp, lp->l_data) == 0)
			return lp;
		if
		(
			(cp != canon || lp->l_canon != lp->l_data)
		&&
			strcmp(canon, lp->l_canon) == 0
		)
			continue;
		hash = ((hash * 1234321) + 1) & INFINITY;
		goto again;
	}

	/*
	 * Line not found, allocate a new one
	 */
	if (canon != cp)
	{
		lp = (LINE *)allocstr(LINE_SIZE(strlen(cp)));
		strcpy(lp->l_data, cp);
		lp->l_canon = allocstr(len + 1);
		strcpy(lp->l_canon, canon);
	}
	else
	{
		lp = (LINE *)allocstr(LINE_SIZE(len));
		strcpy(lp->l_data, cp);
		lp->l_canon = lp->l_data;
	}
	lp->l_hash = hash;
	lp->l_next = *hashentry;
	*hashentry = lp;
	return lp;
}


static int is_blank_string _((char *s));

static int
is_blank_string(s)
	char	*s;
{
	while (*s)
		if (!isspace((unsigned char)*s))
			return 0;
	return 1;
}


/*
 * Readfile - read in a file and remember the maximum number of lines it needs.
 * This routine squeezes out blank lines if specified.
 * The first specified number of lines is skipped first.
 * Then the specified number of lines is modified.
 */

static void readfile _((FILEINFO *, long, long));

static void
readfile(fi, skip, modify)
	FILEINFO	*fi;		/* file information */
	long		skip;		/* number of lines to skip */
	long		modify;		/* number of lines to modify */
{
	FILE		*fp;		/* file pointer for file to read in */
	LINE		**lines;	/* pointer to lines array */
	long		*lnums;		/* line numbers within file */
	char		*cp;		/* current line */
	long		linesavail;	/* available number of lines */
	long		linecount;	/* current number of lines */
	long		linenumber;	/* current line number */
	long		linelen;	/* length of line read */

	fi->is_binary = 0;
	if (fc.verbosity > VERBOSE_DEFAULT)
		error_raw("[Reading \"%s\"]", fi->f_name);
	fp = fi->f_file;
	if (skip > 0)
		skipf(fp, skip, fi->f_name);
	linecount = 0;
	linenumber = 0;
	linesavail = LINEALLOCSIZE;
	lines = (LINE **)cm_alloc_and_check(sizeof(LINE *) * LINEALLOCSIZE);
	lnums = (long *)cm_alloc_and_check(sizeof(long) * LINEALLOCSIZE);
	for (;;)
	{
		cp = readlinef(fp, &linelen, 0, fi->f_name, &fi->is_binary);
		if (!cp)
			break;
		if (++linenumber <= modify && modifyline)
			cp = modifyline(cp, &linelen, (struct INFO *)0);
		if
		(
			(fc.verbosity > VERBOSE_DEFAULT)
		&&
			((linenumber % READBLABINTERVAL) == 0)
		)
			error_raw("[%ld lines]", linenumber);
		if (fc.blankflag && is_blank_string(cp))
			continue;
		if (linecount >= linesavail)
		{
			linesavail += LINEALLOCSIZE;
			lines =
				(LINE **)
				cm_realloc_and_check
				(
					lines,
					sizeof(LINE *) * linesavail
				);
			lnums =
				(long *)
				cm_realloc_and_check
				(
					lnums,
					sizeof(long) * linesavail
				);
		}
		lnums[linecount] = linenumber;
		lines[linecount] = addline(cp);
		linecount++;
	}
	fi->f_lnums = lnums;
	fi->f_lines = lines;
	fi->f_linecount = linecount;
	fi->f_linestotal = linenumber;
	if (linecount > fc.maxlines)
		fc.maxlines = linecount;
	fclose_and_check(fp, fi->f_name);
	if (fc.verbosity > VERBOSE_DEFAULT)
	{
		if (linecount == linenumber)
			error_raw("[Done, %ld lines]", linenumber);
		else
			error_raw("[Done, %ld lines, %ld non-blanks]",
				linenumber, linecount);
	}
}


/*
 * Allocate a new snake structure and append it to the snake list.
 */

static SNAKE *allocsnake _((void));

static SNAKE *
allocsnake()
{
	SNAKE	*sp;		/* snake being allocated */

	sp = nextsnake;
	if (sp >= lastsnake)
	{
		sp = (SNAKE *)cm_alloc_and_check(sizeof(SNAKE) * SNAKEALLOCSIZE);
		nextsnake = sp;
		lastsnake = sp + SNAKEALLOCSIZE;
	}
	nextsnake++;
	sp->next = NULL;
	if (!fc.snakelist)
		fc.snakelist = sp;
	else
		endsnake->next = sp;
	endsnake = sp;
	if
	(
		fc.verbosity > VERBOSE_DEFAULT
	&&
		!(++snakecount % SNAKEBLABINTERVAL)
	)
		error_raw("[%ld snake elements]", snakecount);
	return sp;
}


/*
 * Routine to find the middle snake of an optimial D-path spanning
 * lines A to A+N in file A to lines B to B+N in file B.  Returns the
 * length D of the D-path as a return value, and the upper left and
 * lower right relative coordinates of a snake midway through the D-path.
 */

static long midsnake _((int depth, long A, long N, long B, long M, long *ulx,
	long *uly, long *lrx, long *lry));

static long
midsnake(depth, A, N, B, M, ulx, uly, lrx, lry)
	int	depth;
	long	A;
	long	N;
	long	B;
	long	M;
	long	*ulx;
	long	*uly;
	long	*lrx;
	long	*lry;
{
	long	D;
	long	MAXD;
	long	DELTA;
	long	odd;
	long	x;
	long	y;
	long	k;
	long	changes;
	long	oldx;
	LINE	**lp1;
	LINE	**lp2;

	if (fc.debugflag)
	{
		error_raw("%*ssearching: %ld,%ld to %ld,%ld   midsnake: ",
			depth * 2, "",
			A, B, A + N, B + M);
	}

	DELTA = N - M;
	odd = DELTA & 1;
	MAXD = (M + N + 1) / 2;
	V1[1] = 0;
	V2[-1] = 0;
	changes = -odd - 2;

	/*
	 * This is the main loop for searching for the snake.
	 * D is the distance off the diagonals, and is the number
	 * of changes needed to get from the upper left to the
	 * lower right corner of the region.
	 */
	for (D = 0; D <= MAXD; D++)
	{
		changes += 2;
		if (changes > fc.maxchanges)
		{
			sub_context_ty	*scp;

			scp = sub_context_new();
			sub_var_set(scp, "Number", "%ld", fc.maxchanges);
			fatal_intl
			(
				scp,
				i18n("More than $number changes required")
			);
			/* NOTREACHED */
			sub_context_delete(scp);
		}
		if ((fc.verbosity > VERBOSE_DEFAULT) && (depth == 0) &&
			(changes > 1) && ((changes % CHANGEBLABINTERVAL) <= 1))
				error_raw("[%ld changes]", changes - odd);

		/*
		 * Examine all diagonals within current distance.
		 * First search from upper left to lower right,
		 * and then search from lower right to upper left.
		 */
		for (k = -D; k <= D; k += 2)
		{
			/*
			 * Find the end of the furthest forward D-path
			 * in diagonal k.
			 */
			if (k == -D || (k != D && (V1[k-1] < V1[k+1])))
				x = V1[k+1];
			else
				x = V1[k-1] + 1;
			y = x - k;
			lp1 = &fc.fileA.f_lines[A+x];
			lp2 = &fc.fileB.f_lines[B+y];
			oldx = x;
			while
			(
				(x < N)
			&&
				(y < M)
			&&
				((*lp1)->l_hash == (*lp2)->l_hash)
			)
			{
				x++;
				y++;
				lp1++;
				lp2++;
			}
			V1[k] = x;

			/*
			 * See if path overlaps furthest reverse D-path.
			 * If so, then we have found the snake.
			 */
			if (odd && (k >= (DELTA - (D-1))) &&
				(k <= (DELTA + (D-1))))
			{
				if ((x + V2[k-DELTA]) >= N)
				{
					*ulx = oldx;
					*uly = oldx - k;
					*lrx = x;
					*lry = y;
					if (fc.debugflag)
						error_raw("%ld,%ld to %ld,%ld (odd)",
							*ulx, *uly, *lrx, *lry);
					return changes;
				}
			}
		}

		for (k = -D; k <= D; k += 2)
		{
			/*
			 * Find the end of the furthest reaching reverse
			 * path in diagonal k+DELTA.
			 */
			if (k == D || (k != -D && (V2[k+1] < V2[k-1])))
				x = V2[k-1];
			else
				x = V2[k+1] + 1;
			y = x + k;
			lp1 = &fc.fileA.f_lines[A+N-x-1];
			lp2 = &fc.fileB.f_lines[B+M-y-1];
			oldx = x;
			while
			(
				(x < N)
			&&
				(y < M)
			&&
				((*lp1)->l_hash == (*lp2)->l_hash)
			)
			{
				x++;
				y++;
				lp1--;
				lp2--;
			}
			V2[k] = x;

			/*
			 * See if path overlaps furthest forward D-path.
			 * If so, then we have found the snake.
			 */
			if (!odd && (k <= D-DELTA) && (k >= -D-DELTA))
			{
				if ((x + V1[k+DELTA]) >= N)
				{
					*ulx = N - x;
					*uly = M - y;
					*lrx = N - oldx;
					*lry = *lrx + *uly - *ulx;
					if (fc.debugflag)
						error_raw("%ld,%ld to %ld,%ld (even)",
							*ulx, *uly, *lrx, *lry);
					return changes;
				}
			}
		}
	}
	fatal_raw("Middle snake procedure failed (bug)");
	return 0;
}


/*
 * Recursive routine to find a minimal D-path through the edit graph
 * of the two input files.  Arguments are the beginning line numbers in
 * the files, and the number of lines to examine.  This is basically a
 * divide-and-conquer routine which finds the middle snake of an optimal
 * D-path, then calls itself to find the remainder of the path before the
 * snake and after the snake.
 */

static void findsnake _((int depth, long A, long N, long B, long M));

static void
findsnake(depth, A, N, B, M)
	int	depth;
	long	A;
	long	N;
	long	B;
	long	M;
{
	SNAKE	*sp;
	long	ulx;
	long	uly;
	long	lrx;
	long	lry;
	long	D;
	long	count;

	/*
	 * If more than one change needed, then call ourself for each part.
	 */
	D = midsnake(depth, A, N, B, M, &ulx, &uly, &lrx, &lry);
	if ((fc.verbosity > VERBOSE_DEFAULT) && (depth == 0))
	{
		error_raw("[%ld change%s total]", D, ((D == 1) ? "" : "s"));
		error_raw("[Finding edit path]");
	}

	if (D > 1)
	{
		if ((ulx > 0) && (uly > 0))
			findsnake(depth + 1, A, ulx, B, uly);
		count = lrx - ulx;
		if (count > fc.maxjoin)
		{
			sp = allocsnake();
			sp->line1 = A + ulx;
			sp->line2 = B + uly;
			sp->count = count;
		}
		N -= lrx;
		M -= lry;
		if ((N > 0) && (M > 0))
			findsnake(depth + 1, A + lrx, N, B + lry, M);
		return;
	}

	/*
	 * Only 0 or 1 change needed, so we can compute the result directly.
	 * First compute the snake coming from the upper left corner if any.
	 */
	if (N > M)
		count = uly;
	else
		count = ulx;
	if (count > fc.maxjoin)
	{
		sp = allocsnake();
		sp->line1 = A;
		sp->line2 = B;
		sp->count = count;
	}

	/*
	 * Finally compute the snake coming from the lower right corner if any.
	 */
	count = lrx - ulx;
	if (count > fc.maxjoin)
	{
		sp = allocsnake();
		sp->line1 = A + ulx;
		sp->line2 = B + uly;
		sp->count = count;
	}
}


/*
 * Routine to generate the D-path list and compute the number
 * of insertions, deletions and matching lines found.
 */

static void makesnakes _((void));

static void
makesnakes()
{
	SNAKE	*sp;		/* current snake element */
	long	line1;		/* current line in file A */
	long	line2;		/* current line in file B */

	if (fc.verbosity > VERBOSE_DEFAULT)
		error_raw("[Beginning comparisons]");
	tablesize = fc.maxlines * 2 + 1;
	V1 = (long *)cm_alloc_and_check(sizeof(long) * tablesize);
	V2 = (long *)cm_alloc_and_check(sizeof(long) * tablesize);
	V1 += fc.maxlines;
	V2 += fc.maxlines;
	if ((fc.fileA.f_linecount > 0) && (fc.fileB.f_linecount > 0))
		findsnake(0, 0L, fc.fileA.f_linecount, 0L,
			fc.fileB.f_linecount);

	/*
	 * End the list with the lower right endpoint
	 */
	sp = allocsnake();
	sp->line1 = fc.fileA.f_linecount;
	sp->line2 = fc.fileB.f_linecount;
	sp->count = 0;
	if (fc.verbosity > VERBOSE_DEFAULT)
		error_raw("[Comparisons complete]");
	if (fc.debugflag)
	{
		error_raw("%8s%8s%8s", "line1", "line2", "count");
		for (sp = fc.snakelist; sp; sp = sp->next)
			error_raw("%8ld%8ld%8ld", sp->line1, sp->line2, sp->count);
	}

	/*
	 * Scan the snake list and calculate the number of inserted,
	 * deleted, and matching lines.
	 */
	line1 = 0;
	line2 = 0;
	for (sp = fc.snakelist; sp; sp = sp->next)
	{
		fc.deletes += (sp->line1 - line1);
		fc.inserts += (sp->line2 - line2);
		fc.matches += sp->count;
		line1 = sp->line1 + sp->count;
		line2 = sp->line2 + sp->count;
	}
}


/*
 * Print the specified range of line numbers for both files.
 * The given string is also typed as part of the message.
 * This routine handles both full and line number output.
 * The real line numbers in each file are typed.
 */

static void printrange _((FILE *fp, char *msg, long line1, long count1,
	long line2, long count2));

static void
printrange(fp, msg, line1, count1, line2, count2)
	FILE		*fp;	/* output file */
	char		*msg;	/* string to type as part of output */
	long		line1;
	long		count1;
	long		line2;
	long		count2;
{
	long		beg1;
	long		end1;
	long		beg2;
	long		end2;	/* real beginning and ending line numbers */
	string_ty	*buf1;
	string_ty	*buf2;

	end1 = line1 + count1 - 1;
	end2 = line2 + count2 - 1;
	if (line1 >= fc.fileA.f_linecount)
	{
		beg1 = fc.fileA.f_linestotal + 1;
		end1 = beg1;
	}
	else
	{
		beg1 = fc.fileA.f_lnums[line1];
		end1 = fc.fileA.f_lnums[end1];
	}
	if (line2 >= fc.fileB.f_linecount)
	{
		beg2 = fc.fileB.f_linestotal + 1;
		end2 = beg2;
	}
	else
	{
		beg2 = fc.fileB.f_lnums[line2];
		end2 = fc.fileB.f_lnums[end2];
	}
	if (beg1 == end1)
		buf1 = str_format("%ld", beg1);
	else
		buf1 = str_format("%ld-%ld", beg1, end1);
	if (beg2 == end2)
		buf2 = str_format("%ld", beg2);
	else
		buf2 = str_format("%ld-%ld", beg2, end2);
	if (fc.editscriptflag)
	{
		fprintf
		(
			fp,
			"%c %s %s\n",
			*msg,
			buf1->str_text,
			buf2->str_text
		);
	}
	else
	{
		fprintf
		(
			fp,
			"\n******************** %s [A%s, B%s]:\n",
			msg,
			buf1->str_text,
			buf2->str_text
		);
	}
	str_free(buf1);
	str_free(buf2);
}


/*
 * Translate all tabs to the appropriate number of spaces in a line.
 * This is used when the line is not output at a nice tab boundary,
 * so that the output still looks correct.  Returns the new converted
 * string, which is only valid until the next call.
 */

static char *detab _((char *));

static char *
detab(str)
	char		*str;
{
	int		pos;		/* current character position */
	int		ch;		/* current character */
	char		*destcp;	/* destination pointer */
	static char	*strbuf;	/* string buffer */
	static int	maxlen;		/* maximum size of string buffer */

	if (!maxlen)
	{
		maxlen = DETABALLOCSIZE;
		strbuf = r_alloc_and_check(maxlen + TABSIZE + 1);
	}

	destcp = strbuf;
	pos = 0;
	for (;;)
	{
		ch = *str++;
		if (!ch)
			break;
		if (pos >= maxlen)
		{
			maxlen += DETABALLOCSIZE;
			strbuf = r_realloc_and_check(strbuf, maxlen + TABSIZE + 1);
			destcp = strbuf + pos;
		}
		if (ch != '\t')
		{
			*destcp++ = ch;
			pos++;
		}
		else
		{
			for (;;)
			{
				*destcp++ = ' ';
				pos++;
				if ((pos % TABSIZE) == 0)
					break;
			}
		}
	}
	*destcp = '\0';
	return strbuf;
}


/*
 * Print the specified number of lines of a file beginning at the
 * specified line number.  This prints imbedded blank lines too.
 * The lines can be preceeded by line number if desired.  Context
 * lines can be added also.  Tabs are converted to the appropriate
 * number of spaces if line numbering or tags are supplied.
 */

static void printlines _((FILE *fp, FILEINFO *fi, long line, long count,
	char *tagstr));

static void
printlines(fp, fi, line, count, tagstr)
	FILE		*fp;		/* output file */
	FILEINFO	*fi;		/* file information */
	long		line;
	long		count;
	char		*tagstr;	/* tag for line (if whatflag) */
{
	LINE		**lines;	/* data of file */
	long		*lnum;		/* line numbers */
	char		*data;		/* current data line */
	long		curlnum;	/* current line number */

	line = line - fc.context - 1;
	count += (2 * fc.context);
	if (line < 0)
	{
		count += line;
		line = 0;
	}
	if ((line + count) > fi->f_linecount)
		count = fi->f_linecount - line;
	lines = fi->f_lines + line;
	lnum = fi->f_lnums + line;
	curlnum = *lnum;
	while (count-- > 0)
	{
		while (curlnum < *lnum)
		{
			if (fc.whatflag)
				fprintf(fp, "%s\n", tagstr);
			else if (fc.numflag)
				fprintf(fp, "%c%ld\n", fi->f_tag, curlnum);
			else
				putc('\n', fp);
			curlnum++;
		}
		if (fc.whatflag)
			fprintf(fp, "%s  ", tagstr);
		else if (fc.numflag)
			fprintf(fp, "%c%-7ld ", fi->f_tag, curlnum);
		data = (*lines)->l_data;
		if (fc.whatflag)
			data = detab(data);
		fputs(data, fp);
		lines++;
		lnum++;
		curlnum++;
	}
}


/*
 * Dump lines which match, either just the line numbers or with the text.
 */

void
dumpmatch(outputname)
	char	*outputname;	/* output name, or NULL if terminal */
{
	FILE	*fp;
	SNAKE	*sp;

	if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
		error_raw("[Writing matching lines to \"%s\"]", outputname);
	if (outputname)
	{
		fp = fopen_and_check(outputname, "w");
	}
	else
	{
		fp = stdout;
		outputname = gettext("standard output");
	}
	if (fc.editscriptflag)
		fprintf(fp, "A %s\nB %s\n", fc.fileA.f_name, fc.fileB.f_name);
	else
	{
		fprintf(fp, "FILE A: %s\nFILE B: %s\n", fc.fileA.f_name, fc.fileB.f_name);
		fprintf(fp, "TOTALS: %ld inserted  %ld deleted  %ld matched\n",
			fc.inserts, fc.deletes, fc.matches);
	}
	for (sp = fc.snakelist; sp; sp = sp->next)
	{
		if (sp->count <= 0)
			continue;
		printrange(fp, "MATCH", sp->line1, sp->count, sp->line2, sp->count);
		if (!fc.editscriptflag)
		printlines(fp, &fc.fileB, sp->line2 + 1, sp->count, "");
	}
	if (fc.editscriptflag)
		fprintf(fp, "T %ld %ld %ld\n",
			fc.inserts, fc.deletes, fc.matches);
	fflush_and_check(fp, outputname);
	fclose_and_check(fp, outputname);
}


/*
 * Dump the edit script in the normal manner, showing changed line
 * numbers along with the relevent text.
 */

void
dumpnormal(outputname)
	char	*outputname;	/* output filename */
{
	FILE	*fp;		/* output file */
	SNAKE	*sp;		/* current snake */
	long	line1;
	long	line2;		/* current line numbers */
	long	count1;
	long	count2;		/* line counts */

	if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
		error_raw("[Writing differences to \"%s\"]", outputname);
	if (outputname)
	{
		fp = fopen_and_check(outputname, "w");
	}
	else
	{
		fp = stdout;
		outputname = gettext("standard output");
	}
	if (fc.quickflag)
	{
		fprintf(fp, "Files \"%s\" and \"%s\":\n", fc.fileA.f_name, fc.fileB.f_name);
		fprintf(fp, "%ld inserted  %ld deleted  %ld matched\n",
			fc.inserts, fc.deletes, fc.matches);
		goto done;
	}
	if ((fc.inserts == 0) && (fc.deletes == 0))
	{
		if (fc.verbosity)
			fprintf(fp, "Files \"%s\" and \"%s\" are identical\n",
				fc.fileA.f_name, fc.fileB.f_name);
		goto done;
	}
	fprintf(fp, "FILE A: %s\nFILE B: %s\nTOTALS: %ld inserted  %ld deleted  %ld matched\n",
		fc.fileA.f_name, fc.fileB.f_name, fc.inserts, fc.deletes, fc.matches);
	line1 = 0;
	line2 = 0;
	for (sp = fc.snakelist; sp; sp = sp->next)
	{
		count1 = sp->line1 - line1;
		count2 = sp->line2 - line2;
		if (count1 && count2)
		{
			printrange(fp, "REPLACE", line1, count1, line2, count2);
			printlines(fp, &fc.fileA, line1 + 1, count1, "");
			fputs("******************** WITH:\n", fp);
			printlines(fp, &fc.fileB, line2 + 1, count2, "");
		}
		else if (count1)
		{
			printrange(fp, "DELETE", line1, count1, line2, 1L);
			printlines(fp, &fc.fileA, line1 + 1, count1, "");
		}
		else if (count2)
		{
			printrange(fp, "INSERT", line1, 1L, line2, count2);
			printlines(fp, &fc.fileB, line2 + 1, count2, "");
		}
		line1 = sp->line1 + sp->count;
		line2 = sp->line2 + sp->count;
	}

done:
	fflush_and_check(fp, outputname);
	fclose_and_check(fp, outputname);
}


/*
 * Dump all of both files showing what happened to each line.
 */

void
dumpwhat(outputname)
	char	*outputname;	/* output name, or NULL if terminal */
{
	FILE	*fp;
	SNAKE	*sp;
	long	line1;
	long	line2;		/* current line numbers */
	long	count1;
	long	count2;		/* line counts */

	if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
		error_raw("[Writing what changes were made to \"%s\"]", outputname);
	if (outputname)
	{
		fp = fopen_and_check(outputname, "w");
	}
	else
	{
		fp = stdout;
		outputname = "(stdandard output)";
	}
	fprintf(fp, "FILE A: %s\n", fc.fileA.f_name);
	fprintf(fp, "FILE B: %s\n", fc.fileB.f_name);
	fprintf
	(
		fp,
		"TOTALS: %ld inserted  %ld deleted  %ld matched\n",
		fc.inserts,
		fc.deletes,
		fc.matches
	);
	line1 = 0;
	line2 = 0;
	for (sp = fc.snakelist; sp; sp = sp->next)
	{
		count1 = sp->line1 - line1;
		count2 = sp->line2 - line2;
		if (count1 && count2)
		{
			printlines(fp, &fc.fileA, line1 + 1, count1, "|-");
			printlines(fp, &fc.fileB, line2 + 1, count2, "|+");
		}
		else if (count1)
			printlines(fp, &fc.fileA, line1 + 1, count1, "|-");
		else if (count2)
			printlines(fp, &fc.fileB, line2 + 1, count2, "|+");
		printlines(fp, &fc.fileB, sp->line2 + 1, sp->count, "  ");
		line1 = sp->line1 + sp->count;
		line2 = sp->line2 + sp->count;
	}
	fflush_and_check(fp, outputname);
	fclose_and_check(fp, outputname);
}


/*
 * Dump only the line numbers which have changed.
 * This is useful for machine processing of the differences.
 */

void
dumplines(outputname)
	char	*outputname;	/* output file name */
{
	FILE	*fp;		/* output file */
	SNAKE	*sp;		/* current snake */
	long	line1;
	long	line2;		/* previous line numbers */
	long	count1;
	long	count2;		/* count of line numbers */

	if ((fc.verbosity > VERBOSE_DEFAULT) && outputname)
		error_raw("[Writing edit script to \"%s\"]", outputname);
	if (outputname)
	{
		fp = fopen_and_check(outputname, "w");
	}
	else
	{
		fp = stdout;
		outputname = gettext("standard output");
	}
	line1 = 0;
	line2 = 0;
	fprintf(fp, "A %s\nB %s\n", fc.fileA.f_name, fc.fileB.f_name);
	for (sp = fc.snakelist; sp; sp = sp->next)
	{
		count1 = sp->line1 - line1;
		count2 = sp->line2 - line2;
		if (count1 && count2)
			printrange(fp, "R", line1, count1, line2, count2);
		else if (count1)
			printrange(fp, "D", line1, count1, line2, 1L);
		else if (count2)
			printrange(fp, "I", line1, 1L, line2, count2);
		line1 = sp->line1 + sp->count;
		line2 = sp->line2 + sp->count;
	}
	fprintf(fp, "T %ld %ld %ld\n", fc.inserts, fc.deletes, fc.matches);
	fflush_and_check(fp, outputname);
	fclose_and_check(fp, outputname);
}


/*
 * Reinitialize for a new file comparison.
 * This zeroes out our static variables so that another
 * file comparison will work correctly.
 */
void
fcompreset()
{
	tablesize = 0;
	snakecount = 0;
	availchars = 0;
	fc.inserts = 0;
	fc.deletes = 0;
	fc.matches = 0;
	fc.snakelist = NULL;
	nextchar = NULL;
	V1 = NULL;
	V2 = NULL;
	nextsnake = NULL;
	lastsnake = NULL;
	endsnake = NULL;
	hashtable = NULL;
}


void
binary_warning(filename)
	const char	*filename;
{
	sub_context_ty	*scp;

	scp = sub_context_new();
	sub_var_set(scp, "File_Name", "%s", filename);
	error_intl(scp, i18n("warning: $filename is binary"));
	sub_context_delete(scp);
}


void
binary_fatal(filename)
	const char	*filename;
{
	sub_context_ty	*scp;

	scp = sub_context_new();
	sub_var_set(scp, "File_Name", "%s", filename);
	fatal_intl(scp, i18n("$filename is binary"));
	/* NOTREACHED */
	sub_context_delete(scp);
}


/*
 * Routine to do the comparison of two files.
 * If either of the input filenames is NULL, then this indicates
 * that the file has already been read by an earlier call.
 */

void
fcomp(nameA, nameB)
	char	*nameA;
	char	*nameB;		/* file names to compare */
{
	long	changes;	/* total number of changes */

	fc.fileA.f_tag = 'A';
	fc.fileB.f_tag = 'B';
	if (nameA)
	{
		fc.fileA.f_name = nameA;
		fc.fileA.f_file = fopen_and_check(nameA, "r");
	}
	if (nameB)
	{
		fc.fileB.f_name = nameB;
		fc.fileB.f_file = fopen_and_check(nameB, "r");
	}
	if (!nameA && !nameB)
	{
		fatal_intl(0, i18n("input file names not supplied"));
	}
	if (!hashtable)
	{
		int	j;

		hashtable =
			(LINE **)cm_alloc_and_check(HASHSIZE * sizeof(LINE *));
		for (j = 0; j < HASHSIZE; ++j)
			hashtable[j] = 0;
	}
	if (nameA)
	{
		readfile(&fc.fileA, fc.fileAskip, fc.fileAmodify);
		if (fc.fileA.is_binary)
			binary_warning(nameA);
	}
	if (nameB)
	{
		readfile(&fc.fileB, fc.fileBskip, fc.fileBmodify);
		if (fc.fileB.is_binary)
			binary_warning(nameB);
	}
	changes = fc.fileA.f_linecount - fc.fileB.f_linecount;
	if (changes < 0)
		changes = -changes;
	if (changes > fc.maxchanges)
	{
		sub_context_ty	*scp;

		scp = sub_context_new();
		sub_var_set(scp, "Number", "%ld", fc.maxchanges);
		fatal_intl(scp, i18n("More than $number changes required"));
		/* NOTREACHED */
		sub_context_delete(scp);
	}
	makesnakes();
}


void
modifyline_register(func)
	modifyline_type func;
{
	modifyline = func;
}
