
#include "yodl.h"

						/* return a token */
/*
 * SETTOKEN(x): store token code x in lextok, call dumpstate (), and
 * return from the enclosing function.
 *
 * The expansion is a brace-enclosed block ending in a bare `return;', so
 * the macro is only usable inside a function returning void.  It is a
 * plain block, not a do/while(0) wrapper: call sites in this file invoke
 * it without a trailing semicolon, including directly before an `else',
 * and depend on that form.
 */
#define SETTOKEN(x)     { lextok = x; dumpstate (); return; }

/*
 * dumpstate: report the current lexer state through message ().
 *
 * Does nothing unless the global `verbose' is at least 5.  Otherwise it
 * prints lexbuf, lextok and the string lexer_pushedp points to (labelled
 * "pre-pushed input"); "<none>" is printed in its place when
 * lexer_pushedp is null or points to an empty string.
 */
static void
dumpstate ()
{
  if (verbose < 5)
    return;

  message (5,
           "\n---- Lexer state: ----\n"
           "lexbuf: %s, lextok: %d\n"
           "pre-pushed input: %s\n",
           lexbuf, lextok,
           (lexer_pushedp && *lexer_pushedp) ? lexer_pushedp : "<none>");
}

/*
 * readnextchar: fetch the next input character via lexer_getc (),
 * treating a backslash immediately followed by a newline as a line
 * continuation.
 *
 *  - Any character other than a backslash is returned unchanged.
 *  - A backslash followed by something other than a newline is returned
 *    as the backslash itself; the character read after it is handed back
 *    with lexer_pushchar ().
 *  - A backslash followed by a newline is dropped together with every
 *    blank and tab that follows, and the first character after that
 *    whitespace is returned.  That character is returned as read: it is
 *    not examined for a further backslash-newline pair.
 */
static int
readnextchar ()
{
  int c = lexer_getc ();

  if (c == '\\')
    {
      int after = lexer_getc ();

      if (after == '\n')
        {
          /* Continuation line: skip its leading blanks and tabs. */
          do
            c = lexer_getc ();
          while (c == ' ' || c == '\t');
        }
      else
        {
          /* Ordinary backslash: un-read what followed it; c stays '\\'. */
          lexer_pushchar (after);
        }
    }

  return c;
}

/*
 * lexer: read one token from the input.
 *
 * The token text is left in the global lexbuf and the token code in the
 * global lextok.  Every exit goes through SETTOKEN, which also calls
 * dumpstate () before returning.  Token codes produced:
 *
 *   tok_eof       lexer_eof () reported end of input (lexbuf is "")
 *   tok_space     a single blank or tab
 *   tok_symbol    a run of identifier characters that is followed by '('
 *                 and accepted by gram_hasident (); also the same thing
 *                 preceded by a single '+', in which case lexbuf holds
 *                 the text without the '+'
 *   tok_string    any other run of identifier characters, a run of
 *                 digits, or a run of '+' characters minus its last one
 *   tok_openpar   '('
 *   tok_closepar  ')'
 *   tok_newline   '\n'
 *   tok_anychar   any other single character
 */
void
lexer ()
{
  int
    c,                          /* character under examination */
    ahead;                      /* one-character lookahead after a '+' */

  /* Every token starts from a fresh, empty text buffer. */
  free (lexbuf);
  lexbuf = xstrdup ("");

  if (lexer_eof ())
    {
      SETTOKEN (tok_eof);
    }

  c = readnextchar ();

  /* Blanks and tabs are reported one character at a time. */
  if (c == ' ' || c == '\t')
    {
      lexbuf = str_addchar (lexbuf, c);
      SETTOKEN (tok_space);
    }

  /* Run of identifier characters: either SYMBOL( or plain text. */
  if (isIdentChar (c))
    {
      while (isIdentChar (c))
        {
          lexbuf = str_addchar (lexbuf, c);
          c = readnextchar ();
        }

      /* The terminating character belongs to the next token. */
      lexer_pushchar (c);

      if (c == '(' && gram_hasident (lexbuf))
        {
          SETTOKEN (tok_symbol);
        }
      else
        {
          SETTOKEN (tok_string);
        }
    }

  /* Characters that form a token entirely on their own. */
  switch (c)
    {
    case '(':
      lexbuf = str_addchar (lexbuf, '(');
      SETTOKEN (tok_openpar);

    case ')':
      lexbuf = str_addchar (lexbuf, ')');
      SETTOKEN (tok_closepar);

    case '\n':
      lexbuf = str_addchar (lexbuf, '\n');
      SETTOKEN (tok_newline);

    default:
      break;
    }

  /* Run of digits: reported as a string. */
  if (isdigit (c))
    {
      while (isdigit (c))
        {
          lexbuf = str_addchar (lexbuf, c);
          c = readnextchar ();
        }

      lexer_pushchar (c);
      SETTOKEN (tok_string);
    }

  /* '+' may start a run of pluses or introduce +SYMBOL(. */
  if (c == '+')
    {
      ahead = lexer_peek ();

      if (ahead == '+')
        {
          /* Keep the current '+' and start collecting the rest. */
          lexbuf = str_addchar (lexbuf, '+');
          c = readnextchar ();

          while (c == '+')
            {
              if (lexer_peek () != '+')
                {
                  /*
                   * c is the last '+' of the run: it is pushed back
                   * rather than stored (presumably so the next call can
                   * test it as a possible +SYMBOL prefix), and the
                   * pluses gathered so far are returned as a string.
                   */
                  lexer_pushchar (c);
                  SETTOKEN (tok_string);
                }

              lexbuf = str_addchar (lexbuf, '+');
              c = readnextchar ();
            }
        }

      if (isIdentChar (ahead))
        {
          /*
           * Tokenize what follows the '+' with a recursive call; it
           * replaces lexbuf and lextok with the result for that text.
           */
          lexer ();

          if (lextok == tok_symbol)
            {
              /* +SYMBOL( is reported exactly like SYMBOL(. */
              SETTOKEN (tok_symbol);
            }

          /* Not a symbol: give the text back to the input and drop it. */
          lexer_pushstr (lexbuf);
          free (lexbuf);
          lexbuf = 0;
        }

      /*
       * Fall through to the single-character case.  When the lookahead
       * was not a '+', c still holds the '+' read above.
       */
    }

  /*
   * Anything else is a one-character token.  After the push-back branch
   * above lexbuf is null here, so this relies on str_addchar ()
   * accepting a null string -- NOTE(review): confirm.
   */
  lexbuf = str_addchar (lexbuf, c);
  SETTOKEN (tok_anychar);
}
