/*  VER 144  TAB P   $Id: sys.c,v 1.23 1999/04/24 18:13:31 src Exp $
 *
 *  handle the local sys or newsfeeds file
 *
 *  copyright 1996, 1997 Egil Kvaleberg, egil@kvaleberg.no
 *  the GNU General Public License applies
 *
 *  $Log: sys.c,v $
 *  Revision 1.23  1999/04/24 18:13:31  src
 *  Allowing '@' in newsfeeds syntax, making a special case of it
 *  in the group list
 *
 *  Revision 1.22  1999/04/24 17:55:51  src
 *  Improved message re. syntax errors
 *
 *  Revision 1.21  1999/04/18 09:29:47  src
 *  --groups with INN syntax
 *
 *  Revision 1.20  1999/04/17 07:10:05  src
 *  Repaired --groups behaviour
 *
 *  Revision 1.19  1999/04/07 07:25:16  src
 *  Fixed backslash-something
 *
 *  Revision 1.18  1999/03/24 03:53:02  src
 *  Implemented "newsx" as magic exclude pattern
 *
 *  Revision 1.17  1999/03/09 11:43:57  src
 *  Cosmetic fixes
 *
 *  Revision 1.16  1999/03/07 14:58:19  src
 *  Read newsconfig supported. Storage API supported.
 *
 *  Revision 1.15  1998/11/24 17:25:42  src
 *  Version 1.3pre2: Repaired INN mode handling of ME entry
 *
 *  Revision 1.14  1998/11/22 18:02:32  src
 *  Filtering repaired
 *
 *  Revision 1.13  1998/11/22 14:00:44  src
 *  Cleaned up sys_group() interface
 *
 *  Revision 1.12  1998/09/11 09:17:44  src
 *  Check path consistency (--no-path) and length (--max-path)
 *  GNU style option --help, --version, --dry-run, changed --noxx to --no-xx
 *  Check for putenv and setenv, added xstrcpy
 *
 *  Revision 1.11  1998/09/09 07:32:14  src
 *  Version 1.1
 *
 *  Revision 1.10  1998/09/03 05:14:30  src
 *  Improved bad syntax messages
 *
 *  Revision 1.8  1998/09/03 02:49:31  src
 *  Fixed stuff detected by -Wall
 *
 *  Revision 1.7  1998/09/02 06:50:31  src
 *  newsx version 1.0
 *
 *  Revision 1.6  1998/09/02 06:34:41  src
 *  Support @-syntax in newsfeeds, and support AUTHINFO GENERIC
 */

#include "common.h"
#include "proto.h"
#include "options.h"
#include "news.h"
#include "newsconfig.h"

#include <string.h>

/* 
 *  globals for sys
 */
/* one pattern of a group list; the lists are singly linked */
typedef struct sys_grp {
    struct sys_grp *next;   /* following entry, 0 at the end of the list */
    char not;               /* nonzero: a match on this pattern excludes the group */
    char name[1]; /* pattern text; sys_group() allocates the entry large enough to hold it */
} SYS_GRP;

static SYS_GRP *sysgrp = 0;         /* newsfeed entry for the spool (in INN mode also the ME entry) */
static SYS_GRP *sys_me = 0;         /* ME entry for Cnews (unused in INN mode) */
static SYS_GRP *sys_extra = 0;      /* from command line (--groups), filled by add_extra() */

/*
 *  record one group pattern in a list
 *
 *  group   pattern text; skipsp() is applied to it first, and a
 *          pattern that is empty after that is silently dropped
 *  sys     head of the list that receives the pattern
 *  not     nonzero if the pattern is an exclusion
 *
 *  the new entry becomes the head of the list, so walking a list
 *  visits the patterns in the reverse of the order they were added
 */
static void
sys_group(char *group,SYS_GRP **sys,int not)
{
    SYS_GRP *entry;
    char *name;
    int size;

    name = skipsp(group);
    if (*name == '\0')
	return;

    /* name[1] inside the struct already pays for the terminating NUL */
    size = sizeof(SYS_GRP) + strlen(name);
    entry = xmalloc(size);

    strcpy(entry->name,name);
    entry->not = not;

    /* push on the front of the list */
    entry->next = *sys;
    *sys = entry;
}

/*
 *  add newsgroup patterns given with --groups
 *
 *  grp_list is a comma separated list. an item starting with '!'
 *  or '@' is an exclusion, anything else is an inclusion.
 *  every item ends up in the sys_extra list.
 *
 *  the list is cut up in place while it is being scanned, but each
 *  comma is put back before moving on to the next item
 */
void
add_extra(char *grp_list)
{
    char *comma;
    char *name;
    int exclude;

    do {
	/* isolate the current item */
	comma = strchr(grp_list,',');
	if (comma)
	    *comma = '\0';

	name = skipsp(grp_list);
	if (*name == '\\')
	    ++name; /* occurs due to bash behaviour */

	exclude = (*name == '!' || *name == '@');
	if (exclude)
	    ++name;

	log_msg(L_DEBUGMORE,"add --groups %s \"%s\"",exclude?"exclude":"include",
						 name);
	sys_group(name,&sys_extra,exclude);

	if (comma) {
	    /* repair the list and step to the next item */
	    *comma = ',';
	    grp_list = comma+1;
	}
    } while (comma);
}

/*
 *  for C News:
 *  compare a group name with a sys-file pattern, word by word
 *
 *  returns 0 if the pattern does not match, otherwise a positive
 *  "worth" that grows with the number of words matched:
 *      1       the pattern is a prefix of the group, or the word "all"
 *      100     the last word of pattern and group are identical
 *      +100    for every literal word matched before that
 *      +99     for every "all" word matched before that
 *
 *  a backslash in the pattern is skipped, the character following
 *  it is then compared as usual
 */
static int
sys_match(char *group,char *pattern)
{
    int rest;
    char *dot;

    /* pattern starts with the word "all"? */
    if (strncmp(pattern,"all",3) == 0) {
	if (pattern[3] == '\0')
	    return 1; /* e.g. "rec.humor" with pattern "all" */
	if (pattern[3] == '.') {
	    /* e.g. "rec.humor" with pattern "all.something" */
	    dot = strchr(group,'.');
	    rest = sys_match(dot ? dot+1 : "",pattern+4);
	    return rest ? rest+99 : 0;
	}
	/* something like "allfoo": an ordinary word */
    }

    /* compare one literal word */
    for (; *group; ++group, ++pattern) {
	if (*pattern == '\\')
	    ++pattern;
	if (*group == '.') {
	    /* end of a word in the group name */
	    if (*pattern == '\0')
		return 1; /* e.g. "rec.humor" with pattern "rec" */
	    if (*pattern != '.')
		return 0; /* e.g. "rec.humor" with pattern "recsomething" */
	    /* e.g. "rec.humor" with pattern "rec.all" */
	    rest = sys_match(group+1,pattern+1);
	    return rest ? rest+100 : 0;
	}
	if (*group != *pattern)
	    return 0;
    }

    /*
     * group exhausted: only an exact match counts,
     * "alt.humor" with pattern "alt.humor.all" does not match
     */
    return (*pattern == '\0') ? 100 : 0;
}

/*
 *  is group allowed in sys-file?
 *
 *  Behaviour for INN, from newsfeeds(5):
 *
 *      For example,  to  receive  all
 *      ``comp''  groups,  but  only  comp.sources.unix within the
 *      sources newsgroups, the following set of patterns  can  be
 *      used:
 *          comp.*,!comp.sources.*,comp.sources.unix
 *      There  are  three  things to note about this example.  The
 *      first is that the trailing ``.*'' is required.  The second
 *      is  that,  again, the result of the last match is the most
 *      important.  The third is that ``comp.sources.*'' could  be
 *      written  as  ``comp.sources*'' but this would not have the
 *      same effect if there were a ``comp.sources-only'' group.
 * 
 *  Further quote from newsfeeds(5):
 *
 *      There  is  also  a  way  to subscribe to a newsgroup nega-
 *      tively.  That is to say, do not send this  group  even  if
 *      the article is cross-posted to a subscribed newsgroup.  If
 *      the first character of a pattern is an  atsign  ``@'',  it
 *      means that any article posted to a group matching the pat-
 *      tern will not be sent  even  though  the  article  may  be
 *      cross-posted  to  a  group  which is subscribed.  The same
 *      rules of precedence apply in that the last  match  is  the
 *      one which counts.
 *
 *  Since we only use these patterns for deciding which groups
 *  to fetch we should treat '@' and '!' the same way: do not
 *  fetch the group. But we need to support the syntax -- PMM 08/1998
 */
static int 
sys_allow_grp_inn(char *group, SYS_GRP *sys_grp)
{
    SYS_GRP *entry;
    int seen_include = 0;

    /*
     * INN lets the last matching pattern decide. the list holds
     * the patterns last-first, so the first wildmat() hit found
     * while walking it is the one that counts.
     */
    entry = sys_grp;
    while (entry) {
	if (!entry->not)
	    seen_include = 1;
	if (wildmat(group, entry->name)) {
	    /* '!' and '@' patterns reject, plain patterns accept */
	    return entry->not ? 0 : 1;
	}
	entry = entry->next;
    }

    /*
     * nothing matched: normally a no. the exception is a command
     * line list made up of exclusions only, which would otherwise
     * be meaningless, so that one defaults to a yes.
     */
    if (!seen_include && sys_grp == sys_extra)
	return 1;
    return 0;
}

/*
 *  is group allowed in sys-file?
 *
 *  C News:
 *  the order of the patterns is not significant.
 *  every pattern is rated with sys_match(); the pattern with the
 *  highest rating decides, and on equal rating an exclusion beats
 *  an inclusion. the rating reflects the number of words matched,
 *  "all" counting slightly less than other words.
 *  a group matched by no pattern at all is not allowed
 */
static int 
sys_allow_grp_cnews(char *group, SYS_GRP *sp)
{
    int best = 0;       /* highest rating seen so far */
    int allowed = 0;    /* verdict of the pattern holding that rating */
    int worth;

    while (sp) {
	worth = sys_match(group,sp->name);
	if (worth) {
	    /* an exclusion takes over on a tie, an inclusion must be better */
	    if (sp->not ? (worth >= best) : (worth > best)) {
		best = worth;
		allowed = sp->not ? 0 : 1;
	    }
	}
	sp = sp->next;
    }
    return allowed;
}

/*
 *  is group allowed by the given pattern list?
 *  applies the INN rules if inn_opt is set, the C News rules otherwise
 */
static int 
sys_allow_grp(char *group, SYS_GRP *sys_grp)
{
    if (inn_opt)
	return sys_allow_grp_inn(group, sys_grp);

    return sys_allow_grp_cnews(group, sys_grp);
}

/*
 *  is group allowed in sys-file?
 *
 *  a group must pass up to three lists, checked in this order:
 *      the --groups list, if any (always with INN rules)
 *      the ME list, if any
 *      the list from the newsfeed entry
 *  the first list that rejects the group is logged, and the
 *  remaining ones are not consulted
 *
 *  returns 1 if the group is allowed, 0 if not
 */
int 
sys_allow(char *group)
{
    int ok = 1;

    if (sys_extra && !sys_allow_grp_inn(group, sys_extra)) {
	log_msg(L_DEBUGMORE,"group %s excluded by --groups",group);
	ok = 0;
    } else if (sys_me && !sys_allow_grp(group, sys_me)) {
	log_msg(L_DEBUGMORE,"group %s not accepted by ME",group);
	ok = 0;
    } else if (!sys_allow_grp(group, sysgrp)) {
	log_msg(L_DEBUGMORE,"group %s disallowed by %s",group, cfg_newsfeeds);
	ok = 0;
    }
    return ok;
}

/*
 *  pick an item from a C News or INN style sys file
 *  items are:
 *      text
 *      "!" 
 *      "/" 
 *      "," 
 *      ":" 
 *      "#" 
 *      "\n"
 *      "@" now added -- PMM
 *
 *  it turns out the '@'-sign has a special meaning only in group names
 *  this syntax is so ugly it can make you sick, but we try to constrain
 *  ourselves and try to make the best out of it
 */
static void 
sys_item(char *item)
{
    /*
     * the parser is a state machine driven one item per call, so
     * all of its context lives in statics:
     *   state   0: expecting a site name
     *           1: skipping the rest of the line
     *           2: site name seen, expecting '/' or ':'
     *           5: inside the exclusion list
     *           3: inside the group list
     *           4: inside the group list, next group is negated
     *   lineno  line number used in the syntax error message
     *   sys     the list that receives the groups of the current entry
     */
    static int state = 0;
    static int lineno = 1;
    static SYS_GRP **sys = &sysgrp;

    switch (state) { 
	/*
	 *  state: site
	 */
    case 0:
    default:               
	switch (*item) {
	case '#':
	    /* comment: ignore up to end of line */
	    state = 1;
	    break;
	case '\n':
	    /* empty line */
	    break;
	case '!':
	case '/':
	case ',':
	case ':':
	    /* error: a separator where a site name should be */
	    /* states 2 and 5 also jump here to report their errors */
	  bad_syntax:
	    log_msg(L_ERR,"bad syntax in %s line %d , '%s' not expected (state %d)",
			      cfg_newsfeeds, lineno, item,                 state);
	    state = 1;
	    break;
	default:
	    if (strcmp(item,"ME") == 0) {
		/*
		 * the handling of ME differs significantly depending on
		 * which news system we are using.
		 *
		 * For Cnews, from newssys(5):
		 *   The ME line, or a line whose system
		 *   name is explicitly that of the machine news is running on,
		 *   has a rather different meaning from that of the other  sys
		 *   file  lines:  its  subscriptions  subfield  identifies the
		 *   newsgroups that this site subscribes to (i.e.  is  willing
		 *   to  receive)
		 *
		 * BUG: explicit machine name from whoami not implemented.
		 *
		 * For INN, from newsfeeds(5):
		 *   If the ME entry has a subscription list, then that list is
		 *   automatically  prepended  to  the subscription list of all
		 *   other entries.
		 */
		log_msg(L_DEBUGMORE,"found ME in %s", cfg_newsfeeds);
		state = 2;
		if (inn_opt) sys = &sysgrp; /* INN: prepend to general entry */
		else sys = &sys_me;         /* Cnews: local subscription */
	    } else if (strcmp(item,spoolname) == 0) {
		/* the entry for the spool we are serving */
		log_msg(L_DEBUG,"found entry for %s in %s", item, cfg_newsfeeds);
		state = 2;
		sys = &sysgrp;
	    } else {
		/* entry for some other site: not our business */
		log_msg(L_DEBUGMORE,"skipping entry for %s in %s",
							item, cfg_newsfeeds);
		state = 1;
	    }
	    break;
	}
	break;

	/* 
	 *  state: skip rest of line
	 */
    case 1:                
	switch (*item) {
	default:
	    break;
	case '\n':
	    state = 0;
	    break;
	}
	break;

	/* 
	 *  state: await exclusions or groups
	 */
    case 2:
	switch (*item) {
	case ':':
	    /* only use this default if there are no exclusions */
	    /* BUG: verify that this is correct */
	    if (sys == &sysgrp) add_exclusion(spoolname);
	    state = 3;
	    break;
	case '#':
	    state = 1;
	    break;
	case '\n':
	    state = 0;
	    break;
	case '/': /* a list of exclusions to follow */
	    /*
	     * NOTE(review): when sys is not &sysgrp (ME entry outside
	     * INN mode) the state stays 2, so an exclusion name that
	     * follows ends up in the default case below and is reported
	     * as bad syntax -- confirm that this is intended
	     */
	    if (sys == &sysgrp) state = 5;
	    break;
	default:
	    /* anything else, including plain text, is an error here */
	    goto bad_syntax;
	}
	break;

	/* 
	 *  state: exclusions
	 */
    case 5:
	switch (*item) {
	case ':':
	    /* end of exclusions, the group list follows */
	    state = 3;
	    break;
	case '#':
	    state = 1;
	    break;
	case '\n':
	    state = 0;
	    break;
	case '!':
	case '/':
	    goto bad_syntax;
	case ',': /* ignore */
	    break;
	default:
	    /* a text item: one excluded name */
	    add_exclusion(item);
	    break;
	}
	break;

	/* 
	 *  state: groups
	 */
    case 3:
    case 4: /* seen a '!' or a '@' */
    again:
	switch (*item) {
	case ':': /* ignore flags */
	case '/': /* ignore distlist */
	case '#':
	    /* nothing of interest on the rest of this line */
	    state = 1;
	    break;
	case '\n':
	    state = 0;
	    break;
	case '!':
	    /* negates the group that follows */
	    state = 4;
	    break;
	case '@':
	    /* NOTE: '@' is not parsed as a special character */
	    /* it arrives glued to the group name: strip it, mark the */
	    /* group as negated and look at what is left of the item */
	    state = 4;
	    ++item;
	    goto again;
	case ',':
	    /* next group; also cancels a pending negation */
	    state = 3; 
	    break;
	default:
	    /* a group pattern: store it in the list chosen for this entry */
	    log_msg(L_DEBUGMORE,"sys group %s%s",(state==4?"!":""),item);
	    sys_group(item,sys,state==4);
	    state = 3;
	    break;
	}
    }
    /* every newline item seen, in whatever state, is one more line */
    if (*item == '\n') ++lineno;
}

/*
 *  pick a line from a C News or INN style sys file
 *
 *  lines may be continued by using a trailing backslash
 *
 *  format is:
 *      site/exclusions:grouplist/distlist:flags:cmd
 *
 *  site is (short) name of spool
 *  exclusions are things in path we don't want to be bothered with
 *  grouplist is:
 *      group
 *      group,grouplist
 *      !group
 *      @group           -- added: PMM
 *      all                 matches everything (like *)
 *      something.all       matches something.*
 *  distlist is a list of distributions
 *
 *  line    the buffer to scan. the first offset bytes hold the start
 *          of a token left over from the previous call, the text just
 *          read follows from line+offset. the buffer is modified
 *  offset  number of left-over bytes, 0 if there are none
 *
 *  every complete token and every separator is handed to sys_item()
 *
 *  returns offset required when tokens continue: an unfinished token
 *  is moved to the start of line and its length is returned, so that
 *  the caller can read the next chunk right behind it. 0 means that
 *  nothing is pending
 */

/*
 *  move the string at src, terminator included, down to dst
 *  both point into the same buffer, which rules out strcpy():
 *  its behaviour is undefined when source and destination overlap
 */
static void
sys_line_shift(char *dst, char *src)
{
    memmove(dst, src, strlen(src)+1);
}

static int 
sys_line(char *line, int offset)
{
    char what[2];
    int n;
    char *p;
    char *start = offset ? line : 0;

    /*
     * quite rare case first: overflowed line continuation.
     * the previous chunk ended in a backslash and this chunk is
     * nothing but the newline belonging to it. the new chunk is the
     * text at line+offset, so that is where to look for the newline.
     * an odd number of backslashes right before it is taken to mean
     * that the last one is not itself escaped.
     */
    if (offset >= 1 && line[offset]=='\n' && !line[offset+1]) {
	for (n = 0; n < offset; ++n) {
	    if (line[offset-(n+1)] != '\\') break;
	}
	if (n & 1) {
	    /* drop the backslash, the token continues on the next line */
	    line[--offset] = '\0';
	    return offset;
	}
    }

    for (p = line+offset; ;++p) {
	switch (*p) {
	case '#':
	case '/':
	case '!':
	case ':':
	case ',':
	case '\n':
	    /* separator: ends a pending token and is an item by itself */
	    what[0] = *p;
	    what[1] = '\0';
	    if (start) {
		/* terminate the token just long enough to pass it on */
		*p = '\0';
		sys_item(start);
		*p = what[0];
		start = 0;
	    }
	    sys_item(what);
	    break;
	case ' ':
	case '\t':
	    /* white space ends a token, but is not an item */
	    *p = '\0';
	    if (start) sys_item(start);
	    start = 0; /* start collecting again */
	    break;
	case '\\':
	    switch (p[1]) {
	    case '\n': 
		/* line continuation */
		if (!p[2]) {
		    /* and it is the last thing in the buffer */
		    if (!start) return 0;
		    /* token already collected: keep it, less the backslash */
		    *p = '\0';
		    if (start != line) sys_line_shift(line,start);
		    return (int)strlen(line); /* offset */
		}
		/* completely ignore backslash followed by newline */
		sys_line_shift(p,p+2);
		--p; /* look again at the character that moved here */
		break;
	    case '\0': 
		/* long line: the escaped character is not in the buffer yet */
		if (!start) start = p;
		if (start != line) sys_line_shift(line,start);
		return (int)strlen(line); /* offset */
	    default:
		/* always include the next character as part of the token */
		sys_line_shift(p,p+1);
		if (!start) start = p;
		/* the loop increment now steps over the escaped character */
		break;
	    }
	    break;
	case '\0':
	    if (start) {
		/* very long line, and token already collected */
		if (start != line) sys_line_shift(line,start);
		return (int)strlen(line); /* offset */
	    } 
	    return 0;
	default:
	    /* ordinary character: starts a token or extends the current one */
	    if (!start) start = p;
	    break;
	}
    }
}

/*
 *  load local sys or newsfeeds file
 *
 *  lines may be continued by using a trailing slash
 *
 *  the C News format is:
 *      site/exclusions:grouplist/distlist:flags:cmds
 *  the INN format is:
 *      site/exclusions:grouplist/distlist:flags:param
 *
 *  site is (short) name of spool
 *  exclusions is things in patch we don't want to be bothered with
 *  grouplist is:
 *      group
 *      group,grouplist
 *      !group
 *      group/distlist
 *  group is:
 *      groupelem           matches groupelem.*
 *      groupelem.groupelem
 *  groupelem is: 
 *      all                 matches everything (C News)
 *      *                   matches everything (INN)
 *      something           matches something
 *  distlist is a list of distributions that we simply ignore
 *  since distributions are quietly going out of fashion anyway
 */
void 
load_sys(void)
{
    FILE *f;
    char buf[BUFSIZ];
    int offset;

    progtitle("read sys");

    if (!(f = fopen(cfg_newsfeeds,"r"))) {
	log_msg(L_ERRno,"can't open \"%s\"",cfg_newsfeeds);
	exit_cleanup(8);
    }
    log_msg(L_DEBUG,"reading %s",cfg_newsfeeds);
    if (!is_regular(f,cfg_newsfeeds,NULL)) {
	fclose(f);
	exit_cleanup(8);
    }
    /* load local sys file */
    offset = 0;
    while (fgets(buf+offset,BUFSIZ-offset,f)) {
	offset = sys_line(buf,offset);
	if (offset >= BUFSIZ-3) {
	    log_msg(L_ERRno,"token too long in \"%s\"",cfg_newsfeeds);
	    exit_cleanup(8);
	}
    }
    fclose(f);

    /* check it they specified the special newsx path */
    newsx_path = path_match(NEWSX_PATH);
    if (newsx_path)
	log_msg(L_DEBUGMORE,"special %s exclude detected",NEWSX_PATH);
}
