/*--------------------------------------------------------------*
 *                                                              *
 *           = array  - Suffix Array text search =              *
 *                                                              *
 *  array-main.c - main routine, user interface,                *
 *                 and several global variables                 *
 *                                                              *
 *--------------------------------------------------------------*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "sufary.h"
#include "array.h"

eerror ee = NOERROR;                      /* last error code; handlers set it before returning ERROR, main() passes it to errorout() */
char *ct = " \n\t";                       /* delimiter set used with strtok() when splitting command lines */
SUFARY *sufary[MAX_ARRAY];                /* the currently opened SUFARY handles */
int arrays = 0;                           /* number of entries of sufary[] in use */
DID *did = NULL;                          /* currently opened DID handle, or NULL (981027) */

char keyword[KEYWORD_MAX_LENGTH];  /* copy of the most recent search string (shown by "info") */
DISPLAY_MODE display_mode = DISP_SIMPLE; /* result display mode */
SORT_MODE sort_mode = SORT_ALPHABETICAL; /* result sort mode */
int mojibake_proc_flag = 1; /* mojibake handling switch: 0 OFF, 1 ON (as printed by "info") */
int kwic_width = 50; /* KWIC width */

/* Result codes shared by parse_line() and my_sel():
 *   OP_ERROR (-2) malformed expression or search error,
 *   OP_FAIL  (-1) search ran but found nothing,
 *   OP_DONE  ( 0) search completed,
 *   OP_NORMAL( 1) line holds no [operator]; run a plain search,
 *   OP_NEAR  ( 2) "[near ...]" operator found,
 *   OP_EXACT ( 3) "[exact ...]" operator found. */
typedef enum {OP_ERROR = -2, OP_FAIL, OP_DONE, OP_NORMAL, OP_NEAR, OP_EXACT} op_t;

/* Prints version and command-line usage on stderr. */
void usage(void);

/* Document-segmentation (docseg) support, added 980822.
 * NOTE(review): none of the three names below is referenced elsewhere in
 * this file; their meaning is presumably defined by another source file. */
long *idx2did; /* for docseg 980822 */
long di_size; /* for docseg 980822 */
long txtidx2docid(long *la, int size, long target); /* for docseg 980822 */
/* Three-way comparison of two longs (for docseg 980822).
 * Returns a negative value, zero, or a positive value when *i is less than,
 * equal to, or greater than *j.  The result is built from comparisons
 * rather than from *i - *j: the subtraction can overflow, and its long
 * result was truncated to int, which could flip or zero the sign. */
static int longcomp(long *i,long *j)
{
  return (*i > *j) - (*i < *j);
}


/* Command handler functions [eresult com_???(char *cmd)].
 * Each one receives the rest of the command line after the command word
 * (NULL when nothing follows) and returns the continuation mode. */
eresult com_appendfiles(char *);
SUFARY *appendfiles_adj(char *);
eresult com_select(char *);
eresult com_info(char *);
eresult com_chfiles(char *cmd);
eresult com_closefiles(char *cmd);
eresult com_bye(char *);
eresult com_help(char *);
eresult com_open_did(char *); /* for docseg 980822 */
eresult com_docid(char *); /* for docseg 980822 */

/* Search helpers: my_sel() runs one search expression on one SUFARY,
 * parse_line() splits an expression around a "[operator]". */
op_t my_sel(SUFARY *ary, char *cmd);
op_t parse_line(const char *line, char *op, char *arg1, char *arg2);

/* Command dispatch table: maps each command word to its handler.
 * main() compares the first word of every input line against the names
 * in order and stops at the entry whose name is NULL, so the table MUST
 * end with the { NULL, NULL } sentinel; without it an unknown command
 * makes the scan run past the end of the array. */
commands sc[] = {
  { "init", (com_init) },
  { "select", (com_select) },
  { "find", (com_select) },{ "search", (com_select) },/* 970523 search commands */
  { "show", (com_show) },{ "display", (com_show) },/* 970523 display commands */
  { "print", (com_show) }, /* 980127 display command */
  { "mode", (com_mode_display) },
  { "style", (com_mode_display) }, /* 970523 display style */
  { "sort", (com_mode_sort) },
  { "order", (com_mode_sort) }, /* 970523 sort order */
  { "mojibake", (com_mode_mojibake) },
  { "width", (com_set_kwic_width) },
  { "kwidth", (com_set_kwic_width) }, /* 970523 Kwic WIDTH */
  { "line", (com_line) },
  { "get", (com_get) }, /* 980126 */
  { "str", (com_get_str) }, /* 980327 variant of get */
  { "bye", (com_bye) },  { "quit", (com_bye) },  { "exit", (com_bye) },
  { "help", (com_help) },  { "?", (com_help) },
  { "file", (com_chfiles) },
  { "open", (com_appendfiles) }, /* 971213 */
  { "append", (com_appendfiles) }, /* 971213 */
  { "close", (com_closefiles) }, /* 971213 */
  { "info", (com_info) }, /* 980126 status information */
  { "opendid", (com_open_did) }, /* 980822 */
  { "odid", (com_open_did) }, /* 980822 */
  { "did", (com_docid) }, /* 980822 */
  /********** abbreviations **********/
  { "i", (com_init) },            /* 970523  Init */
  { "s", (com_select) },          /* 970523  search */
  { "f", (com_select) },          /* 970523  search */
  { "d", (com_show) },            /* 970523  display */
  { "sh", (com_show) },           /* 970523  display */
  { "p", (com_show) },            /* 980127  display */
  { "st", (com_mode_display) },   /* 970523  display style */
  { "mo", (com_mode_display) },   /* 980126  display style */
  { "o", (com_mode_sort) },       /* 970523  sort order */
  { "q", (com_bye) },             /* 96....  Quit */
  { "l", (com_line) },            /* 970523  line */
  { "kw", (com_set_kwic_width) }, /* 970523  Kwic WIDTH */
  { NULL, NULL }                  /* sentinel: end of table */
};


/**********************************************
 *         eresult errorout(eerror e);
 *
 * purpose
 *   Report an error to the user.
 *
 * parameters
 *   e : error code
 *
 * return value
 *   Continuation mode: CONT to keep running, EXIT to stop.
 *
 * description
 *   Prints the message that belongs to the error code on
 *   stdout and returns the continuation mode matching its
 *   severity.  A code without a message prints nothing and
 *   yields CONT.
 **********************************************/
eresult errorout(eerror e){
  const char *message = NULL;  /* text to print, NULL for "nothing" */
  eresult verdict = CONT;      /* recoverable unless a case says otherwise */

  switch(e){
  case COMMAND:
    message = "command error. ( ? for help )";
    break;
  case MEMORY:
    message = "memory handling error.";
    verdict = EXIT;
    break;
  case FILEIN:
    message = "file input error.";
    break;
  case FILEOUT:
    message = "file output error.";
    break;
  case STRUCTURE:
    message = "data structure is broken.";
    verdict = EXIT;
    break;
  case UNKNOWN:
    message = "unknown error.";
    verdict = EXIT;
    break;
  default:
    break;
  }

  if (message != NULL)
    printf("%s\n", message);
  return verdict;
}

/**********************************************
 *       void main(int argc, char *argv[]);
 *
 * purpose
 *   ᥤ롼󡢥桼󥿥ե
 *
 * description
 *   ޥɰν桼󥿥ե
 *   
 **********************************************/
main(int argc, char *argv[])
{
  char cmd[500];
  char *s;
  commands *cc;
  eresult er;

  FILE *INPUT;
  static int Init_mode; /* ꥽ե뤫ɤ߹िΥե饰 */
  extern char **environ;
  char Init_file_base[1024] ;
  const char *Init_file = "/.arrayrc";

  /*================
    ץ
  ================*/
  while (argc > 1){
    if (argv[1][0] == '-'){
      switch (argv[1][1]){
      case 'R': /*** ǥեȤΥ꥽ե(~/.arrayrc)ɤ ***/
	while (*environ != NULL){
	  if (strstr(*environ,"HOME=") != NULL){
	    sprintf(Init_file_base,"%s%s",*environ+strlen("HOME="),Init_file);
	    break;
	  }
	  environ++;
	}
	Init_mode = 1; /* ե饰ON */
        break;
      case 'r': /*** ꤵ줿꥽եɤ ***/
	strcpy(Init_file_base,argv[2]);
	argc--; argv++;
	Init_mode = 1; /* ե饰ON */
	break;
      case '-':
	(void)usage();
	exit(0);
      default : /* 顼 */
        fprintf(stderr, "%c: ̵ʥץǤ\n", argv[1][1]);
	(void)usage();
        exit(1);
      }
    }else break;
    argc--; argv++;
  }


  /*** ǡե̾ν ***/
  /* textΥեǤ褦ˤۤ */
  if (argc ==2){  /*  1*/
    if (com_appendfiles(argv[1]) == ERROR)
      printf("argument ignored.\n");
  }

  if (argc == 3){ /*  2*/
    if (strlen(argv[1]) + strlen(argv[2]) + 1 < sizeof(cmd)){
      sprintf(cmd, "%s %s", argv[1], argv[2]);
      if (com_appendfiles(cmd) == ERROR)
	printf("argument ignored.\n");
    } else {
      printf("too long argument. ignored.\n");
    }
  }

  if (Init_mode == 1){
    INPUT = fopen(Init_file_base,"r");  /* ꥽ե򳫤 */
    printf ("Initializing...\n");
  }

  /* 桼󥿥եΥ롼 */
  while(1){
    if (Init_mode == 1){ /* ꥽ե뤫ɤ߹ */
      if (!fgets(cmd, (int)sizeof(cmd), INPUT)){ /* ꥽եνλ */
	printf ("Initialized.\n");
	Init_mode = 0; /* ե饰OFF */
	continue;
      }
      printf("> %s",cmd);
    }else{
      printf("ok\n");
      (void)fflush(stdout);
      if (!fgets(cmd, (int)sizeof(cmd), stdin)){
	printf ("\n");
	break; /* EOF */
      }
    }

    s = strtok(cmd, ct);
    if (s == NULL)  /* ϤȤ */
      continue;
    er = ERROR;
    ee = COMMAND;  /* assume command error */
    for ( cc = sc; cc->name != NULL ; cc++){
      if (strcmp(s, cc->name) == 0){
	s = strtok(NULL,"");
	er = (cc->func)(s);  /* ϥɥ¹ */
	break;
      }
    }
    if (er == ERROR)
      er = errorout(ee);
    if (er == CONT)
      continue;
    if (er == EXIT)
      break;
  }
  return(0);
}


/**********************************************
 *         eresult com_select(char *cmd);
 *
 * purpose
 *   Run a search on every opened SUFARY.
 *
 * parameters
 *   cmd : search expression (text up to the first newline);
 *         NULL or empty is a command error
 *
 * return value
 *   CONT  if at least one array produced a result,
 *   FAIL  if no array matched,
 *   ERROR on a malformed expression or search error.
 *
 * description
 *   Remembers the expression in the global keyword buffer
 *   (truncated to fit), calls my_sel() on each array and
 *   prints the number of hits per array.
 **********************************************/
eresult com_select(char *cmd){
  char *s;
  int i;
  long nfound;
  size_t len;
  int kensaku_ok = 0;  /* set once any array reports a hit */

  /* Guard NULL explicitly: strtok(NULL, ...) would otherwise continue
     whatever tokenisation the caller had in progress. */
  if (cmd == NULL || !(s = strtok(cmd, "\n"))){
    ee = COMMAND;
    return ERROR;
  }

  /* keyword is only kept for display; never write past its end. 980126 */
  len = strlen(s);
  if (len >= sizeof(keyword))
    len = sizeof(keyword) - 1;
  memcpy(keyword, s, len);
  keyword[len] = '\0';

  for (i = 0; i < arrays; i++){
    switch(my_sel(sufary[i], s)){
    case OP_ERROR:
      return ERROR;
    case OP_DONE:
      nfound = sa_top(sufary[i])-sa_bottom(sufary[i])+1;
      if (arrays == 1){
	printf("FOUND: %ld\n", nfound);
      } else {
	printf("FOUND[%d]: %ld\n", i, nfound);
      }
      kensaku_ok = 1;
      break;
    case OP_FAIL:
    default:
      break;  /* nothing found in this array; try the next one */
    }
  }
  if (kensaku_ok == 1)
    return CONT;

  printf("no matching elements.\n");
  return FAIL;
}
/**********************************************
 *            op_t my_sel(SUFARY *ary, char *cmd);
 *
 * purpose
 *    Call sa_sel(), handling the [near]/[exact] operators.
 *
 * parameters
 *   ary : target array
 *   cmd : search expression (must not be NULL)
 *
 * return value
 *   OP_DONE on success, OP_FAIL when nothing matched,
 *   OP_ERROR on a malformed expression or search error.
 *
 * description
 *   Splits the expression with parse_line().  A plain
 *   expression is passed to sa_sel() unchanged.  For
 *   "arg1 [near N] arg2" / "arg1 [exact N] arg2" both
 *   arguments are searched on copies of *ary and the two
 *   results are handed to sa_uniq_near(), which stores the
 *   outcome in ary.
 */
op_t my_sel(SUFARY *ary, char *cmd)
{
  char *arg1, *arg2;
  char *op;
  SUFARY suf1, suf2;
  op_t ret;
  eresult er;
  /* No piece produced by parse_line() can be longer than the line itself,
     so every buffer (including op, formerly a fixed 20 bytes) is sized
     from the input. */
  size_t bufsz = strlen(cmd)+1;

  arg1 = (char *)alloca(bufsz);
  arg2 = (char *)alloca(bufsz);
  op = (char *)alloca(bufsz);
  ret = parse_line(cmd, op, arg1, arg2);
  switch (ret){
  /* error */
  case OP_ERROR:
    ee = COMMAND;
    return OP_ERROR;
  case OP_FAIL:
    return OP_FAIL;
  /* everything already done */
  case OP_DONE:
    return OP_DONE;
  /* ordinary search */
  case OP_NORMAL:
    if ((er = sa_sel(ary, cmd)) == ERROR)
      return OP_ERROR;
    else if (er == FAIL)
      return OP_FAIL;
    return OP_DONE;
  /* proximity search */
  case OP_NEAR:
  case OP_EXACT:
    suf1 = suf2 = *ary;  /* search each argument on its own copy */
    /* sa_sel(), not my_sel(): see BUG 980618 */
    if ((er = sa_sel(&suf2, arg2)) == ERROR)
      return OP_ERROR;
    if(er == FAIL) return OP_FAIL;
    if ((er = sa_sel(&suf1, arg1)) == ERROR)
      return OP_ERROR;
    if(er == FAIL) return OP_FAIL;

    /* flag is 1 for [near], 0 for [exact]; the last argument is the
       operator's number plus the length of arg1 */
    if (sa_uniq_near(ary, &suf1, &suf2, ((ret == OP_NEAR)? 1:0), atoi(op)+strlen(arg1)) == CONT)
      return OP_DONE;
    return OP_FAIL;
  default:
    ee = COMMAND;
    return OP_ERROR;
  }
  return OP_ERROR; /* not reached */
}

/**********************************************
 * op_t parse_line(const char *line, char *op, char *arg1, char *arg2)
 *
 * purpose
 *    Split a line around a "[operator]".
 *
 * parameters
 *   line: line to split
 *   op  : receives the operator's argument text (output)
 *   arg1: receives the text left of the operator (output)
 *   arg2: receives the text right of the operator (output)
 *
 * return value
 *   OP_NORMAL if the line holds no "[...]" pair (outputs untouched),
 *   OP_NEAR / OP_EXACT if the operator word is a leading part
 *   of "near" / "exact",
 *   OP_ERROR for NULL/empty input, a missing left or right
 *   argument, an empty or unknown operator.
 *
 * description
 *   op, arg1 and arg2 must be supplied by the caller; each
 *   needs room for strlen(line)+1 bytes.
 */
op_t parse_line(const char *line, char *op, char *arg1, char *arg2)
{
  /*                            0    5     */
  static const char *op_line = "near exact";
  const char *hit;
  char *s;
  char *p, *q, *r;
  char *buf;
  char *op_buf;

  /* validate before touching the string */
  if(!line || !*line){
    return OP_ERROR;
  }
  buf = (char *)alloca(strlen(line)+1);
  strcpy(buf, line);

  r = strchr(buf, 0);
  /* no [command] present; the ']' is looked for after the '[' so that
     a stray earlier ']' cannot put q in front of p */
  if (!(p = strchr(buf, '[')) || !(q = strchr(p, ']'))){
    return OP_NORMAL;
  }
  if (p == buf || !q[1]){ /* nothing left of '[' or right of ']' */
    return OP_ERROR;
  }

  /*
    The pointers are arranged as below; a NUL is written at each x.
        (1) (2)        (5)  (3) (4)
    arg1   [   command    op   ]   arg2
    buf x      p            x      q   x
               op_buf x   s x
   */

  /* (1) skip the spaces before '[' */
  s = p;
  if (isspace((unsigned char)p[-1])){
    for (s = p-1; s>buf && isspace((unsigned char)s[-1]); s--)
      ;
    if (s == buf)
      return OP_ERROR;
  }
  *s = 0;

  /* (2) skip the spaces after '[' */
  s = p+1;
  if (isspace((unsigned char)p[1])){
    for (s = p+1; s<q && isspace((unsigned char)*s); s++)
      ;
    if (s == q)
      return OP_ERROR;
  }
  p = s;

  /* (3) skip the spaces before ']' */
  s = q;
  if (isspace((unsigned char)q[-1])){
    for (s = q-1; s>p && isspace((unsigned char)s[-1]); s--)
      ;
    if (s == p)
      return OP_ERROR;
  }
  *s = 0;

  /* (4) skip the spaces after ']' */
  s = q+1;
  if (isspace((unsigned char)q[1])){
    for (s = q+1; s<r && isspace((unsigned char)*s); s++)
      ;
    if (s == r)
      return OP_ERROR;
  }
  q = s;

  /* "[]" leaves nothing between the brackets; strstr() would match an
     empty operator at offset 0 and report OP_NEAR */
  if (!*p)
    return OP_ERROR;

  strcpy(arg1, buf);
  strcpy(arg2, q);

  /* (5) separate the operator word from its argument */
  op_buf = (char *)alloca(strlen(p)+1);
  strcpy(op_buf, p);

  for (s = op_buf; *s && !isspace((unsigned char)*s); s++)
    ;
  if (*s)
    *s++ = 0;  /* cut the word; s now starts the argument text */
  /* otherwise s stays on the terminator: the argument is empty */

  hit = strstr(op_line, op_buf);
  if (!hit)
    return OP_ERROR;
  switch (hit - op_line){
  case 0:
    strcpy(op, s);
    return OP_NEAR;
  case 5:
    strcpy(op, s);
    return OP_EXACT;
  default:
    return OP_ERROR;
  }
}

/**********************************************
 *         eresult com_appendfiles(char *cmd);
 *
 * purpose
 *   ƥȥե롢arrayե򳫤
 * parameters
 *   cmd : ޥɥ饤
 * return value
 *    ץ³⡼
 * description
 *    cmd 1ĤΥե̾Ϳˤϡ.ary (Rel1.4: .pat -> .ary)
 *    դե arrayfile Ȥ롣
 **********************************************/
eresult com_appendfiles(char *cmd)
{
  SUFARY *s;

  if ((s = appendfiles_adj(cmd)) == NULL){
    return ERROR;
  }
  return CONT;
}

/**********************************************
 *         SUFARY *appendfiles_adj(char *cmd);
 *
 * purpose
 *   Open a text file and its array file.
 * parameters
 *   cmd : command line: "<text file> [<array file>]";
 *         it is split in place with strtok()
 * return value
 *   The newly opened SUFARY, or NULL on failure with ee set
 *   (COMMAND: no file name given; FILEIN: the table is full
 *   or sa_openfiles() failed).
 * description
 *   Passes the first word and the optional second word
 *   (NULL when absent) to sa_openfiles() and appends the
 *   result to sufary[].
 *   NOTE(review): the original comment suggests that with a
 *   single name sa_openfiles() derives the array file name by
 *   appending ".ary" (Rel1.4: .pat -> .ary) - confirm there.
 **********************************************/

SUFARY *appendfiles_adj(char *cmd)
{
  char *s, *t;

  /* Guard NULL explicitly: strtok(NULL, ...) would otherwise continue
     whatever tokenisation the caller had in progress. */
  s = (cmd != NULL) ? strtok(cmd, ct) : NULL;
  if (s == NULL){ /* no file name given */
    ee = COMMAND;
    return NULL;
  }

  t = strtok(NULL, ct); /* second argument (array file), may be NULL */

  if(arrays >= MAX_ARRAY || (sufary[arrays] = sa_openfiles(s,t)) == NULL){
    ee = FILEIN;
    return NULL;
  }
  arrays++;

  return sufary[arrays-1];
}

/**********************************************
 *         eresult com_closefiles(char *cmd);
 *
 * purpose
 *   Close the text files and array files.
 * parameters
 *   cmd : unused
 * return value
 *   Always CONT.
 * description
 *   Closes every SUFARY held in sufary[], clears the slots
 *   and resets the count to zero.
 **********************************************/
eresult com_closefiles(char *cmd)
{
  int slot = 0;

  while (slot < arrays){
    sa_closefiles(sufary[slot]);
    sufary[slot] = NULL;
    slot++;
  }
  arrays = 0;
  return CONT;
}

/**********************************************
 *         eresult com_chfiles(char *cmd);
 *
 * purpose
 *   Reopen: replace the opened files by the given ones.
 * parameters
 *   cmd : command line (file name arguments)
 * return value
 *   Result of com_appendfiles(cmd).
 * description
 *   Closes every SUFARY, then opens only the files named
 *   in cmd.
 **********************************************/
eresult com_chfiles(char *cmd)
{
  (void)com_closefiles(NULL);  /* its result is always CONT */
  return com_appendfiles(cmd);
}


/**********************************************
 *         void usage(void)
 *
 * purpose
 *   Print the version and the command-line options
 *   on stderr.
 **********************************************/
void usage(void){
  /* one literal, written in a single call; the text contains no '%' */
  fputs("array (SUFARY Version 2.0 1998.6.2)\n"
	"  NAIST Computational Linguistics Labolatry\n"
	"   http://cl.aist-nara.ac.jp/lab/nlt/ss/\n\n"
	"USAGE   array [ -R | -r filename ] text_filename [ array_filename ]\n"
	"OPTION\n"
	"  -R            : ~/.arrayrc ꥽եȤɤ߹\n"
	"  -r <filename> : ꥽ե\n",
	stderr);
}


/**********************************************
 *         eresult com_info(char *cmd);
 *
 * purpose
 *   ǥХѤξɽ
 *
 * parameters
 *   cmd : ̤
 *
 * return value
 *   ץ³⡼
 **********************************************/
eresult com_info(char *cmd)
{
  char *s, *t;
  int i;

  printf("\n*** File Information***\n");
  printf("----+-----------+------------+-------------------------------------------\n");  printf(" No.| Text Size | Array Size | File Name\n");
  printf("----+-----------+------------+-------------------------------------------\n");
  for(i = 0; i < arrays; i++){
    printf("%3d | %9ld | %10ld | %s\n",\
	   i,sufary[i]->txtsz,sufary[i]->arysz,sufary[i]->filename);
  }
  printf("----+-----------+------------+-------------------------------------------\n");
  printf("\n");
  printf("*** Keyword ***\n[%s]\n",keyword);
  printf("*** Mojibake Syori *** 0 OFF, 1 ON\n[%d]\n",mojibake_proc_flag);
  printf("*** Display Mode *** 0 KWIC, 1 INDEX, 2 LINE, 3 SIMPLE\n[%d]\n",display_mode);
  printf("*** Sort Mode *** 0 INDEX, 1 ALPHABETICAL\n[%d]\n",sort_mode);
  printf("*** KWIC Width ***\n[%d]\n",kwic_width);

  return CONT;
}

/**********************************************
 *         eresult com_bye(char *cmd);
 *
 * purpose
 *   End the program.
 *
 * parameters
 *   cmd : unused
 *
 * return value
 *   Always EXIT, which makes the main loop stop.
 **********************************************/
eresult com_bye(char *s)
{
  (void)s;
  puts("bye.");  /* puts() appends the newline */
  return EXIT;
}

/**********************************************
 *         eresult com_help(char *cmd);
 *
 * purpose
 *   Print the help message.
 *
 * parameters
 *   cmd : unused
 *
 * return value
 *   Always CONT.
 *
 * description
 *   Writes the list of commands and their aliases to stdout.
 **********************************************/
eresult com_help(char *s)
{
  /* help text, one output line per entry; none contains a '%' */
  static const char *const help_lines[] = {
    "Following commands are available.\n",
    " init\n",
    " search (string)\n",
    " open (filename)\n",
    " close\n",
    " show (from) (to)\n",
    " style {index|simple|kwic|line}\n",
    " order {index|alphabetical}\n",
    " line (pointer)\n",
    " str (pointer) (number of char)\n",
    " get (pointer) (delimiter-1) (delimiter-2)\n",
    " mojibake {ON|OFF}\n",
    " kwidth (integer: width for kwic)\n",
    " info\n",
    " quit\n",
    "\n",
    " [i]                   init\n",
    " [s][f][find][select]  search\n",
    " [sh][d][display][p]   show\n",
    " [st][mode][mo]        style\n",
    " [o][sort]            order\n",
    " [q][bye][exit]        quit\n",
    " [kw][width]           kwidth\n",
    " [l]                   line\n"
  };
  size_t k;

  (void)s;
  for (k = 0; k < sizeof(help_lines) / sizeof(help_lines[0]); k++)
    fputs(help_lines[k], stdout);

  return CONT;
}



/**********************************************
 *         eresult com_open_did(char *cmd);
 *
 * purpose
 *   ɥʬ־ե򳫤
 * parameters
 *   cmd : ޥɥ饤
 * return value
 *    ץ³⡼
 * description
 **********************************************/
eresult com_open_did(char *cmd)
{
  cmd[strlen(cmd)-1] = '\0';
  if(did != NULL) sa_closedid(did);
  if((did = sa_opendid(cmd)) == NULL){
    ee = FILEIN;
    return ERROR;
  }
  return CONT;
}


eresult com_docid(char *cmd){
  long lg;
  if(sscanf(cmd,"%ld",&lg) == 0 || did == NULL) return ERROR;
  sa_didsearch(did, lg);
  printf("from=%ld size=%ld\n",sa_doc_start(did),sa_doc_size(did));
  return CONT;
}


