/* File "symbols.c":
 * Manages the name and the atom list of each symbol. */

/* This file is part of Malaga, a system for Left Associative Grammars.
 * Copyright (C) 1995-1998 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "files.h"
#include "malaga_files.h"
#include "sym_type.h"

#undef GLOBAL
#define GLOBAL

#include "symbols.h"

/*---------------------------------------------------------------------------*/

LOCAL struct /* The symbol table; it is filled by "read_symbol_file" and
	      * released by "free_symbol_table". */
{
  long_t symbols_size;     /* Number of entries in <symbols>,
                            * <symbol_values> and <symbols_alph>. */
  symbol_entry_t *symbols; /* <symbols>[<i>] describes symbol <i>:
                            * its <name> field is an offset into <strings>,
                            * its <atoms> field is an offset into <values>
                            * or -1 if the symbol has no atom list. */
  symbol_t *symbol_values; /* All symbols in order of their codes, i.e.
			    * <symbol_values>[<i>] == <i>
			    * (same length as <symbols>). */
  symbol_t *symbols_alph;  /* All symbols sorted by name, ignoring case;
			    * used for binary search in "get_symbol"
			    * (same length as <symbols>). */
  
  long_t values_size;      /* Number of cells in <values>. */
  symbol_t *values;        /* Pool that holds the lists of atomic symbols. */
  long_t strings_size;     /* Number of chars in <strings>. */
  char *strings;           /* Pool that holds the symbol names. */
} symbol_table;

/*---------------------------------------------------------------------------*/

GLOBAL value_t symbol_to_value (symbol_t symbol)
/* Return a value that consists of <symbol> only.
 * The result points to the cell for <symbol> in
 * <symbol_table.symbol_values>, so it must not be freed by the caller. */
{
  DB_ASSERT (symbol < symbol_table.symbols_size);
  return &symbol_table.symbol_values[symbol];
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t symbol_name (symbol_t symbol)
/* Return the name of <symbol>.
 * The result points into the string pool of the symbol table. */
{
  const symbol_entry_t *entry;

  DB_ASSERT (symbol < symbol_table.symbols_size);

  /* The entry stores its name as an offset into <symbol_table.strings>. */
  entry = &symbol_table.symbols[symbol];
  return &symbol_table.strings[entry->name];
}

/*---------------------------------------------------------------------------*/

LOCAL symbol_t get_symbol (string_t name)
/* Find a symbol by <name> in the symbol table and return its code.
 * Names are compared without regard to case.
 * Return (symbol_t) -1 if the symbol doesn't exist
 * (this includes the case of an empty symbol table). */
{
  /* We do a binary search on the symbols in <symbols_alph>;
   * <lower> and <upper> are the inclusive bounds of the range
   * that may still contain <name>. */
  long_t lower = 0;
  long_t upper = symbol_table.symbols_size - 1;

  while (lower <= upper) 
  {
    /* Compute the midpoint without forming <lower> + <upper>,
     * which might overflow for very large tables. */
    long_t middle = lower + (upper - lower) / 2;
    symbol_t middle_symbol = symbol_table.symbols_alph[middle];

    /* Keep the comparison result in an "int" (as "compare_symbols" does),
     * so that no narrowing conversion can change its sign. */
    int comp_result = strcmp_no_case (name, symbol_name (middle_symbol));

    if (comp_result < 0)
      upper = middle - 1;
    else if (comp_result > 0)
      lower = middle + 1;
    else
      return middle_symbol;
  }

  /* The range is empty, so <name> is not in the table. */
  return (symbol_t) -1;
}

/*---------------------------------------------------------------------------*/

GLOBAL symbol_t find_symbol (string_t name)
/* Look up symbol <name> in the symbol table and return its code.
 * If no such symbol exists, report this via "error". */
{
  symbol_t code = get_symbol (name);

  if (code == (symbol_t) -1)
  {
    error ("unknown symbol \"%s\"", name);
  }

  return code;
}

/*---------------------------------------------------------------------------*/

GLOBAL symbol_t find_multi_symbol (value_t atom_list)
/* Return the code of the symbol whose atom list is <atom_list>.
 * A list with exactly one element yields that element, provided it has
 * no atom list of its own. A longer list is canonised and compared with
 * the atom list of every symbol in the table.
 * If nothing matches (or <atom_list> is empty), report an error.
 * NOTE(review): There is no "return" after the call of "error", so "error"
 * is presumably expected not to return -- confirm. */
{
  if (get_element (atom_list, 2) != NULL)
  { /* More than one atom: search for a multi-symbol with these atoms. */
    value_t canonical_list = canonise_atom_list (atom_list);
    symbol_t candidate;

    for (candidate = 0; candidate < symbol_table.symbols_size; candidate++)
    {
      long_t offset = symbol_table.symbols[candidate].atoms;

      /* An offset of -1 marks a symbol without an atom list. */
      if (offset != -1
	  && values_equal (canonical_list, symbol_table.values + offset))
      {
	return candidate;
      }
    }
  }
  else
  { /* At most one atom: the atom itself is the result if it is atomic. */
    value_t atom = get_element (atom_list, 1);

    if (atom != NULL)
    {
      symbol_t symbol = value_to_symbol (atom);

      if (symbol_table.symbols[symbol].atoms == -1)
      {
	return symbol;
      }
    }
  }

  /* Did not find an entry. */
  error ("no multi-symbol for this atom list");
}

/*---------------------------------------------------------------------------*/

GLOBAL bool_t is_symbol (string_t name)
/* Return whether <name> is the name of a symbol in the symbol table. */
{
  symbol_t code = get_symbol (name);

  return code != (symbol_t) -1;
}

/*---------------------------------------------------------------------------*/

GLOBAL long_t number_of_symbols (void)
/* Return the number of entries in the symbol table. */
{
  long_t count = symbol_table.symbols_size;

  return count;
}

/*---------------------------------------------------------------------------*/

GLOBAL value_t atoms_of_symbol (symbol_t symbol)
/* Return the atom list that the symbol table stores for <symbol>.
 * Return NULL if <symbol> has no atom list.
 * The result points into the value pool of the symbol table. */
{
  long_t offset; /* offset of the atom list in <symbol_table.values> */

  DB_ASSERT (symbol < symbol_table.symbols_size);

  offset = symbol_table.symbols[symbol].atoms;
  if (offset == -1)
    return NULL; /* -1 marks a symbol without an atom list. */

  return &symbol_table.values[offset];
}

/*---------------------------------------------------------------------------*/

LOCAL int compare_symbols (const void *symbol1, const void *symbol2)
/* Return -1 if <symbol1> <  <symbol2> (lexically)
 *         0 if <symbol1> == <symbol2> (lexically)
 *         1 if <symbol1> >  <symbol2> (lexically). */
{
  return strcmp_no_case (symbol_name (*(symbol_t *) symbol1), 
			 symbol_name (*(symbol_t *) symbol2));
}

/*---------------------------------------------------------------------------*/

GLOBAL void read_symbol_file (string_t file_name)
/* Fill <symbol_table> with the contents of the file <file_name>.
 * The file consists of a header followed by the symbol entries, the
 * value pool and the string pool, in this order. */
{
  symbol_header_t header;
  FILE *stream;
  long_t code;

  /* Read the header and check that this is a suitable symbol file. */
  stream = fopen_save (file_name, "rb");
  fread_save (&header, sizeof header, 1, stream, file_name);
  check_header (&header.common_header, file_name,
		SYMBOL_FILE, SYMBOL_CODE_VERSION);

  /* Read the three tables; their sizes are given in the header. */
  symbol_table.symbols_size = header.symbols_size;
  symbol_table.symbols = (symbol_entry_t *)
    fread_block (sizeof (symbol_entry_t), header.symbols_size,
		 stream, file_name);

  symbol_table.values_size = header.values_size;
  symbol_table.values = (cell_t *)
    fread_block (sizeof (cell_t), header.values_size, stream, file_name);

  symbol_table.strings_size = header.strings_size;
  symbol_table.strings = (char *)
    fread_block (sizeof (char), header.strings_size, stream, file_name);

  fclose_save (stream, file_name);

  /* Create two vectors with one cell per symbol: <symbol_values> keeps
   * the symbols in order of their codes, <symbols_alph> will be sorted
   * by name to allow a fast search. */
  symbol_table.symbol_values = (symbol_t *)
    new_vector (sizeof (symbol_t), symbol_table.symbols_size);
  symbol_table.symbols_alph = (symbol_t *)
    new_vector (sizeof (symbol_t), symbol_table.symbols_size);

  for (code = 0; code < header.symbols_size; code++)
  {
    symbol_table.symbols_alph[code] = code;
    symbol_table.symbol_values[code] = code;
  }

  qsort (symbol_table.symbols_alph, symbol_table.symbols_size,
	 sizeof (symbol_t), compare_symbols);

  /* Set function variables for "values". */
  values_symbol_name = symbol_name;
  values_atoms_of_symbol = atoms_of_symbol;
}

/*---------------------------------------------------------------------------*/

GLOBAL void free_symbol_table (void)
/* Free all memory used by <symbol_table> and reset the table to the
 * empty state. Calling this function more than once is harmless. */
{
  free (symbol_table.symbols);
  free (symbol_table.symbol_values);
  free (symbol_table.symbols_alph);
  free (symbol_table.values);
  free (symbol_table.strings);

  /* Clear the pointers, so that a repeated call does not free
   * the same memory twice ("free (NULL)" does nothing). */
  symbol_table.symbols = NULL;
  symbol_table.symbol_values = NULL;
  symbol_table.symbols_alph = NULL;
  symbol_table.values = NULL;
  symbol_table.strings = NULL;

  /* Reset the sizes, so that the table is consistently empty:
   * "number_of_symbols" reports 0 and the range checks on symbol codes
   * fail instead of admitting accesses to freed memory. */
  symbol_table.symbols_size = 0;
  symbol_table.values_size = 0;
  symbol_table.strings_size = 0;
}
