/* File "basic.c":
 * Basic types, macros and functions that are used everywhere. */

/* This file is part of Malaga, a system for Left Associative Grammars.
 * Copyright (C) 1995-1998 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* includes =================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "basic.h"

#undef GLOBAL
#define GLOBAL

/* constants ================================================================*/

GLOBAL char lower_letter[256] =
/* This table contains the lower case letter for each letter code,
 * and 0 for each non-letter.
 * It is indexed by the (unsigned) char code. Exactly one of the symbols
 * HANGUL and LATIN1 must be defined to select the table contents.
 * All codes above 127 are written as hexadecimal escapes, so this file
 * does not depend on the encoding it is stored in. */
#ifdef HANGUL
{
  /* This table is for romanised Hangul. */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* Codes 0xa0 - 0xbf, eight entries per line. */
  0,      '\xa1', '\xa2', '\xa3', '\xa4', '\xa5',      0, '\xa7',
  '\xa8', '\xa9',      0, '\xab', '\xac', '\xad', '\xae', '\xaf',
  '\xb0', '\xb1',      0, '\xb3', '\xb4', '\xb5',      0, '\xb7',
  '\xb8',   0,    '\xba',      0,      0,      0,      0,      0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0
};
#endif
#ifdef LATIN1
{
  /* This table is for Latin1 char code. */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* Codes 0xa0 - 0xbf: only the two ordinal indicators (0xaa and 0xba)
   * count as letters; each one is its own lower case form. */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '\xaa', 0,   0,   0,   0,   0,
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '\xba', 0,   0,   0,   0,   0,
  /* Codes 0xc0 - 0xcf: upper case letters, mapped to 0xe0 - 0xef. */
  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
  /* Codes 0xd0 - 0xdf: upper case letters, mapped to 0xf0 - 0xfe.
   * 0xd7 (multiplication sign) is no letter; 0xdf (sharp s) is already
   * lower case and maps to itself. */
  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6',      0,
  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xdf',
  /* Codes 0xe0 - 0xef: lower case letters, mapped to themselves. */
  '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
  '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
  /* Codes 0xf0 - 0xff: lower case letters, mapped to themselves.
   * 0xf7 (division sign) is no letter. */
  '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6',      0,
  '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff'
};
#endif

/* memory functions =========================================================*/

GLOBAL void *new_mem (u_long_t item_size)
/* Return a freshly allocated, zero-filled memory block of <item_size> bytes.
 * If no memory could be allocated, the function "error" is called. */
{
  void *memory = calloc (1, item_size);

  if (memory == NULL)
    error ("out of memory");
  return memory;
}

/*---------------------------------------------------------------------------*/

GLOBAL void *new_vector (u_long_t item_size, u_long_t items)
/* Return a freshly allocated, zero-filled vector that holds <items> items
 * of <item_size> bytes each. A vector without items is represented by a
 * null pointer. If no memory could be allocated, the function "error" is
 * called. */
{
  void *vector = NULL;

  if (items != 0)
  {
    vector = calloc (items, item_size);
    if (vector == NULL)
      error ("out of memory");
  }
  return vector;
}

/* string functions =========================================================*/

GLOBAL string_t copy_string (string_t to, string_t from, string_t to_end)
/* Copy the string <from> into the buffer that starts at <to> and ends
 * right before <to_end>, and terminate the copy by EOS.
 * If <from> and its EOS don't fit into the buffer, the function "error"
 * is called.
 * Return a pointer to the trailing EOS of the copy. */
{
  string_t limit = to_end - 1; /* This position is reserved for the EOS. */

  for (; *from != EOS; from++)
  {
    if (to >= limit)
      break; /* The buffer is full; <from> still points to an uncopied char. */

    *to++ = *from;
  }

  if (*from != EOS)
    error ("string too long");

  /* An empty buffer can't even take the EOS. */
  if (to < to_end)
    *to = EOS;

  return to;
}

/*---------------------------------------------------------------------------*/

LOCAL long_t readable_length (string_t string, string_t string_end)
/* Compute the number of chars that "copy_string_readable" produces for
 * <string> (the final EOS is not counted).
 * If <string_end> is NULL, <string> is measured up to its EOS; otherwise
 * it is measured up to, but excluding, <string_end>. */
{
  long_t total = 0;
  string_t end = (string_end != NULL) ? string_end : string + strlen (string);
  string_t p;

  for (p = string; p < end; p++)
  {
    if (*p == '\\' || *p == '\"')
      total += 2; /* The char is preceded by a backslash. */
    else if (ORD (*p) >= 32 && (ORD (*p) <= 126 || ORD (*p) >= 160))
      total++; /* The char is copied literally. */
    else
      total += 4; /* The char becomes a backslash and three octal digits. */
  }
  return total;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t copy_string_readable (string_t to,
                                      string_t from,
                                      string_t to_end,
                                      string_t from_end)
/* Like "copy_string", but copy a "\" in front of quotes and backslashes
 * and copy any control chars in octal code: "\000".
 * If <from_end> != NULL, it marks the end of the string to be copied;
 * otherwise <from> is copied up to its EOS.
 * Nothing is written at or behind <to_end>. As in "copy_string", one
 * position is always kept free for the trailing EOS, so the result is
 * terminated by EOS whenever the buffer is not empty.
 * If the converted string and its EOS don't fit into the buffer, the
 * function "error" is called.
 * The pointer that is returned points behind the last converted char,
 * which is where the trailing EOS is stored. */
{
  if (from_end == NULL)
    from_end = from + strlen (from);

  for (; from < from_end; from++)
  {
    int needed; /* Number of chars that <*from> is converted to. */

    if (*from == '\"' || *from == '\\')
      needed = 2;
    else if (ORD (*from) < 32 || (ORD (*from) > 126 && ORD (*from) < 160))
      needed = 4;
    else
      needed = 1;

    /* Compare sizes instead of computing "to + needed", which might point
     * behind the buffer. Using "<=" keeps one char free for the EOS. */
    if (to_end - to <= needed)
      break;

    if (needed == 2) /* Prepend a backslash. */
    {
      *to++ = '\\';
      *to++ = *from;
    }
    else if (needed == 4)
      /* Convert control chars to backslash and octal code. */
    {
      short_t i, code;

      code = ORD (*from);
      for (i = 3; i >= 1; i--) /* Fill in the digits from right to left. */
      {
        to[i] = '0' + code % 8;
        code = code / 8;
      }
      to[0] = '\\';
      to += 4;
    }
    else /* Copy literally. */
      *to++ = *from;
  }

  /* The loop has been left early, so something did not fit. */
  if (from < from_end)
    error ("string too long");

  /* Only an empty buffer has no room for the EOS. */
  if (to < to_end)
    *to = EOS;

  return to;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t new_string (string_t string)
/* Return a copy of <string> in newly allocated memory.
 * The copy can be deleted with "free". */
{
  /* Reserve room for all chars of <string> and for the trailing EOS. */
  string_t copy = (string_t) new_vector (sizeof (char), strlen (string) + 1);

  return strcpy (copy, string);
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t new_string_readable (string_t string, string_t string_end)
/* Return a copy of <string> in newly allocated memory, converted by
 * "copy_string_readable": quotes and backslashes are preceded by a "\",
 * and control chars are written in octal code: "\000".
 * If <string_end> != NULL, it marks the end of the string. */
{
  /* Size of the converted string, including the trailing EOS. */
  long_t size = readable_length (string, string_end) + 1;
  string_t readable = (string_t) new_vector (sizeof (char), size);

  copy_string_readable (readable, string, readable + size, string_end);
  return readable;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t new_string_section (string_t string, string_t end)
/* Return a copy of the section of <string> that ends right before <end>,
 * in newly allocated memory and terminated by EOS.
 * The copy can be freed with "free". */
{
  /* Reserve room for the section and for the trailing EOS. */
  string_t section = new_vector (sizeof (char), end - string + 1);
  string_t dest = section;

  for (; string < end; string++)
    *dest++ = *string;
  *dest = EOS;

  return section;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t concat_strings (string_t first_string, ...)
/* Return the concatenation of all argument strings in newly allocated
 * memory. The argument list must be terminated by a NULL pointer.
 * The result string can be freed with "free" when it's no longer used. */
{
  va_list args;
  size_t total;
  string_t part, result, result_end, tail;

  /* First pass: add up the lengths of all argument strings. */
  total = strlen (first_string);
  va_start (args, first_string);
  while ((part = va_arg (args, string_t)) != NULL)
    total += strlen (part);
  va_end (args);

  /* Reserve room for all chars and for the trailing EOS. */
  result = (string_t) new_vector (sizeof (char), total + 1);
  result_end = result + total + 1;

  /* Second pass: append each string at the EOS of the previous one. */
  tail = copy_string (result, first_string, result_end);
  va_start (args, first_string);
  while ((part = va_arg (args, string_t)) != NULL)
    tail = copy_string (tail, part, result_end);
  va_end (args);

  return result;
}

/*---------------------------------------------------------------------------*/

GLOBAL string_t next_non_space (string_t string)
/* Return a pointer to the first char in <string> for which IS_SPACE
 * doesn't hold, i.e. <string> without its leading spaces. */
{
  string_t s;

  for (s = string; IS_SPACE (*s); s++)
    ; /* Skip this space. */

  return s;
}

/*---------------------------------------------------------------------------*/

GLOBAL short_t strcmp_no_case (string_t str1, string_t str2)
/* Compare <str1> and <str2> lexically, ignoring case (both are converted
 * by TO_LOWER char by char):
 * Result is -1 if <str1> < <str2>,
 *            0 if <str1> = <str2>,
 *            1 if <str1> > <str2>. */
{
  for (;; str1++, str2++)
  {
    /* The first pair of chars that differs decides the order. */
    if (TO_LOWER (*str1) != TO_LOWER (*str2))
      return (ORD (TO_LOWER (*str1)) < ORD (TO_LOWER (*str2))) ? -1 : 1;

    /* The chars match, so at EOS both strings have ended. */
    if (*str1 == EOS)
      return 0;
  }
}

/*---------------------------------------------------------------------------*/

GLOBAL short_t strncmp_no_case (string_t str1, string_t str2, long_t n)
/* Return (case insensitive) lexical order of <str1> and <str2>,
 * but compare only the first <n> characters.
 * No char behind the first <n> chars of either string is read, so the
 * strings need not be terminated by EOS within this range.
 * If <n> <= 0, there is nothing to compare and the result is 0.
 * Result is -1 if <str1> < <str2>,
 *            0 if <str1> = <str2>,
 *            1 if <str1> > <str2>. */
{
  /* <n> is tested before the strings are accessed, so the chars at
   * index <n> are never read. */
  for (; n > 0; n--, str1++, str2++)
  {
    /* The first pair of chars that differs decides the order. */
    if (TO_LOWER (*str1) != TO_LOWER (*str2))
      return (ORD (TO_LOWER (*str1)) < ORD (TO_LOWER (*str2))) ? -1 : 1;

    /* The chars match, so at EOS both strings have ended. */
    if (*str1 == EOS)
      return 0;
  }

  /* All <n> chars have been compared and found equal. */
  return 0;
}

/*---------------------------------------------------------------------------*/
