/*
 * $COPYRIGHT$
 *
 *	$Id: xmpi_sys.lam.cc,v 1.12 2001/07/11 15:43:13 bbarrett Exp $
 *
 *	Function:	- xmpi -> MPI system interface
 *			- LAM version
 */

#include <lam_config.h>

#include <sys/errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <args.h>
#include <all_list.h>
#include <app_mgmt.h>
#include <app_schema.h>
#include <freq.h>
#include <laminternal.h>
#include <lamnet.h>
#include <kio.h>
#include <portable.h>
#include <priority.h>
#include <terror.h>
#include <trreq.h>
#include <typical.h>
#include <mpi.h>
#include <mpisys.h>
#include <net.h>

#include "xmpi.h"
#include "bhostparse.h"
#include "xmpi_error.h"
#include "xmpi_asc_parse.h"
#include "mpitr_com.h"
#include "trdrain.h"
#include "xmpi_sys.h"
#include "all_list.h"
#include "all_opt.h"
#include "app_mgmt.h"
#include "app_schema.h"
#include "args.h"
#include "events.h"
#include "freq.h"
#include "kio.h"
#include "laminternal.h"
#include "ndi.h"
#include "net.h"
#include "portable.h"
#include "priority.h"
#include "preq.h"
#include "terror.h"
#include "typical.h"
#include "sfh.h"

/*
 * extern functions
 *
 * Both are declared with C linkage because they are not provided by the
 * headers included above.
 */
extern "C" {
/* Called from xmpi_sys_finalize() with status 0. */
extern void lam_kexit(int);

/*
 * Called from xmpi_sys_kill() with an argv-style vector; that caller
 * treats 0 as success, a negative result as an error and a positive
 * result as an errno value.
 */
extern int _lam_few(char**);
}

/*
 * private functions
 *
 * NOTE(review): local_initialize() is the only helper here without
 * internal linkage -- confirm whether another translation unit uses it.
 */
int local_initialize();			/* one time LAM attachment */
static int set_stdio();			/* hand stdio over to the application */
static void reset_stdio();		/* restore default stdio descriptors */
static void report_run_error(LIST*);	/* report application startup failure */
static int get_mpi_world(int4, struct _gps *, struct _gps *);
					/* collect pid/index of MPI processes */
static void error_cleanup(void);	/* drain leftover startup messages */

/*
 * private variables
 */
/* library version string handed out by xmpi_sys_version() */
static char version[] = "LAM-libxmpi 6.5.3";

/*
 * external variables
 *
 * _kio supplies the job identifier and the stdio descriptors that this
 * file sets before starting an application; _ufd supplies the descriptors
 * copied into _kio by set_stdio().
 */
extern struct kio_t _kio;	       /* kernel I/O block */
extern struct fclient _ufd[FUMAX];     /* user file desc. */

/*
 *	xmpi_sys_init
 *
 *	Function:	- initializes the MPI implementation interface
 *			- no work is done here for the LAM version
 *	Accepts:	- nothing
 *	Returns:	- 0 (always)
 */
int
xmpi_sys_init()
{
  return 0;
}

/*
 *	xmpi_sys_finalize
 *
 *	Function:	- finalizes the MPI implementation interface
 *			- hands control to lam_kexit() with status 0
 */
void
xmpi_sys_finalize()
{
  lam_kexit(0);
}

/*
 *	xmpi_sys_run
 *
 *	Function:	- parses, schedules and runs an application schema
 *			- collects the GPS entries of the started MPI
 *			  processes and sends the process table to them
 *	Accepts:	- application schema (as taken by xmpi_asc_parse)
 *			- application GPS array (returned, malloc'ed; the
 *			  caller owns it on success)
 *			- length of application GPS array (returned)
 *	Returns:	- 0 or -1 (error)
 *			- on error the output parameters are left untouched
 *			  and both GPS arrays allocated here are freed
 */
int
xmpi_sys_run(char *aschema, struct _gps **app_procs,
	     int *app_nprocs)
{
  int4 rtf = 0;			       /* runtime flags */
  LIST *appd;			       /* parsed schema */
  LIST *appd_sched;		       /* scheduled app schema */
  struct _gps *world;		       /* world GPS array */
  struct _gps *mpiworld;	       /* MPI world GPS array */
  int world_n;			       /* size of world */

  if (local_initialize())
    return (-1);

  if (xmpi_asc_parse(aschema, &rtf, &appd) == 0) {
    xmpi_error(0, (char*) "Parsing application");
    return (-1);
  }

  if ((appd_sched = asc_schedule(appd)) == 0) {
    asc_free(appd);
    xmpi_error(0, (char*) "Scheduling application");
    return (-1);
  }
  asc_free(appd);
/*
 * Get the process table, store it for XMPI and send it to MPI processes.
 */
  world_n = al_count(appd_sched);
  mpiworld = (struct _gps *) malloc(world_n * sizeof(struct _gps));
  world = (struct _gps *) malloc(world_n * sizeof(struct _gps));

  if (world == 0 || mpiworld == 0) {
    asc_free(appd_sched);
    xmpi_error(0, (char*) "malloc");
/*
 * Only one of the two allocations may have failed; free(0) is harmless.
 */
    free(mpiworld);
    free(world);
    return (-1);
  }
  if (set_stdio()) {
    asc_free(appd_sched);
    xmpi_error(0, (char*) "set_stdio");
    free(mpiworld);
    free(world);
    return (-1);
  }
  if (asc_run(appd_sched, 0, rtf, 0, TRUE, world)) {
    reset_stdio();
    report_run_error(appd_sched);
    asc_free(appd_sched);
    free(mpiworld);
    free(world);
    return (-1);
  }
  reset_stdio();
  asc_free(appd_sched);
/*
 * Read pids and indices from MPI processes.
 */
  if (get_mpi_world(world_n, world, mpiworld)) {
    app_doom(world_n, mpiworld, SIGUDIE);
    app_doom(world_n, world, -15);
    error_cleanup();
    xmpi_error(0, (char*) "get_mpi_world");
    free(mpiworld);
    free(world);
    return (-1);
  }
/*
 * Send process table to all processes.
 */
  if (app_sendprocs(world_n, mpiworld)) {
    app_doom(world_n, mpiworld, SIGUDIE);
    app_doom(world_n, world, -15);
    error_cleanup();
    xmpi_error(0, (char*) "app_sendprocs");
    free(mpiworld);
    free(world);
    return (-1);
  }
/*
 * Success: the MPI world array now belongs to the caller, the initial
 * world array is no longer needed.
 */
  *app_nprocs = world_n;
  *app_procs = mpiworld;

  free(world);

  return (0);
}

/*
 *	xmpi_sys_kill
 *
 *	Function:	- kills the running application
 *	Accepts:	- GPS array (unused)
 *			- GPS array length (unused)
 *	Returns:	- 0 or -1 (error)
 */
int
xmpi_sys_kill(struct _gps *, int)
{
  char **av;			       /* cmd line args */
  int ac;			       /* # cmd line args */
  int r;			       /* result of lamclean */

  av = 0;
  ac = 0;
  argvadd(&ac, &av, "lamclean");

  r = _lam_few(av);
  argvfree(av);

  if (r < 0) {
    return (-1);
  } else if (r > 0) {
    errno = r;
    return (-1);
  }
  return (0);
}


/*
 *	xmpi_sys_comm
 *
 *	Function:	- fetch the MPI communicator trace identified by a
 *			  member process and a context ID
 *			- the trace is obtained through mpitr_comget(),
 *			  which allocates the space holding it
 *	Accepts:	- process GPS
 *			- context ID
 *	Returns:	- trace or 0 (mpitr_comget reported failure)
 */
void *
xmpi_sys_comm(struct _gps *proc, int cid)
{
  void *trace;

  if (mpitr_comget(proc->gps_node, proc->gps_pid, cid, (char **) &trace) != 0)
    return 0;

  return trace;
}

/*
 *	xmpi_sys_dtype
 *
 *	Function:	- fetch the MPI datatype trace identified by its
 *			  creator process and datatype label
 *			- the trace is obtained through mpitr_dtypeget(),
 *			  which allocates the space holding it
 *			- a null trace may be returned for basic datatypes,
 *			  so failure is signalled with -1 rather than 0
 *	Accepts:	- process GPS
 *			- datatype label
 *	Returns:	- trace or -1 (error)
 */
void *
xmpi_sys_dtype(struct _gps *proc, int dtype)
{
  char *trace = 0;

  if (mpitr_dtypeget(proc->gps_node, proc->gps_pid, dtype, &trace) != 0)
    return (void *) -1;

  return trace;
}

/*
 *	xmpi_sys_trace
 *
 *	Function:	- fetches communication traces from the MPI
 *			  implementation into an open file
 *	Accepts:	- open file handle
 *			- GPS array (its first entry is dereferenced)
 *			- GPS array length
 *	Returns:	- 0 or -1 (error)
 */
int
xmpi_sys_trace(int fd, struct _gps *app_procs, int app_nprocs)
{
/*
 * The GPS array is already known, but the world trace of the first
 * process is fetched regardless as a sanity check.
 */
  if (lam_rtrfget(app_procs->gps_node, TRWORLD, app_procs->gps_pid, fd) <= 0)
    return -1;

  return trdrain_mpi(fd, app_procs, app_nprocs, 1, XMPI_FLUSHDELAY) ? -1 : 0;
}

/*
 *	xmpi_sys_errorstr
 *
 *	Function:	- format system error message
 *			- LAM_EEXIT maps to the fixed text "LAM error"; any
 *			  other value is formatted by lam_errorstr() with
 *			  errno temporarily set to it
 *	Accepts:	- error number
 *	Returns:	- pointer to a static buffer holding the message;
 *			  the buffer is overwritten by the next call
 */
char *
xmpi_sys_errorstr(int err)
{
  static char errstring[256];	       /* error string */

  errstring[0] = 0;

  if (err != LAM_EEXIT) {
/*
 * lam_errorstr() is driven through errno here, so install the requested
 * code and put the caller's value back afterwards.
 */
    const int saved_errno = errno;

    errno = err;
    lam_errorstr(errstring, 255);
    errno = saved_errno;

    return errstring;
  }

  strcpy(errstring, "LAM error");
  return errstring;
}

/*
 *	xmpi_sys_version
 *
 *	Function:	- get library version
 *	Returns:	- library version string (file-scope storage, not
 *			  to be freed by the caller)
 */
char *
xmpi_sys_version()
{
  return version;
}

/*
 *	xmpi_sys_logo
 *
 *	Function:	- get vendor specific logo
 *	Returns:	- XPM format vendor logo; always 0 in this version
 */
char **
xmpi_sys_logo()
{
  return 0;
}

/*
 *	local_initialize
 *
 *	Function:	- one time initialization: attach to the LAM daemon,
 *			  change the local working directory and set the
 *			  job identifier
 *			- once it has succeeded, further calls return 0
 *			  immediately
 *			- after any failure the next call tries again, so a
 *			  failed attachment is never reported as success
 *	Returns:	- 0 or -1 (error)
 */
int
local_initialize()
{
  static int initialized = 0;	       /* set only after full success */
  char *cwd;			       /* current working directory */

  if (initialized)
    return 0;

/*
 * Become a LAM process if not already one.
 */
  if (kinit(PRCMD)) {

    if (errno == ENOKERNEL) {
      xmpi_error(0, (char*) "Please boot LAM");
    } else {
      xmpi_error(0, (char*) "Attaching to daemon");
    }

    return (-1);
  }

/*
 * Change local working directory.
 */
  if ((cwd = getworkdir()) == 0) {
    xmpi_error(0, (char*) "getworkdir");
    return (-1);
  }
  if (lam_rfchdir(LOCAL, cwd)) {
    xmpi_error(0, (char*) "lam_rfchdir");
    free(cwd);
    return (-1);
  }
  free(cwd);
/*
 * Set job identifier to be inherited by the applications.
 */
  _kio.ki_jobid.jid_node = getnodeid();
  _kio.ki_jobid.jid_pid = getpid();

  initialized = 1;

  return (0);
}

/*
 *	set_stdio
 *
 *	Function:	- set up application stdio
 *			- does nothing when none of the file descriptor
 *			  passing mechanisms is configured
 *	Returns:	- 0 or -1 (error)
 */
static int
set_stdio()
{

#if (LAM_HAVE_BSD43_FD_PASSING || LAM_HAVE_BSD44_FD_PASSING || LAM_HAVE_SYSV_FD_PASSING)
  char server[LAM_PATH_MAX];	       /* fd server socket name */

/*
 * Build the server name from our pid, then pass stdin, stdout and stderr
 * to filed.  The second step is attempted only if the first succeeded.
 */
  if (lam_mktmpid((int) getpid(), server, sizeof(server))
      || lam_lfopenfd(server)) {
    return -1;
  }
/*
 * Point the LAM file descriptors at the passed file descriptors.
 * lam_lfopenfd() has already dealt with the case of stdin being a tty.
 */
  _kio.ki_stdin = _ufd[0].fu_tfd;
  _kio.ki_stdout = _ufd[1].fu_tfd;
  _kio.ki_stderr = _ufd[2].fu_tfd;
#endif

  return 0;
}

/*
 *	reset_stdio
 *
 *	Function:	- reset stdio so rfatexit will clean it up
 *			- puts descriptors 0, 1 and 2 back into the kernel
 *			  I/O block
 *	Returns:	- nothing
 */
static void
reset_stdio()
{
  _kio.ki_stdin = 0;
  _kio.ki_stdout = 1;
  _kio.ki_stderr = 2;
}

/*
 *	report_run_error
 *
 *	Function:	- nice error message when application startup fails
 *			- errno is set to the error code of the first
 *			  process entry that recorded one (0 if none did)
 *			  before the message is reported
 *	Accepts:	- application descriptor
 */
static void
report_run_error(LIST *appd)
{
  struct aschema *pp;		       /* ptr process entry */
  char buf[512];		       /* formatted message */

  errno = 0;

  for (pp = (struct aschema *) al_top(appd); pp;
       pp = (struct aschema *) al_next(appd, pp)) {
    if (pp->asc_errno) {
      errno = pp->asc_errno;
      break;
    }
  }

/*
 * Name the program only when a failing entry was actually found; the
 * program name has no length limit, so the formatting is bounded by the
 * buffer size.
 */
  if (pp != 0 && errno == ENOENT) {
    snprintf(buf, sizeof(buf), "Cannot start \"%s\"",
	     pp->asc_args->apa_argv[0]);
    xmpi_error(0, buf);
  } else {
    xmpi_error(0, (char*) "Starting application");
  }
}

/*
 *	get_mpi_world
 *
 *	Function:	- get MPI world
 *			- starts from a copy of the initial world with all
 *			  pids cleared, then receives one message per
 *			  process carrying its slot, pid and index
 *	Accepts:	- size of world
 *			- initial process world
 *			- MPI process world (out)
 *	Returns:	- 0 or LAMERROR
 */
static int
get_mpi_world(int4 world_n, struct _gps * world, struct _gps * mpiworld)
{
  struct nmsg msg;
  int received;			       /* # messages handled so far */
  int slot;			       /* world position named by sender */

  memcpy(mpiworld, world, world_n * sizeof(struct _gps));
  for (slot = 0; slot < world_n; ++slot) {
    mpiworld[slot].gps_pid = 0;
  }

  LAM_ZERO_ME(msg);
  msg.nh_event = (-getpid()) & 0xBFFFFFFF;
  msg.nh_length = 0;
  msg.nh_flags = DINT4DATA;

  for (received = 0; received < world_n; ++received) {
    msg.nh_type = 3;
/*
 * Give up on a failed receive or on a message that comes back with
 * type 1.  NOTE(review): type 1 presumably marks a process that ended
 * before reporting in -- confirm against the sender.
 */
    if (nrecv(&msg) || msg.nh_type == 1) {
      return (LAMERROR);
    }
/*
 * Set the MPI process pid and index, rejecting positions outside the world.
 */
    slot = msg.nh_data[0];
    if (slot < 0 || slot >= world_n) {
      errno = EIMPOSSIBLE;
      return (LAMERROR);
    }
    mpiworld[slot].gps_pid = msg.nh_data[1];
    mpiworld[slot].gps_idx = msg.nh_data[2];
  }

  return (0);
}

/*
 *	error_cleanup
 *
 *	Function:	- try to clean up init and wait messages
 *			- this is not foolproof but better than nothing
 */
static void
error_cleanup(void)
{
  struct nmsg msg;

/*
 * Pause for a second before looking for messages.
 */
  sleep(1);

  LAM_ZERO_ME(msg);
  msg.nh_event = (-getpid()) & 0xBFFFFFFF;
  msg.nh_length = 0;
  msg.nh_flags = DINT4DATA;
/*
 * Keep receiving init messages and wait messages until ntry_recv()
 * returns non-zero; the type is set again before every attempt.
 */
  do {
    msg.nh_type = 3;
  } while (ntry_recv(&msg) == 0);
}

#ifdef HPUX
/*
 * These functions implement the "build and run" and "option setting"
 * dialogs when XMPI is built for running on a HP machine and uses the
 * LAM libxmpi.	 They are to be removed once the HP "bypass" becomes
 * unnecessary.
 */
/*
 * HP wrapper for the "build and run" dialog: passes the parent widget
 * straight to xmpi_run_dialog().
 */
void
xmpi_hp_run_dialog(Widget parent)
{
  xmpi_run_dialog(parent);
}

/*
 * HP wrapper for the "option setting" dialog: passes the parent widget
 * straight to xmpi_options_set().
 */
void
xmpi_hp_options_set(Widget parent)
{
  xmpi_options_set(parent);
}

#endif				       /* HPUX */
