/*
  Copyright Mission Critical Linux, 2000

  Kimberlite is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  Kimberlite is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Kimberlite; see the file COPYING.  If not, write to the
  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
  MA 02139, USA.

 * Author: Gregory P. Myrdal <Myrdal@MissionCriticalLinux.Com>
 *
 * svcmgr_action.c
 *
 *
 * This file contains functions that deal with Service Manager service
 * actions.  This generally means what the Service Manager needs to do
 * when starting and stopping services.  The svc.c module contains the
 * specific service functions that perform the service start or stop.
 */

/*
 * Version string that is filled in by CVS (Revision keyword expansion).
 *
 * Nothing in this file reads the variable; the "unused" attribute keeps
 * the compiler from warning about a file-scope static that is never
 * referenced.
 */
static const char *version __attribute__ ((unused)) = "$Revision: 1.11 $";

/*
 * System includes
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <linux/reboot.h>
#include <sys/reboot.h>
#include <sys/syslog.h>

/*
 * Cluster includes
 */
#include <clusterdefs.h>
#include <svcmgr.h>
#include <logger.h>
#include "svcmgr_proto.h"

/*
 * Internal functions
 *
 * waitForService() polls a service's status until it reaches a wanted
 * state/owner pair, an "already done" end state/owner pair, a state it
 * will not leave on its own, or a timeout.  It is defined at the end of
 * this file.
 */
static int waitForService(int svcID, int svcState, int nodeID, int svcEndState, int endNodeID);

/*
 * Externally defined functions
 *
 * Cluster lock primitives.  In this file they bracket reads and updates
 * of service status (reqServiceStatus() / reqServiceStatusChange()).
 */
extern void clu_lock(void);
extern void clu_un_lock(void);

/*
 * Externally defined variables
 *
 * errbuf     - scratch buffer used to format messages before logging;
 *              its size is not visible from this file.
 * myNodeID   - ID of the local node; compared with and assigned to the
 *              owner field of service status blocks.
 * myNodeName - presumably the local node's name; not referenced by any
 *              function in this file.
 */
extern char errbuf[];
extern int myNodeID;
extern char *myNodeName;


/*
 * stopAllServices()
 *
 * Run the stop scripts for all existing services except disabled ones.
 * This should only be called at system start time, when we know we should
 * not be running any services.  Running the stops cleans up any persistent
 * service state or configuration left on the system if a service crashed.
 *
 * Disabled services are skipped: they may be in a broken state and a
 * failing stop must not prevent the Service Manager from starting.
 *
 * Note: the recorded state of a service is only touched when its status
 * block says this node still owns it; in that case the service is marked
 * SVC_STOPPED with no owner.  Services owned by other nodes (or by nobody)
 * keep their recorded state.
 *
 * Returns SUCCESS when every eligible service was stopped and, where
 * needed, its status updated.  Returns FAIL on the first stop, status
 * read or status update failure; services after the failing one are not
 * processed.
 */
int
stopAllServices(void)
{
	int svcID;
	char *svcName;
	ServiceBlock svcStatus;

	clulog(LOG_INFO, "Stopping services\n");

	for (svcID=MIN_SERVICE; svcID < MAX_SERVICES; svcID++)
	  {
	    if (serviceExists(svcID) != YES)
	        continue;

	    /*
	     * If the service is disabled it might be in a broken state,
	     * do not expect the stops to work to successfully start the
	     * Service Manager.
	     */
	    if (isServiceDisabled(svcID) == YES)
	        continue;

	    getSvcName(svcID, &svcName);

	    if (stopService(svcID) != SUCCESS)
	      {
	        clulog(LOG_ALERT,
"Cannot stop service %s during service initialization\n", svcName);
	        return(FAIL);
	      }

	    /*
	     * The status read and the possible update are done under one
	     * hold of the cluster lock so the owner check and the write
	     * see the same status block.
	     */
	    clu_lock();
	    if (reqServiceStatus(svcID, &svcStatus) != SUCCESS)
	      {
	        clulog(LOG_ERR, "Cannot get status for service %s\n",
	               svcName);
	        clu_un_lock();
	        return(FAIL);
	      }

	    if (svcStatus.owner == myNodeID)
	      {
	        svcStatus.owner = NODE_ID_NONE;
	        svcStatus.state = SVC_STOPPED;
	        if (reqServiceStatusChange(&svcStatus) != SUCCESS)
	          {
	            // Log here too; every other failure path does.
	            clulog(LOG_ERR,
"Cannot change status for service %s\n", svcName);
	            clu_un_lock();
	            return(FAIL);
	          }
	      }
	    clu_un_lock();
	  }

	return(SUCCESS);
}

/*
 * reqStartService()
 *
 * Request the start of a service.  The work is done in a forked child
 * process so that several service starts can run at the same time; the
 * parent returns as soon as fork() has succeeded.
 *
 * svcID    - ID of the service to start.
 * waitFlag - when YES, the child first waits (waitForService()) for the
 *            service to be SVC_STOPPED with no owner, then records it as
 *            SVC_STARTING owned by this node before starting it.  For any
 *            other value the child goes straight to the start.
 *
 * Returns, in the parent, SUCCESS when the child was forked and FAIL when
 * fork() failed.  The return value does not say whether the service
 * started.  The child never returns from this function: it exits with
 * SUCCESS or FAIL.
 *
 * If the service start fails the child attempts to stop the service so
 * that it is not partially configured on this system.  If that stop also
 * fails we do not know what might be configured on the system, so the
 * service is put in SVC_ERROR state for user intervention.
 */
int
reqStartService(int svcID, int waitFlag)
{
	ServiceBlock svcStatus;
	char *svcName;
	pid_t pid;

	getSvcName(svcID, &svcName);
	
	/*
	 * Fork off a separate process as we want to be able to do
	 * multiple service starts at the same time.
	 */
	pid = fork();
	if (pid == -1)			// error
	  {
	    // strerror() instead of indexing sys_errlist[] directly
	    sprintf(errbuf, "fork failed: %s", strerror(errno));
	    clulog(LOG_ERR, "%s", errbuf);
	    return(FAIL);
	  }

	if (pid != 0)			// parent
	    return(SUCCESS);

	/*
	 * Child from here on.  fork() returned 0 to us, so report our own
	 * process ID with getpid() rather than the fork() return value.
	 */
	clulog(LOG_DEBUG, "Forked process %d to start service %s\n",
	       (int)getpid(), svcName);

	closeChildFDs();		// close any file descriptors

	if (waitFlag == YES)
	  {
	    clulog(LOG_NOTICE, 
"Waiting for service %s to stop before starting\n", svcName);

	    switch (waitForService(svcID, SVC_STOPPED, NODE_ID_NONE,
	                                  SVC_RUNNING, myNodeID))
	      {
	        case SUCCESS:		// service is stopped
	          break;		// we have the cluster lock

	        case NOT_FOUND:		// nothing left to wait for
	          exit(SUCCESS);	// child exiting
	          break;

	        case TIMEOUT:
	          clulog(LOG_WARNING, 
"Timeout waiting to start service %s\n", svcName);
	          exit(FAIL);		// child exiting
	          break;

	        case FAIL:
	        default:
	          clulog(LOG_WARNING, 
"Wait and start of service %s failed\n", svcName);
	          exit(FAIL);		// child exiting
	      }

	    // NOTE: we have the cluster lock, need to unlock before child exit

	    svcStatus.id=svcID;
	    svcStatus.owner=myNodeID;
	    svcStatus.state=SVC_STARTING;

	    if (reqServiceStatusChange(&svcStatus) != SUCCESS)
	      {
	        clu_un_lock();
	        exit(FAIL);		// child exiting
	      }

	    clu_un_lock();
	  }

	svcStatus.id=svcID;
	svcStatus.owner=myNodeID;

	if ((startService(svcID)) != SUCCESS)
	  {
	    /*
	     * Start failed: record SVC_STOPPING, run the stop, then record
	     * the outcome (SVC_STOPPED with no owner, or SVC_ERROR).
	     */
	    svcStatus.state = SVC_STOPPING;
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        exit(FAIL);		// child exiting

	    if ((stopService(svcID)) != SUCCESS)
	      {
	        svcStatus.state = SVC_ERROR;
	      }
	    else
	      {
	        svcStatus.state = SVC_STOPPED;
	        svcStatus.owner = NODE_ID_NONE;
	      }

	    // The start failed, so the child fails whether or not the
	    // final status update succeeds.
	    (void)lockAndReqServiceStatusChange(&svcStatus);

	    exit(FAIL);			// child exiting
	  }

	svcStatus.state = SVC_RUNNING;
	if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	    exit(FAIL);			// child exiting

	exit(SUCCESS);			// child exiting
}

/*
 * reqWaitAndStartService
 *
 * Convenience wrapper: ask reqStartService() to start service 'svcID'
 * with its wait flag set to YES, i.e. wait until the service is in the
 * SVC_STOPPED state and then start it.
 *
 * Returns SUCCESS when reqStartService() reports SUCCESS.  Otherwise an
 * error naming the service is logged and FAIL is returned.
 */
int
reqWaitAndStartService(int svcID)
{
	char *name;

	if (reqStartService(svcID, YES) == SUCCESS)
	    return(SUCCESS);

	// The name is only needed for the error message.
	getSvcName(svcID, &name);
	clulog(LOG_ERR, "Cannot wait and start service %s\n", name);

	return(FAIL);
}

/*
 * reqStopService()
 *
 * Request the stop of a service.  The work is done in a forked child
 * process so that several service stops can run at the same time; the
 * parent returns as soon as fork() has succeeded.
 *
 * svcID    - ID of the service to stop.
 * waitFlag - when YES, the child first waits (waitForService()) for the
 *            service to be SVC_RUNNING and owned by this node, then
 *            records it as SVC_STOPPING before stopping it.  For any
 *            other value the child goes straight to the stop.
 *
 * Returns, in the parent, SUCCESS when the child was forked and FAIL when
 * fork() failed.  The return value does not say whether the service
 * stopped.  The child never returns from this function: it exits with
 * SUCCESS or FAIL.
 *
 * If the stop succeeds the service is recorded as SVC_STOPPED with no
 * owner.  If the stop fails the child records SVC_STARTING and tries to
 * restart the service to keep it available.  If the restart also fails we
 * do not know what state the service is in and cannot keep it highly
 * available, so rebootSystem() is called to let the service run on
 * another cluster node.
 *
 * The original design also left the service in SVC_STOPPING when the
 * stop failed during a system shutdown; that check is currently disabled
 * (see the commented-out code below).
 */
int
reqStopService(int svcID, int waitFlag)
{
	ServiceBlock svcStatus;
	char *svcName;
	pid_t pid;

	svcStatus.id = svcID;
	svcStatus.owner = myNodeID;
	svcStatus.state = SVC_UNINITIALIZED;	// to be changed later

	getSvcName(svcID, &svcName);

	/*
	 * Fork off a separate process as we want to be able to do
	 * multiple service stops at the same time.
	 */
	pid = fork();
	if (pid == -1)			// error
	  {
	    // strerror() instead of indexing sys_errlist[] directly
	    sprintf(errbuf, "fork failed: %s", strerror(errno));
	    clulog(LOG_ERR, "%s", errbuf);
	    return(FAIL);
	  }

	if (pid != 0)			// parent
	    return(SUCCESS);

	/*
	 * Child from here on.  fork() returned 0 to us, so report our own
	 * process ID with getpid() rather than the fork() return value.
	 */
	clulog(LOG_DEBUG, "Forked process %d to stop service %s\n", 
	       (int)getpid(), svcName);

	closeChildFDs();		// close any file descriptors

	if (waitFlag == YES)
	  {
	    clulog(LOG_NOTICE, 
"Waiting for service %s to start before stopping\n", svcName);

	    switch (waitForService(svcID, SVC_RUNNING, myNodeID,
	                                  SVC_STOPPED, NODE_ID_NONE))
	      {
	        case SUCCESS:		// service is running on myNodeID
	          break;		// we have the cluster lock

	        case NOT_FOUND:		// nothing left to wait for
	          exit(SUCCESS);	// child exiting
	          break;

	        case TIMEOUT:
	          clulog(LOG_WARNING, 
"Timeout waiting to stop service %s\n", svcName);
	          exit(FAIL);		// child exiting
	          break;

	        case FAIL:
	        default:
	          clulog(LOG_WARNING, 
"Wait and stop of service %s failed\n", svcName);
	          exit(FAIL);		// child exiting
	      }

	    // NOTE: we have the cluster lock, need to unlock before child exit

	    svcStatus.id=svcID;
	    svcStatus.owner=myNodeID;
	    svcStatus.state=SVC_STOPPING;

	    if (reqServiceStatusChange(&svcStatus) != SUCCESS)
	      {
	        clu_un_lock();
	        exit(FAIL);		// child exiting
	      }

	    clu_un_lock();
	  }

	svcStatus.id=svcID;
	svcStatus.owner=myNodeID;

	if ((stopService(svcID)) != SUCCESS)
	  {
	    // Disabled shutdown check, kept for reference:
/*
	    if (systemIsShuttingDown)
	      {
	        clulog(LOG_INFO, 
"Leaving service in %s state as system is going down\n",
	                serviceStateStrings[SVC_STOPPING]);
	        exit(SUCCESS);		// child exiting
	      }
*/
	    svcStatus.state = SVC_STARTING;
	    if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	        exit(FAIL);		// child exiting

	    if ((startService(svcID)) != SUCCESS)
	      {
	        clulog(LOG_ERR, 
"Failed to restart service after failed stop, rebooting system\n");
		sprintf(errbuf, 
"Service Manager failed stop and start of service %s\n", svcName);
	        /*
	         * NOTE(review): if rebootSystem() can return, control
	         * falls through and the service is recorded as
	         * SVC_RUNNING even though the restart failed --
	         * confirm that it does not return.
	         */
	        rebootSystem(errbuf);
	      }
	    svcStatus.state = SVC_RUNNING;
	  }
	else
	  {
	    svcStatus.state = SVC_STOPPED;
	    svcStatus.owner = NODE_ID_NONE;
	  }

	if (lockAndReqServiceStatusChange(&svcStatus) != SUCCESS)
	    exit(FAIL);			// child exiting

	exit(SUCCESS);			// child exiting
}

/*
 * reqWaitAndStopService
 *
 * Convenience wrapper: ask reqStopService() to stop service 'svcID' with
 * its wait flag set to YES, i.e. wait until the service is in the
 * SVC_RUNNING state and then stop it.
 *
 * Returns SUCCESS when reqStopService() reports SUCCESS.  Otherwise an
 * error naming the service is logged and FAIL is returned.
 */
int
reqWaitAndStopService(int svcID)
{
	char *name;

	if (reqStopService(svcID, YES) == SUCCESS)
	    return(SUCCESS);

	// The name is only needed for the error message.
	getSvcName(svcID, &name);
	clulog(LOG_ERR, "Cannot wait and stop service %s\n", name);

	return(FAIL);
}

/*
 * waitForService
 *
 * Poll the status of service 'svcID' until one of the following happens:
 *
 *   SUCCESS   - the service is in state 'svcState' and owned by 'nodeID'.
 *               The cluster lock is STILL HELD on return so nobody can
 *               change the service underneath the caller; the caller
 *               must call clu_un_lock().
 *   NOT_FOUND - the service is in 'svcEndState' owned by 'endNodeID'
 *               (some other process already did the work), or it is in
 *               SVC_ERROR, SVC_DISABLING or SVC_DISABLED, which it will
 *               not leave automatically.  Either way the wanted state
 *               was not seen and the caller should not operate on the
 *               service.
 *   TIMEOUT   - SVC_MAX_SLEEP_TIME worth of sleeping has gone by.
 *   FAIL      - the service status could not be read.
 *
 * For every return value other than SUCCESS the cluster lock has been
 * released.
 *
 * It is assumed that we are calling from a forked process so we do not
 * hang the main service manager process while waiting.
 */
static int
waitForService(int svcID, int svcState, int nodeID, 
	                  int svcEndState, int endNodeID)
{
	ServiceBlock current;
	char *name;
	char *ownerName = NULL;
	int elapsed = 0;		// total time handed to sleep() so far
	int pollsSinceLog = 15;		// at the threshold: log on first pass
	int alreadyDone;
	int stuck;

	getSvcName(svcID, &name);

	for (;;)
	  {
	    clu_lock();
	    if (reqServiceStatus(svcID, &current) != SUCCESS)
	      {
	        clulog(LOG_ERR, 
"Cannot get service status; cannot wait for service %s\n", name);
	        clu_un_lock();
	        return(FAIL);
	      }

	    /*
	     * Wanted state reached.  Return with the lock held; releasing
	     * it is the caller's job.
	     */
	    if (current.state == svcState && current.owner == nodeID)
	      {
	        if (nodeID != NODE_ID_NONE)
	            getNodeName(current.owner, &ownerName);
	        else
	            ownerName = "nobody";

	        clulog(LOG_DEBUG, 
	              "Service %s moved into %s state; owned by %s\n",
	              name, serviceStateStrings[current.state],
	              ownerName);
	        return(SUCCESS);
	      }

	    // The remaining checks work on our private copy of the status.
	    clu_un_lock();

	    /*
	     * Some other process already carried out the operation we
	     * were waiting to perform; there is nothing more to wait for.
	     */
	    alreadyDone = (current.state == svcEndState);
	    alreadyDone = alreadyDone && (current.owner == endNodeID);
	    if (alreadyDone)
	      {
	        clulog(LOG_DEBUG, 
"Service %s is already in %s state, wait cancelled\n",
	              name, serviceStateStrings[current.state]);
	        return(NOT_FOUND);
	      }

	    /*
	     * Error, disabling and disabled are states the service does
	     * not get out of automatically, so give up waiting.
	     */
	    stuck = (current.state == SVC_ERROR);
	    stuck = stuck || (current.state == SVC_DISABLING);
	    stuck = stuck || (current.state == SVC_DISABLED);
	    if (stuck)
	      {
	        clulog(LOG_DEBUG, 
"Service %s moved into the %s state, wait cancelled\n",
	              name, serviceStateStrings[current.state]);
	        return(NOT_FOUND);
	      }

	    // Progress message once every 15 polls.
	    if (pollsSinceLog >= 15)
	      {
	        pollsSinceLog = 0;
	        clulog(LOG_DEBUG, 
"Waiting on service %s state change, state = %s looking for %s\n",
	              name, serviceStateStrings[current.state],
	              serviceStateStrings[svcState]);
	      }

	    if (elapsed >= SVC_MAX_SLEEP_TIME)
	      {
	        clulog(LOG_ERR, 
"Timeout waiting for service %s to go into %s state\n",
	               name, serviceStateStrings[svcState]);
	        return(TIMEOUT);
	      }

	    pollsSinceLog++;
	    elapsed += SVC_SLEEP_TIME;
	    sleep(SVC_SLEEP_TIME);
	  }
}

