From 8f27e65bddd7d4b8515ce620fb485fdd78fcdf89 Mon Sep 17 00:00:00 2001 From: Constantin Jucovschi Date: Fri, 24 Apr 2009 07:20:22 -0400 Subject: Initial commit --- rasmgr/rasmgr_localsrv.cc | 368 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 rasmgr/rasmgr_localsrv.cc (limited to 'rasmgr/rasmgr_localsrv.cc') diff --git a/rasmgr/rasmgr_localsrv.cc b/rasmgr/rasmgr_localsrv.cc new file mode 100644 index 0000000..0d16318 --- /dev/null +++ b/rasmgr/rasmgr_localsrv.cc @@ -0,0 +1,368 @@ +/* +* This file is part of rasdaman community. +* +* Rasdaman community is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* Rasdaman community is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with rasdaman community. If not, see <http://www.gnu.org/licenses/>. +* +* Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2009 Peter Baumann / +rasdaman GmbH. +* +* For more information please see <http://www.rasdaman.org> +* or contact Peter Baumann via <baumann@rasdaman.com>. 
+/ +/** + * SOURCE: rasmgr_localsrv.cc + * + * MODULE: rasmgr + * CLASS: LocalServer, LocalServerManager + * + * PURPOSE: + * management of rasserver executables + * + * COMMENTS: + * None + * +*/ + +using namespace std; + +#include "rasmgr_localsrv.hh" +#include "rasmgr_master.hh" +#include "rasmgr_srv.hh" +#include +#include /* NOTE(review): the header names of the two bare #include directives are missing in this copy; restore them from the original file. */ + +#include "raslib/rminit.hh" + +#include "debug.hh" + + +// aux function for now() to avoid a compiler warning (see 'man strftime') +/* my_strftime: forwards s, max, fmt and tm unchanged to strftime() and returns its result. */ size_t my_strftime(char *s, size_t max, const char *fmt, const struct tm *tm) +{ + return strftime(s, max, fmt, tm); +} + +// now(): aux function returning, as a static string, the current time +// keep in sync with same function in rasserver +/* Returns a pointer to a function-local static 50 byte buffer, so every call overwrites the text handed out by the previous call. The buffer holds the local time formatted by strftime; when strftime reports 0 the fallback message is copied in instead. */ const char* now() +{ + size_t strfResult = 0; // return value of strftime() + static char timestring[50]; // must hold 20+1 chars + + time_t t = time(NULL); // get time + struct tm* tm = localtime(&t); // break down time + strfResult = my_strftime( timestring, sizeof(timestring), "[%F %T]", tm ); // format time + if (strfResult == 0) // bad luck? then take fallback message + (void) strncpy( timestring, "[-no time available-]", sizeof(timestring) ); + return( timestring ); +} + +/* Default constructor: empty name, pid 0, entry marked as not valid. */ LocalServer::LocalServer() + { serverName[0]=0; + valid=false; + serverPid=0; + } + +/* init: stores name and pid p and marks the entry valid. The name is copied with strcpy, without a length check. NOTE(review): the capacity of serverName is declared outside this file; confirm that callers cannot pass a longer name. */ void LocalServer::init(const char *name,pid_t p) + { strcpy(serverName,name); + serverPid=p; + valid=true; + } +/* getName: returns the stored serverName buffer (not a copy). */ const char* LocalServer::getName() + { return serverName; + } + +/* getPID: returns the stored serverPid. */ pid_t LocalServer::getPID() + { return serverPid; + } +/* isValid: returns the valid flag, which is false after construction and true after init. */ bool LocalServer::isValid() + { return valid; + } + +//####################################### +/* SIGCHLD handler: ignores its signal number argument and forwards to localServerManager.childSignalIn(). */ void catch_SIGCHLD(int) + { localServerManager.childSignalIn(); + } +//####################################### + +/* Constructor: clears wasSignal and installs catch_SIGCHLD as the handler for SIGCHLD. */ LocalServerManager::LocalServerManager() + { wasSignal=false; + + signal (SIGCHLD, catch_SIGCHLD); + } +/* Destructor: empty body. */ LocalServerManager::~LocalServerManager() + { + } +/* startNewServer: copies commandline into a 300 byte local buffer with strcpy (no length check) before working on it. NOTE(review): most of the body is missing in this copy, so the rest of the contract cannot be documented from here. */ bool LocalServerManager::startNewServer(const char* commandline) + { + ENTER( "LocalServerManager::startNewServer: enter. 
cmdLine=" << commandline ); + char localcomm[300]; + strcpy(localcomm,commandline); + + int i; + const int maxarg=50; + char* argv[maxarg]; // rasserver command line + char* fileName; // name of executable, e.g., "rasserver" + char* serverName; // symbolic server name, e.g., "S1" + + char *pos=localcomm; + + for(i=0;igetName() << ", pid " << iter->getPID() << "..." << flush; + int killResult = kill(iter->getPID(),SIGTERM); + if (killResult == -1) + { + cout << "Error: " << strerror(errno) << endl; + result = false; + } + else + { + iter = srvList.erase(iter); + VLOG << "ok" << endl; + result = true; + break; + } + } + + iter++; + } // for + + if (!found) + { + cout << "failed: server unknown." << endl; + result = false; + } + + LEAVE( "LocalServerManager::sendTerminateSignal: leave. result=" << result ); + return result; +} + +// killServer: terminate server process. +// if name is in list of known servers, try to kill; otherwise, complain & do nothing. +// returns: +// true iff server was found and killed successfully +// false on error +bool LocalServerManager::killServer(const char *serverName) +{ + ENTER( "LocalServerManager::killServer: enter. serverName=" << serverName ); + + bool found = false; // list entry pertaining to serverName found? + bool result = false; // function result + + list::iterator iter=srvList.begin(); + for(int i=0;igetName(),serverName)==0) + { + found = true; + VLOG << now() << " killing rasdaman server " << iter->getName() << ", pid " << iter->getPID() << "..." << flush; + + int killResult = kill(iter->getPID(),SIGKILL); + if (killResult == -1) + { + cout << "Error: " << strerror(errno) << endl; + result = false; + } + else + { + iter = srvList.erase(iter); + VLOG << "ok" << endl; + result = true; + break; + } + } + iter++; + } + + if (!found) + { + cout << "failed: server unknown." << endl; + result = false; + } + + LEAVE( "LocalServerManager::killServer: leave. 
result=" << result ); + return result; +} + +LocalServer& LocalServerManager::operator[](int x) + { list::iterator iter=srvList.begin(); + for(int i=0;i::iterator iter=srvList.begin(); + for(int i=0;igetName() << "." ); + if(iter->getPID()==exitpid) + { + TALK( "LocalServerManager::cleanChild: rasdaman server " << iter->getName() << " terminated illegally, status=" << status ); + + cout<<"Error: rasdaman server " << iter->getName() << ", pid " << exitpid << " terminated illegally, reason: "; + // see 'man waitpid': decoding of status variable + if (WIFEXITED(status) != 0) + cout << "exited with return code " << WEXITSTATUS(status); + else if (WIFSIGNALED(status)) + cout << "uncaught signal " << WTERMSIG(status); + else + cout << "(unknown reason)"; + cout << endl; + + // choices: restart silently the dead server or + // just tell the manager about it + // Not restart from here, because of sync problem for capabilities, master has to do that!!! + LocalServer temp=*iter; + srvList.erase(iter); + + reportDeadServer(temp); + break; + } + iter++; + } // for + } //while + + wasSignal=false; + LEAVE( "LocalServerManager::cleanChild: leave." ); + } + +void LocalServerManager::reportDeadServer(LocalServer &srv) + { + ENTER( "LocalServerManager::reportDeadServer: enter." ); + + int dummy = -1; + RasServer &r=rasManager[srv.getName()]; + + if(r.isValid()) r.changeStatus(SERVER_CRASHED,dummy); + LEAVE( "LocalServerManager::reportDeadServer: leave." ); + } + -- cgit