/* * This file is part of rasdaman community. * * Rasdaman community is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Rasdaman community is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with rasdaman community. If not, see . * * Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2009 Peter Baumann / rasdaman GmbH. * * For more information please see * or contact Peter Baumann via . / /** * SOURCE: rasmgr_localsrv.cc * * MODULE: rasmgr * CLASS: LocalServer, LocalServerManager * * PURPOSE: * management of rasserver executables * * COMMENTS: * None * */ using namespace std; #include "rasmgr_localsrv.hh" #include "rasmgr_master.hh" #include "rasmgr_srv.hh" #include #include #include /* NOTE(review): the three #include directives directly above carry no header name in this copy; restore them from the upstream file before building. */ // ARG_MAX // fix for missing ARG_MAX; workaround for glibc-2.8 and above #if defined(_SC_ARG_MAX) # if defined(ARG_MAX) # undef ARG_MAX # endif # define ARG_MAX sysconf (_SC_ARG_MAX) #endif #include "raslib/rminit.hh" #include "debug.hh" // aux function for now() to avoid a compiler warning (see 'man strftime') /* my_strftime(): passes s, max, fmt and tm unchanged to strftime() and returns whatever strftime() returns. */ size_t my_strftime(char *s, size_t max, const char *fmt, const struct tm *tm) { return strftime(s, max, fmt, tm); } // now(): aux function returning, as a static string, the current time // keep in sync with same function in rasserver /* now(): formats the local time with the pattern [%F %T] into a function-local static 50-byte buffer and returns a pointer to that buffer. Every call overwrites the same buffer, so a caller that needs the text later must copy it. If the formatting call reports 0, the buffer is filled with the fallback text [-no time available-] instead. NOTE(review): the result of localtime() is passed on without a null check. */ const char* now() { size_t strfResult = 0; // return value of strftime() static char timestring[50]; // must hold 20+1 chars time_t t = time(NULL); // get time struct tm* tm = localtime(&t); // break down time strfResult = my_strftime( timestring, sizeof(timestring), "[%F %T]", tm ); // format time if (strfResult == 0) // 
bad luck? then take fallback message (void) strncpy( timestring, "[-no time available-]", sizeof(timestring) ); return( timestring ); } /* Default constructor: empty name, pid 0, flagged as not valid. */ LocalServer::LocalServer() { serverName[0]=0; valid=false; serverPid=0; } /* init(): copies name into serverName with strcpy(), stores the pid and flags the entry as valid. NOTE(review): the copy is unbounded and the capacity of serverName is declared outside this file; confirm callers cannot pass a longer name. */ void LocalServer::init(const char *name,pid_t p) { strcpy(serverName,name); serverPid=p; valid=true; } /* Accessors: stored server name, stored process id, and the validity flag. */ const char* LocalServer::getName() { return serverName; } pid_t LocalServer::getPID() { return serverPid; } bool LocalServer::isValid() { return valid; } //####################################### /* SIGCHLD handler: ignores the signal number and forwards to localServerManager.childSignalIn(). */ void catch_SIGCHLD(int) { localServerManager.childSignalIn(); } //####################################### /* Constructor: clears wasSignal and installs catch_SIGCHLD as the SIGCHLD handler via signal(). */ LocalServerManager::LocalServerManager() { wasSignal=false; signal (SIGCHLD, catch_SIGCHLD); } /* Destructor: empty body. */ LocalServerManager::~LocalServerManager() { } /* startNewServer(): the readable part declares a local buffer of ARG_MAX chars and logs an error for any commandline whose length is ARG_MAX or more. NOTE(review): with the ARG_MAX override near the top of the file the macro may expand to a sysconf() call, which would make localcomm a variable-length array (a compiler extension in C++) - confirm. */ bool LocalServerManager::startNewServer(const char* commandline) { ENTER( "LocalServerManager::startNewServer: enter. cmdLine=" << commandline ); char localcomm[ARG_MAX]; if (strlen(commandline) >= ARG_MAX) { VLOG <<"Error: rasserver launch command line too long: " << commandline /* NOTE(review): text is missing from this point in this copy - the remainder of startNewServer and the opening of sendTerminateSignal (signature, declarations, loop header) are absent. Judging by the LEAVE message further down, the code that resumes here is the tail of sendTerminateSignal. Restore from the upstream file. */ <getName() << ", pid " << iter->getPID() << "..." << flush; /* send SIGTERM; on failure report strerror(errno) and keep the list entry, on success drop the entry from srvList and stop searching */ int killResult = kill(iter->getPID(),SIGTERM); if (killResult == -1) { cout << "Error: " << strerror(errno) << endl; result = false; } else { iter = srvList.erase(iter); VLOG << "ok" << endl; result = true; break; } } iter++; } // for if (!found) { cout << "failed: server unknown." << endl; result = false; } LEAVE( "LocalServerManager::sendTerminateSignal: leave. result=" << result ); return result; } // killServer: terminate server process. // if name is in list of known servers, try to kill; otherwise, complain & do nothing. // returns: // true iff server was found and killed successfully // false on error /* Implementation note: the signal sent is SIGKILL, and the matching entry is erased from srvList only when kill() does not return -1. */ bool LocalServerManager::killServer(const char *serverName) { ENTER( "LocalServerManager::killServer: enter. serverName=" << serverName ); bool found = false; // list entry pertaining to serverName found? 
bool result = false; // function result /* NOTE(review): the iterator declaration and loop header that follow are damaged in this copy (the template argument and the text between the loop bound and the name comparison are missing); restore from the upstream file. */ list::iterator iter=srvList.begin(); for(int i=0;igetName(),serverName)==0) { found = true; VLOG << now() << " killing rasdaman server " << iter->getName() << ", pid " << iter->getPID() << "..." << flush; /* send SIGKILL; on failure report strerror(errno) and keep the list entry, on success drop the entry from srvList and stop searching */ int killResult = kill(iter->getPID(),SIGKILL); if (killResult == -1) { cout << "Error: " << strerror(errno) << endl; result = false; } else { iter = srvList.erase(iter); VLOG << "ok" << endl; result = true; break; } } iter++; } if (!found) { cout << "failed: server unknown." << endl; result = false; } LEAVE( "LocalServerManager::killServer: leave. result=" << result ); return result; } /* NOTE(review): from here the body of operator[] and the opening of cleanChild (signature, declarations, loop headers) are missing in this copy; only the tail of cleanChild is readable. Restore from the upstream file. */ LocalServer& LocalServerManager::operator[](int x) { list::iterator iter=srvList.begin(); for(int i=0;i::iterator iter=srvList.begin(); for(int i=0;igetName() << "." ); /* entry whose pid equals exitpid: print why the process ended, remove the entry from srvList, and hand a copy of it to reportDeadServer() */ if(iter->getPID()==exitpid) { TALK( "LocalServerManager::cleanChild: rasdaman server " << iter->getName() << " terminated illegally, status=" << status ); cout<<"Error: rasdaman server " << iter->getName() << ", pid " << exitpid << " terminated illegally, reason: "; // see 'man waitpid': decoding of status variable if (WIFEXITED(status) != 0) cout << "exited with return code " << WEXITSTATUS(status); else if (WIFSIGNALED(status)) cout << "uncaught signal " << WTERMSIG(status); else cout << "(unknown reason)"; cout << endl; // choices: restart silently the dead server or // just tell the manager about it // Not restart from here, because of sync problem for capabilities, master has to do that!!! /* the entry is copied before it is erased so that the copy can still be passed on; the loop is left via break immediately afterwards, so iter is not used again */ LocalServer temp=*iter; srvList.erase(iter); reportDeadServer(temp); break; } iter++; } // for } //while wasSignal=false; LEAVE( "LocalServerManager::cleanChild: leave." ); } /* reportDeadServer(): looks the server up by name in rasManager and, if that entry reports itself valid, calls changeStatus() on it with SERVER_CRASHED; the second argument is a local variable initialised to -1. */ void LocalServerManager::reportDeadServer(LocalServer &srv) { ENTER( "LocalServerManager::reportDeadServer: enter." ); int dummy = -1; RasServer &r=rasManager[srv.getName()]; if(r.isValid()) r.changeStatus(SERVER_CRASHED,dummy); LEAVE( "LocalServerManager::reportDeadServer: leave." ); }