summaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2007-03-20 09:36:28 +1100
committerNeil Brown <neilb@suse.de>2007-03-20 09:36:28 +1100
commit33cfd406a19a38bc10a977109bd2baaed1228a79 (patch)
treed6d0f045a5b7a528195fe365d3134a6397cf030e /utils
parentf12ed63e95dec929d6893b16983233d2940a889c (diff)
downloadnfs-utils-33cfd406a19a38bc10a977109bd2baaed1228a79.tar.gz
nfs-utils-33cfd406a19a38bc10a977109bd2baaed1228a79.tar.xz
nfs-utils-33cfd406a19a38bc10a977109bd2baaed1228a79.zip
Add sm-notify from SuSE
Not included in build yet.
Diffstat (limited to 'utils')
-rw-r--r--utils/statd/sm-notify.8114
-rw-r--r--utils/statd/sm-notify.c678
2 files changed, 792 insertions, 0 deletions
diff --git a/utils/statd/sm-notify.8 b/utils/statd/sm-notify.8
new file mode 100644
index 0000000..29726d6
--- /dev/null
+++ b/utils/statd/sm-notify.8
@@ -0,0 +1,114 @@
+.\"
+.\" sm-notify(8)
+.\"
+.\" Copyright (C) 2004 Olaf Kirch <okir@suse.de>
+.TH sm-notify 8 "8 Mar 2004"
+.SH NAME
+sm-notify \- Send out NSM reboot notifications
+.SH SYNOPSIS
+.BI "/sbin/sm-notify [-d] [-q] [-n] [-m " failtime "] [-p " port "] [-v " ipaddr "]"
+.SH DESCRIPTION
+File locking over NFS requires a facility to notify peers in
+case of a reboot, so that clients can reclaim locks after
+a server crash, and/or
+servers can release locks held by the rebooted client.
+.PP
+This is a two-step process: during normal
+operations, a mechanism is required to keep track of which
+hosts need to be informed of a reboot. And of course,
+notifications need to be sent out during reboot.
+The protocol used for this is called NSM, for
+.IR "Network Status Monitor" .
+.PP
+Commonly, these two features are provided by the
+.B rpc.statd
+daemon.
+The SuSE Linux Kernel includes a kernel level implementation
+of
+.BR statd ", "
+which keeps track of the hosts that need notifications, and
+accepts reboot notifications.
+.PP
+This is complemented by the
+.B sm-notify
+application, which is responsible for sending out the reboot
+notifications.
+.SS Operation
+For each NFS client or server machine to be monitored,
+the kernel level
+.B statd
+creates a file in
+.BR /var/lib/nfs/sm ", "
+and removes the file if monitoring is no longer required.
+.PP
+When the machine is rebooted,
+.B sm-notify
+iterates through these files and notifies the peer
+.B statd
+server on those machines.
+.PP
+Each machine has an
+.I "NSM state" ,
+which is basically an integer counter that is incremented
+each time the machine reboots. This counter is stored
+in
+.BR /var/lib/nfs/state ,
+and updated by
+.BR sm-notify .
+.SH OPTIONS
+.TP
+.BI -m " failtime
+When notifying hosts,
+.B sm-notify
+will try to contact each host for up to 15 minutes,
+and will give up if unable to reach it within this time
+frame.
+.IP
+Using the
+.B -m
+option, you can override this. A value of 0 tells
+sm-notify to retry indefinitely; any other value is
+interpreted as the maximum retry time in minutes.
+.TP
+.BI -v " ipaddr
+This option tells
+.B sm-notify
+to bind to the specified
+.IR ipaddr ,
+so that all notification packets originate from this address.
+This is useful for NFS failover.
+.TP
+.BI -p " port
+instructs
+.B sm-notify
+to bind to the indicated IP
+.IR port
+number. If this option is not given, it will try to bind to
+a randomly chosen privileged port below 1024.
+.TP
+.B -q
+Be quiet. This suppresses all messages except error
+messages while collecting the list of hosts.
+.TP
+.B -n
+Do not update the NSM state. This is for testing only.
+.TP
+.B -d
+Enables debugging.
+Debug messages are printed, and
+.B sm-notify
+stays in the foreground.
+By default,
+.B sm-notify
+forks and puts itself in the background after obtaining the
+list of hosts from
+.BR /var/lib/nfs/sm .
+.SH FILES
+.BR /var/lib/nfs/state
+.br
+.BR /var/lib/nfs/sm/*
+.br
+.BR /var/lib/nfs/sm.bak/*
+.SH SEE ALSO
+.BR rpc.nfsd (8),
+.BR portmap (8)
+.SH AUTHORS
+.br
+Olaf Kirch <okir@suse.de>
diff --git a/utils/statd/sm-notify.c b/utils/statd/sm-notify.c
new file mode 100644
index 0000000..7af0cea
--- /dev/null
+++ b/utils/statd/sm-notify.c
@@ -0,0 +1,678 @@
+/*
+ * Send NSM notify calls to all hosts listed in /var/lib/nfs/sm
+ *
+ * Copyright (C) 2004-2006 Olaf Kirch <okir@suse.de>
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <sys/param.h>
+#include <sys/syslog.h>
+#include <arpa/inet.h>
+#include <dirent.h>
+#include <time.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdarg.h>
+#include <netdb.h>
+#include <errno.h>
+
+#ifndef BASEDIR
+#define BASEDIR "/var/lib/nfs"
+#endif
+
+#define _SM_STATE_PATH BASEDIR "/state"
+#define _SM_DIR_PATH BASEDIR "/sm"
+#define _SM_BAK_PATH _SM_DIR_PATH ".bak"
+
+/* Same value as NSM_PROGRAM; not referenced by the code in this file */
+#define NSM_PROG 100024
+/* RPC program number placed in SM_NOTIFY calls and portmap queries */
+#define NSM_PROGRAM 100024
+/* RPC program version placed in the same packets */
+#define NSM_VERSION 1
+/* Initial per-host retransmit timeout, in seconds */
+#define NSM_TIMEOUT 2
+/* RPC procedure number used for the SM_NOTIFY call */
+#define NSM_NOTIFY 6
+/* Upper bound for the per-host retransmit timeout, in seconds */
+#define NSM_MAX_TIMEOUT 120 /* don't make this too big */
+/* Size of the send/receive buffers, counted in 32-bit words */
+#define MAXMSGSIZE 256
+
+typedef struct sockaddr_storage nsm_address;
+
+/*
+ * One peer that still has to be notified.  Entries live on the
+ * global "hosts" list, which insert_host() keeps sorted by send_next.
+ */
+struct nsm_host {
+ /* Next entry on the list */
+ struct nsm_host * next;
+ /* Peer name: strdup() of the file name found in the sm.bak directory */
+ char * name;
+ /* Full path of that file; unlinked once the peer has been notified */
+ char * path;
+ /* Peer address; a port of 0 means a portmap query must be sent first */
+ nsm_address addr;
+ /* mtime of the file, used as a tie breaker when sorting the list */
+ time_t last_used;
+ /* Time at which the next packet to this host is due */
+ time_t send_next;
+ /* Current retransmit interval in seconds, doubled after every send */
+ unsigned int timeout;
+ /* Packets sent since the port was last reset */
+ unsigned int retries;
+ /* RPC XID of the outstanding request; 0 if none has been assigned */
+ unsigned int xid;
+};
+
+/* Name sent as the caller name in SM_NOTIFY: the -v argument or gethostname() */
+static char nsm_hostname[256];
+/* NSM state number sent in SM_NOTIFY, as returned by nsm_get_state() */
+static uint32_t nsm_state;
+/* Incremented by -d: enables LOG_DEBUG messages and keeps us in the foreground */
+static int opt_debug = 0;
+/* Set by -q */
+static int opt_quiet = 0;
+/* Cleared by -n: do not write a new NSM state */
+static int opt_update_state = 1;
+/* Maximum time to keep trying, in seconds (-m gives minutes); 0 means forever */
+static unsigned int opt_max_retry = 15 * 60;
+/* Source address or host name given with -v, or NULL */
+static char * opt_srcaddr = 0;
+/* Source port given with -p; 0 means use bindresvport() */
+static uint16_t opt_srcport = 0;
+/* Non-zero once openlog() has been called; nsm_log() then uses syslog */
+static int log_syslog = 0;
+
+static unsigned int nsm_get_state(int);
+static void notify(void);
+static void notify_host(int, struct nsm_host *);
+static void recv_reply(int);
+static void backup_hosts(const char *, const char *);
+static void get_hosts(const char *);
+static void insert_host(struct nsm_host *);
+struct nsm_host * find_host(uint32_t);
+static int addr_parse(int, const char *, nsm_address *);
+static int addr_get_port(nsm_address *);
+static void addr_set_port(nsm_address *, int);
+static int host_lookup(int, const char *, nsm_address *);
+void nsm_log(int fac, const char *fmt, ...);
+
+/* Hosts still to be notified, sorted by insert_host() */
+static struct nsm_host * hosts = NULL;
+
+/*
+ * Parse a decimal command line number.
+ * Returns 1 and stores the value in *out when arg is a complete number
+ * in the range [0, max]; returns 0 otherwise.
+ */
+static int
+parse_number(const char *arg, long max, long *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (end == arg || *end != '\0' || errno != 0)
+		return 0;
+	if (val < 0 || val > max)
+		return 0;
+	*out = val;
+	return 1;
+}
+
+/*
+ * Program entry point.
+ *
+ * Parses the options, moves the host files from sm to sm.bak, builds
+ * the host list, backgrounds itself (unless -d was given), fetches and
+ * possibly updates the NSM state, and finally sends the notifications.
+ *
+ * Returns 0 when every host was notified; 1 on a usage or setup error,
+ * or when hosts are still on the list after notify() gave up.
+ */
+int
+main(int argc, char **argv)
+{
+	long num;
+	int c;
+
+	while ((c = getopt(argc, argv, "dm:np:v:q")) != -1) {
+		switch (c) {
+		case 'd':
+			opt_debug++;
+			break;
+		case 'm':
+			/* Given in minutes, kept in seconds; the bound
+			 * keeps the multiplication from overflowing */
+			if (!parse_number(optarg,
+					(long) ((unsigned int) -1 / 60), &num))
+				goto usage;
+			opt_max_retry = (unsigned int) num * 60;
+			break;
+		case 'n':
+			opt_update_state = 0;
+			break;
+		case 'p':
+			if (!parse_number(optarg, 65535, &num))
+				goto usage;
+			opt_srcport = (uint16_t) num;
+			break;
+		case 'v':
+			opt_srcaddr = optarg;
+			break;
+		case 'q':
+			opt_quiet = 1;
+			break;
+		default:
+			goto usage;
+		}
+	}
+
+	if (optind < argc) {
+usage:
+		fprintf(stderr,
+			"usage: sm-notify [-d] [-q] [-n] [-m minutes] "
+			"[-p port] [-v ipaddr]\n");
+		return 1;
+	}
+
+	if (opt_srcaddr) {
+		/* snprintf always terminates, truncating if necessary */
+		snprintf(nsm_hostname, sizeof(nsm_hostname), "%s", opt_srcaddr);
+	} else {
+		if (gethostname(nsm_hostname, sizeof(nsm_hostname)) < 0) {
+			perror("gethostname");
+			return 1;
+		}
+		/* gethostname() need not terminate a truncated name */
+		nsm_hostname[sizeof(nsm_hostname) - 1] = '\0';
+	}
+
+	backup_hosts(_SM_DIR_PATH, _SM_BAK_PATH);
+	get_hosts(_SM_BAK_PATH);
+
+	if (!opt_debug) {
+		if (!opt_quiet)
+			printf("Backgrounding to notify hosts...\n");
+
+		openlog("sm-notify", LOG_PID, LOG_DAEMON);
+		log_syslog = 1;
+
+		/* daemon(0, 0) already points descriptors 0-2 at /dev/null.
+		 * They are deliberately left open: closing them would let
+		 * the state file and the socket be handed out as 0, 1 or 2. */
+		if (daemon(0, 0) < 0) {
+			nsm_log(LOG_WARNING, "unable to background: %s",
+				strerror(errno));
+			return 1;
+		}
+	}
+
+	/* Get and update the NSM state. This will call sync() */
+	nsm_state = nsm_get_state(opt_update_state);
+
+	notify();
+
+	if (hosts) {
+		struct nsm_host *hp;
+
+		/* Whatever is still queued could not be reached in time */
+		while ((hp = hosts) != 0) {
+			hosts = hp->next;
+			nsm_log(LOG_NOTICE,
+				"Unable to notify %s, giving up",
+				hp->name);
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Notify hosts
+ *
+ * Creates a non-blocking IPv4 UDP socket, binds it to the address and
+ * port selected on the command line (or to a reserved port), and then
+ * works through the global host list until it is empty or the
+ * opt_max_retry deadline has passed.  Hosts whose send_next time has
+ * come are sent a packet and re-queued with a doubled timeout; replies
+ * are handed to recv_reply().  Exits the process on setup errors.
+ */
+void
+notify(void)
+{
+	nsm_address local_addr;
+	time_t failtime = 0;
+	int sock;
+
+	/* We may already be in the background, where stderr is not
+	 * visible, so report errors through nsm_log() */
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0) {
+		nsm_log(LOG_WARNING, "socket: %s", strerror(errno));
+		exit(1);
+	}
+	if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0)
+		nsm_log(LOG_WARNING, "fcntl(O_NONBLOCK): %s", strerror(errno));
+
+	memset(&local_addr, 0, sizeof(local_addr));
+	local_addr.ss_family = AF_INET; /* Default to IPv4 */
+
+	/* Bind source IP if provided on command line */
+	if (opt_srcaddr) {
+		if (!addr_parse(AF_INET, opt_srcaddr, &local_addr)
+		 && !host_lookup(AF_INET, opt_srcaddr, &local_addr)) {
+			nsm_log(LOG_WARNING,
+				"Not a valid hostname or address: \"%s\"\n",
+				opt_srcaddr);
+			exit(1);
+		}
+		/* We know it's IPv4 at this point */
+	}
+
+	/* Use source port if provided on the command line,
+	 * otherwise use bindresvport */
+	if (opt_srcport) {
+		addr_set_port(&local_addr, opt_srcport);
+		/* Pass the length of the IPv4 address actually stored,
+		 * not that of the enclosing sockaddr_storage */
+		if (bind(sock, (struct sockaddr *) &local_addr,
+				sizeof(struct sockaddr_in)) < 0) {
+			nsm_log(LOG_WARNING, "bind: %s", strerror(errno));
+			exit(1);
+		}
+	} else {
+		(void) bindresvport(sock, (struct sockaddr_in *) &local_addr);
+	}
+
+	if (opt_max_retry)
+		failtime = time(NULL) + opt_max_retry;
+
+	while (hosts) {
+		struct pollfd pfd;
+		time_t now = time(NULL);
+		unsigned int sent = 0;
+		struct nsm_host *hp;
+		long wait;
+
+		if (failtime && now >= failtime)
+			break;
+
+		while ((wait = hosts->send_next - now) <= 0) {
+			/* Never send more than 10 packets at once */
+			if (sent++ >= 10)
+				break;
+
+			/* Remove queue head */
+			hp = hosts;
+			hosts = hp->next;
+
+			notify_host(sock, hp);
+
+			/* Set the timeout for this call, using an
+			   exponential timeout strategy */
+			wait = hp->timeout;
+			if ((hp->timeout <<= 1) > NSM_MAX_TIMEOUT)
+				hp->timeout = NSM_MAX_TIMEOUT;
+			hp->send_next = now + wait;
+			hp->retries++;
+
+			insert_host(hp);
+		}
+
+		nsm_log(LOG_DEBUG, "Host %s due in %ld seconds",
+			hosts->name, wait);
+
+		pfd.fd = sock;
+		pfd.events = POLLIN;
+
+		/* poll() takes milliseconds; wait at least 100ms */
+		wait *= 1000;
+		if (wait < 100)
+			wait = 100;
+		if (poll(&pfd, 1, wait) != 1)
+			continue;
+
+		recv_reply(sock);
+	}
+
+	close(sock);
+}
+
+/*
+ * Send notification to a single host
+ *
+ * sock is the UDP socket to send on, host the peer to contact.
+ * If the port in host->addr is 0, a portmap query for the NSM program
+ * is sent to port 111; otherwise an SM_NOTIFY call carrying
+ * nsm_hostname and nsm_state is sent to the known port.  The host gets
+ * an XID assigned if it has none.  A failed send is only logged; the
+ * caller re-queues the host, so it is retried later.
+ */
+void
+notify_host(int sock, struct nsm_host *host)
+{
+	static unsigned int xid = 0;
+	nsm_address dest;
+	socklen_t destlen;
+	uint32_t msgbuf[MAXMSGSIZE], *p;
+	unsigned int len;
+
+	if (!xid)
+		xid = getpid() + time(NULL);
+	if (!host->xid)
+		host->xid = xid++;
+
+	/* RPC call header: xid, message type 0, RPC version 2 */
+	memset(msgbuf, 0, sizeof(msgbuf));
+	p = msgbuf;
+	*p++ = htonl(host->xid);
+	*p++ = 0;
+	*p++ = htonl(2);
+
+	/* If we retransmitted 4 times, reset the port to force
+	 * a new portmap lookup (in case statd was restarted)
+	 */
+	if (host->retries >= 4) {
+		addr_set_port(&host->addr, 0);
+		host->retries = 0;
+	}
+
+	dest = host->addr;
+	if (addr_get_port(&dest) == 0) {
+		/* Build a PMAP packet */
+		nsm_log(LOG_DEBUG, "Sending portmap query to %s", host->name);
+
+		addr_set_port(&dest, 111);
+		*p++ = htonl(100000);
+		*p++ = htonl(2);
+		*p++ = htonl(3);
+
+		/* Auth and verf */
+		*p++ = 0; *p++ = 0;
+		*p++ = 0; *p++ = 0;
+
+		*p++ = htonl(NSM_PROGRAM);
+		*p++ = htonl(NSM_VERSION);
+		*p++ = htonl(IPPROTO_UDP);
+		*p++ = 0;
+	} else {
+		/* Build an SM_NOTIFY packet */
+		nsm_log(LOG_DEBUG, "Sending SM_NOTIFY to %s", host->name);
+
+		*p++ = htonl(NSM_PROGRAM);
+		*p++ = htonl(NSM_VERSION);
+		*p++ = htonl(NSM_NOTIFY);
+
+		/* Auth and verf */
+		*p++ = 0; *p++ = 0;
+		*p++ = 0; *p++ = 0;
+
+		/* state change: our name (length, then the bytes padded
+		 * to a multiple of four by the memset above) and state */
+		len = strlen(nsm_hostname);
+		*p++ = htonl(len);
+		memcpy(p, nsm_hostname, len);
+		p += (len + 3) >> 2;
+		*p++ = htonl(nsm_state);
+	}
+	len = (p - msgbuf) << 2;
+
+	/* Hand sendto() the length that belongs to the address family
+	 * rather than the size of the whole sockaddr_storage */
+	if (dest.ss_family == AF_INET6)
+		destlen = sizeof(struct sockaddr_in6);
+	else
+		destlen = sizeof(struct sockaddr_in);
+
+	if (sendto(sock, msgbuf, len, 0, (struct sockaddr *) &dest, destlen) < 0)
+		nsm_log(LOG_DEBUG, "Sending to %s failed: %s",
+			host->name, strerror(errno));
+}
+
+/*
+ * Receive reply from remote host
+ *
+ * Reads one datagram from sock and checks that it is an accepted,
+ * successful RPC reply.  The host it belongs to is looked up (and
+ * taken off the list) by XID.  A portmap reply stores the returned
+ * port in the host and re-queues it; a reply to SM_NOTIFY removes the
+ * host's file and frees the host.  Anything that cannot be parsed is
+ * dropped, and the host, if already dequeued, is put back on the list.
+ */
+void
+recv_reply(int sock)
+{
+	struct nsm_host *hp;
+	uint32_t msgbuf[MAXMSGSIZE], *p, *end;
+	uint32_t xid;
+	int res;
+
+	res = recv(sock, msgbuf, sizeof(msgbuf), 0);
+	if (res < 0)
+		return;
+
+	nsm_log(LOG_DEBUG, "Received packet...");
+
+	/* The header examined below is six words long.  A shorter
+	 * datagram would make us parse uninitialised buffer contents. */
+	if ((size_t) res < 6 * sizeof(uint32_t))
+		return;
+
+	p = msgbuf;
+	end = p + (res >> 2);
+
+	xid = ntohl(*p++);
+	if (*p++ != htonl(1) /* must be REPLY */
+	 || *p++ != htonl(0) /* must be ACCEPTED */
+	 || *p++ != htonl(0) /* must be NULL verifier */
+	 || *p++ != htonl(0)
+	 || *p++ != htonl(0)) /* must be SUCCESS */
+		return;
+
+	/* Before we look at the data, find the host struct for
+	   this reply */
+	if ((hp = find_host(xid)) == NULL)
+		return;
+
+	if (addr_get_port(&hp->addr) == 0) {
+		/* This was a portmap request */
+		unsigned int port;
+
+		/* Make sure the port word was actually received */
+		if (p >= end)
+			goto fail;
+		port = ntohl(*p++);
+		if (port > 65535)
+			goto fail;
+
+		hp->send_next = time(NULL);
+		if (port == 0) {
+			/* No binding for statd. Delay the next
+			 * portmap query for max timeout */
+			nsm_log(LOG_DEBUG, "No statd on %s", hp->name);
+			hp->timeout = NSM_MAX_TIMEOUT;
+			hp->send_next += NSM_MAX_TIMEOUT;
+		} else {
+			addr_set_port(&hp->addr, port);
+			if (hp->timeout >= NSM_MAX_TIMEOUT / 4)
+				hp->timeout = NSM_MAX_TIMEOUT / 4;
+		}
+		/* The next packet to this host gets a fresh XID */
+		hp->xid = 0;
+	} else {
+		/* Successful NOTIFY call. Server returns void,
+		 * so nothing we need to do here (except
+		 * check that we didn't read past the end of the
+		 * packet)
+		 */
+		if (p <= end) {
+			nsm_log(LOG_DEBUG, "Host %s notified successfully", hp->name);
+			unlink(hp->path);
+			free(hp->name);
+			free(hp->path);
+			free(hp);
+			return;
+		}
+	}
+
+fail:	/* Re-insert the host */
+	insert_host(hp);
+}
+
+/*
+ * Move every entry of directory dirname into directory bakname.
+ * Entries whose name starts with a dot are left alone.  A failed
+ * rename is logged and does not stop the scan; if dirname cannot be
+ * opened, the error is printed and nothing is moved.
+ */
+static void
+backup_hosts(const char *dirname, const char *bakname)
+{
+	struct dirent *entry;
+	DIR *dp;
+
+	dp = opendir(dirname);
+	if (dp == NULL) {
+		perror(dirname);
+		return;
+	}
+
+	for (entry = readdir(dp); entry != NULL; entry = readdir(dp)) {
+		char from[1024], to[1024];
+
+		/* Only names that do not begin with '.' are moved */
+		if (entry->d_name[0] != '.') {
+			snprintf(from, sizeof(from), "%s/%s", dirname, entry->d_name);
+			snprintf(to, sizeof(to), "%s/%s", bakname, entry->d_name);
+			if (rename(from, to) < 0)
+				nsm_log(LOG_WARNING,
+					"Failed to rename %s -> %s: %m",
+					from, to);
+		}
+	}
+
+	closedir(dp);
+}
+
+/*
+ * Get all entries from sm.bak and convert them to host names
+ *
+ * Every entry of dirname whose name does not start with a dot is
+ * taken as an IPv4 address, an IPv6 address or a host name.  Entries
+ * that resolve are put on the global host list; entries that do not
+ * are logged and their file is removed.  Running out of memory stops
+ * the scan; hosts collected up to that point stay on the list.
+ */
+static void
+get_hosts(const char *dirname)
+{
+	struct nsm_host *host;
+	struct dirent *de;
+	DIR *dir;
+
+	if (!(dir = opendir(dirname))) {
+		perror(dirname);
+		return;
+	}
+
+	host = NULL;
+	while ((de = readdir(dir)) != NULL) {
+		struct stat stb;
+		char path[1024];
+
+		if (de->d_name[0] == '.')
+			continue;
+		if (host == NULL) {
+			host = calloc(1, sizeof(*host));
+			if (host == NULL) {
+				nsm_log(LOG_WARNING,
+					"Out of memory while reading %s",
+					dirname);
+				break;
+			}
+		} else {
+			/* Left over from a skipped entry: start clean */
+			memset(host, 0, sizeof(*host));
+		}
+
+		snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name);
+		if (!addr_parse(AF_INET, de->d_name, &host->addr)
+		 && !addr_parse(AF_INET6, de->d_name, &host->addr)
+		 && !host_lookup(AF_INET, de->d_name, &host->addr)) {
+			nsm_log(LOG_WARNING,
+				"%s doesn't seem to be a valid address, skipped",
+				de->d_name);
+			unlink(path);
+			continue;
+		}
+
+		if (stat(path, &stb) < 0)
+			continue;
+		host->last_used = stb.st_mtime;
+		host->timeout = NSM_TIMEOUT;
+		host->path = strdup(path);
+		host->name = strdup(de->d_name);
+		if (host->path == NULL || host->name == NULL) {
+			nsm_log(LOG_WARNING,
+				"Out of memory while reading %s", dirname);
+			free(host->path);
+			free(host->name);
+			break;
+		}
+
+		insert_host(host);
+		host = NULL;
+	}
+	closedir(dir);
+
+	/* A struct that never made it onto the list */
+	free(host);
+}
+
+/*
+ * Put host on the global list, keeping the list ordered.
+ *
+ * Entries are ordered by ascending send_next.  Among entries with the
+ * same send_next, the one with the larger last_used comes first, so
+ * that recently used hosts are notified first.
+ */
+void
+insert_host(struct nsm_host *host)
+{
+	struct nsm_host **link;
+
+	/* Walk the links until we find the entry to insert in front of */
+	for (link = &hosts; *link != NULL; link = &(*link)->next) {
+		const struct nsm_host *cur = *link;
+
+		if (host->send_next < cur->send_next)
+			break;
+		if (host->send_next == cur->send_next
+		 && host->last_used > cur->last_used)
+			break;
+	}
+
+	host->next = *link;
+	*link = host;
+}
+
+/*
+ * Look up the host whose XID equals xid.
+ * The first match is unlinked from the global list and returned;
+ * NULL is returned when no host carries that XID.
+ */
+struct nsm_host *
+find_host(uint32_t xid)
+{
+	struct nsm_host **link = &hosts;
+
+	while (*link != NULL) {
+		struct nsm_host *cur = *link;
+
+		if (cur->xid == xid) {
+			/* Take it off the list before handing it out */
+			*link = cur->next;
+			return cur;
+		}
+		link = &cur->next;
+	}
+
+	return NULL;
+}
+
+
+
+/*
+ * Retrieve the current NSM state
+ *
+ * Reads the state number from the state file.  If the file is missing
+ * or too short, the state starts at 1 and an update is forced; an even
+ * state is bumped to the next odd number.  When update is non-zero (or
+ * was forced), the state is increased by 2 and written to a temporary
+ * file that is then renamed over the state file, followed by sync().
+ * Returns the resulting state.  Exits the process if the new state
+ * cannot be stored.
+ */
+unsigned int
+nsm_get_state(int update)
+{
+	char newfile[PATH_MAX];
+	int fd, state;
+
+	if ((fd = open(_SM_STATE_PATH, O_RDONLY)) < 0) {
+		if (!opt_quiet) {
+			nsm_log(LOG_WARNING, "%s: %m", _SM_STATE_PATH);
+			nsm_log(LOG_WARNING, "Creating %s, set initial state 1",
+				_SM_STATE_PATH);
+		}
+		state = 1;
+		update = 1;
+	} else {
+		if (read(fd, &state, sizeof(state)) != sizeof(state)) {
+			nsm_log(LOG_WARNING,
+				"%s: bad file size, setting state = 1",
+				_SM_STATE_PATH);
+			state = 1;
+			update = 1;
+		} else {
+			if (!(state & 1))
+				state += 1;
+		}
+		close(fd);
+	}
+
+	if (update) {
+		state += 2;
+		snprintf(newfile, sizeof(newfile),
+			"%s.new", _SM_STATE_PATH);
+		/* O_TRUNC: do not keep bytes of a file left behind by
+		 * an earlier, interrupted run */
+		if ((fd = open(newfile, O_CREAT|O_WRONLY|O_TRUNC, 0644)) < 0) {
+			nsm_log(LOG_WARNING, "Cannot create %s: %m", newfile);
+			exit(1);
+		}
+		if (write(fd, &state, sizeof(state)) != sizeof(state)) {
+			nsm_log(LOG_WARNING,
+				"Failed to write state to %s", newfile);
+			exit(1);
+		}
+		/* A write error may only be reported by close() */
+		if (close(fd) < 0) {
+			nsm_log(LOG_WARNING,
+				"Failed to write state to %s", newfile);
+			exit(1);
+		}
+		if (rename(newfile, _SM_STATE_PATH) < 0) {
+			nsm_log(LOG_WARNING,
+				"Cannot create %s: %m", _SM_STATE_PATH);
+			exit(1);
+		}
+		sync();
+	}
+
+	return state;
+}
+
+/*
+ * Address handling utilities
+ */
+
+/*
+ * Convert the textual address name of family af (AF_INET or AF_INET6)
+ * and store it in addr.  Returns 1 on success, after also setting the
+ * address family in addr; returns 0 for an unsupported family or a
+ * string that inet_pton() does not accept.
+ */
+static int
+addr_parse(int af, const char *name, nsm_address *addr)
+{
+	void *dst;
+
+	switch (af) {
+	case AF_INET:
+		dst = &((struct sockaddr_in *) addr)->sin_addr;
+		break;
+	case AF_INET6:
+		dst = &((struct sockaddr_in6 *) addr)->sin6_addr;
+		break;
+	default:
+		return 0;
+	}
+
+	/* inet_pton() returns 1 only for a valid address */
+	if (inet_pton(af, name, dst) != 1)
+		return 0;
+
+	((struct sockaddr *) addr)->sa_family = af;
+	return 1;
+}
+
+/*
+ * Return the port stored in addr in host byte order, or 0 when the
+ * address family is neither AF_INET nor AF_INET6.
+ */
+int
+addr_get_port(nsm_address *addr)
+{
+	int family = ((struct sockaddr *) addr)->sa_family;
+
+	if (family == AF_INET)
+		return ntohs(((struct sockaddr_in *) addr)->sin_port);
+	if (family == AF_INET6)
+		return ntohs(((struct sockaddr_in6 *) addr)->sin6_port);
+	return 0;
+}
+
+/*
+ * Store port (host byte order) in addr.  Nothing is changed when the
+ * address family is neither AF_INET nor AF_INET6.
+ */
+static void
+addr_set_port(nsm_address *addr, int port)
+{
+	int family = ((struct sockaddr *) addr)->sa_family;
+
+	if (family == AF_INET)
+		((struct sockaddr_in *) addr)->sin_port = htons(port);
+	else if (family == AF_INET6)
+		((struct sockaddr_in6 *) addr)->sin6_port = htons(port);
+}
+
+/*
+ * Resolve name with getaddrinfo(), restricted to address family af,
+ * and copy the first result into addr.  Returns 1 if an address was
+ * copied; 0 if the lookup failed or the result is not smaller than
+ * *addr.
+ */
+static int
+host_lookup(int af, const char *name, nsm_address *addr)
+{
+	struct addrinfo hints, *res;
+	int found;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = af;
+
+	if (getaddrinfo(name, NULL, &hints, &res) != 0)
+		return 0;
+
+	/* Only the first entry of the result list is considered */
+	found = (res->ai_addrlen < sizeof(*addr));
+	if (found)
+		memcpy(addr, res->ai_addr, res->ai_addrlen);
+
+	freeaddrinfo(res);
+	return found;
+}
+
+/*
+ * Log a message
+ *
+ * fac is a syslog priority, fmt a printf-style format followed by its
+ * arguments.  LOG_DEBUG messages are dropped unless debugging is on.
+ * The message goes to syslog once log_syslog is set; until then it is
+ * written to stderr with a newline appended.
+ */
+void
+nsm_log(int fac, const char *fmt, ...)
+{
+	va_list args;
+
+	if (!opt_debug && fac == LOG_DEBUG)
+		return;
+
+	va_start(args, fmt);
+	if (!log_syslog) {
+		vfprintf(stderr, fmt, args);
+		fputs("\n", stderr);
+	} else {
+		vsyslog(fac, fmt, args);
+	}
+	va_end(args);
+}