summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2009-05-18 11:08:53 -0400
committerSteve Dickson <steved@redhat.com>2009-05-18 11:08:53 -0400
commit5d253e3e326bfcf0e8a342bca53f1b4db120a7a9 (patch)
tree54dba165f400d6ee912710bc5cb44affb6928eb0
parent3ab7ab5db0f825fdd95d017cdd6d6ee5d207dbe8 (diff)
downloadnfs-utils-5d253e3e326bfcf0e8a342bca53f1b4db120a7a9.tar.gz
nfs-utils-5d253e3e326bfcf0e8a342bca53f1b4db120a7a9.tar.xz
nfs-utils-5d253e3e326bfcf0e8a342bca53f1b4db120a7a9.zip
sm-notify: Failed DNS lookups should be retried
Currently, if getaddrinfo(3) fails when trying to resolve a hostname, sm-notify gives up immediately on that host. If sm-notify is started before network service is available on a system, that means it quits without notifying anyone. Or, if DNS service isn't available due to a network partition or because the DNS server crashed, sm-notify will simply remove all of its callback files and exit. Really, sm-notify should try harder. We know that the hostnames passed in to notify_host() have already been vetted by statd, which won't monitor a hostname that it can't resolve. So it's likely that any DNS failure we meet here is a temporary condition. If it isn't, then sm-notify will stop trying to notify that host in 15 minutes anyway. [ The host's file is left in /var/lib/nfs/sm.bak in this case, but sm.bak is not read again until the next time sm-notify runs. ] sm-notify already has retry logic for handling RPC timeouts. We can co-opt that to drive DNS resolution retries. We also add AI_ADDRCONFIG because on systems whose network startup is handled by NetworkManager, there appears to be a bug that causes processes that started calling getaddrinfo(3) before the network came up to continue getting EAI_AGAIN even after the network is fully operating. As I understand it, legacy glibc (before AI_ADDRCONFIG was exposed in headers) sets AI_ADDRCONFIG by default, although I haven't checked this. In any event, pre-glibc-2.2 systems probably won't run NetworkManager anyway, so this may not be much of a problem for them. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Steve Dickson <steved@redhat.com>
-rw-r--r--utils/statd/sm-notify.c39
1 files changed, 27 insertions, 12 deletions
diff --git a/utils/statd/sm-notify.c b/utils/statd/sm-notify.c
index 78d0a59..72dcff4 100644
--- a/utils/statd/sm-notify.c
+++ b/utils/statd/sm-notify.c
@@ -118,17 +118,33 @@ static void smn_set_port(struct sockaddr *sap, const unsigned short port)
}
}
-static struct addrinfo *smn_lookup(const sa_family_t family, const char *name)
+static struct addrinfo *smn_lookup(const char *name)
{
struct addrinfo *ai, hint = {
- .ai_family = family,
+#if HAVE_DECL_AI_ADDRCONFIG
+ .ai_flags = AI_ADDRCONFIG,
+#endif /* HAVE_DECL_AI_ADDRCONFIG */
+ .ai_family = AF_INET,
.ai_protocol = IPPROTO_UDP,
};
+ int error;
+
+ error = getaddrinfo(name, NULL, &hint, &ai);
+ switch (error) {
+ case 0:
+ return ai;
+ case EAI_SYSTEM:
+ if (opt_debug)
+ nsm_log(LOG_ERR, "getaddrinfo(3): %s",
+ strerror(errno));
+ break;
+ default:
+ if (opt_debug)
+ nsm_log(LOG_ERR, "getaddrinfo(3): %s",
+ gai_strerror(error));
+ }
- if (getaddrinfo(name, NULL, &hint, &ai) != 0)
- return NULL;
-
- return ai;
+ return NULL;
}
static void smn_forget_host(struct nsm_host *host)
@@ -291,7 +307,7 @@ notify(void)
/* Bind source IP if provided on command line */
if (opt_srcaddr) {
- struct addrinfo *ai = smn_lookup(AF_INET, opt_srcaddr);
+ struct addrinfo *ai = smn_lookup(opt_srcaddr);
if (!ai) {
nsm_log(LOG_ERR,
"Not a valid hostname or address: \"%s\"",
@@ -402,13 +418,12 @@ notify_host(int sock, struct nsm_host *host)
host->xid = xid++;
if (host->ai == NULL) {
- host->ai = smn_lookup(AF_UNSPEC, host->name);
+ host->ai = smn_lookup(host->name);
if (host->ai == NULL) {
nsm_log(LOG_WARNING,
- "%s doesn't seem to be a valid address,"
- " skipped", host->name);
- smn_forget_host(host);
- return 1;
+ "DNS resolution of %s failed; "
+ "retrying later", host->name);
+ return 0;
}
}