summaryrefslogtreecommitdiffstats
path: root/hbeat.c
blob: c2bb9c9e77fff63052a53358fd7988f95da42499 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/*
 * hbeat.c --
 *
 *	Simple heartbeating.
 *
 * Copyright © 2005,2006  Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions of the
 * GNU General Public License v.2.  This program is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
 * including the implied warranties of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. Any Red Hat
 * trademarks that are incorporated in the source code or documentation are not
 * subject to the GNU General Public License and may only be used or replicated
 * with the express permission of Red Hat, Inc.
 *
 * Red Hat Author(s): Nathan Straz <nstraz@redhat.com>
 *                    Dean Jansa <djansa@redhat.com>
 */

#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <time.h>
#include <stdio.h>

#include "btime.h"
#include "hbeat.h"


/*
 * Private state behind an hbeat handle; one instance per monitored host.
 */
struct hbeat_s {
	char *host;			/* copy of the host name made by hbeat_init(), released by hbeat_free() */
	int max_timeout;		/* quiet time allowed before the host is declared dead; compared against time() deltas; 0 disables heartbeating */
	hbeat_state_t rhost_state;	/* current state of the heartbeat state machine */
	unsigned int last_rhost_btime;	/* last btime() value accepted from the host; 0 until the first response */
	time_t start_quiet_time;	/* when the host first stopped responding; reset to 0 once it answers again */
};


/*
 * hbeat_init --
 *
 *	Init a heartbeat to host, with max_timeout as supplied.
 *
 *	The host string is copied, so the caller keeps ownership of its
 *	argument.  A max_timeout of 0 disables heartbeating.  The new
 *	handle starts in the HOST_ALIVE state with no btime recorded.
 *
 *	Side effect: reseeds random() from the current time.
 *
 *	Returns: 
 *	A hbeat handle (release it with hbeat_free()), or NULL on error
 *	(NULL host, or out of memory).
 */

hbeat_t 
hbeat_init(const char *host, int max_timeout)
{
	struct hbeat_s *hbeatp;

	/* strdup(NULL) is undefined; treat a missing host as an error */
	if (!host) {
		return NULL;
	}

	hbeatp = malloc(sizeof *hbeatp);
	if (!hbeatp) {
		return NULL;
	}

	hbeatp->host = strdup(host);
	if (!hbeatp->host) {
		/* don't hand back a handle whose host would be NULL */
		free(hbeatp);
		return NULL;
	}

	hbeatp->max_timeout = max_timeout;
	hbeatp->rhost_state = HOST_ALIVE;
	hbeatp->last_rhost_btime = 0;
	hbeatp->start_quiet_time = 0;

	srandom((unsigned int)time(NULL)); /* for gen_cookie */

	return hbeatp;
}


/*
 * hbeat_free --
 *
 *	Free a hbeat handle and the host name copy it owns.
 *
 *	Passing NULL is a no-op (like free()), so callers can clean up
 *	unconditionally after a failed hbeat_init().  The handle must
 *	not be used after this call.
 */

void 
hbeat_free(hbeat_t hbh)
{
	struct hbeat_s *hbeatp = hbh;

	if (!hbeatp) {
		return;
	}

	free(hbeatp->host);
	free(hbeatp);
}
	

/*
 * hbeat --
 *
 *	Attempt to contact the host in the hbeat handle and run the hbeat
 *	"state machine" to decide whether the host is still "alive."
 *
 *	A non-zero btime() reply counts as a response, zero as no response.
 *	State transitions made here:
 *	  ALIVE -> QUIET    first missed response (quiet timer starts)
 *	  QUIET -> ALIVE    host answers again
 *	  QUIET -> TIMEOUT  no response for more than max_timeout
 *	  ALIVE -> REBOOT   reported btime moved by more than 5
 *	TIMEOUT and REBOOT are never left by this function.
 *	If max_timeout is 0 the state is forced to HOST_HBEAT_DISABLED and
 *	the host is not contacted.
 *
 *	Returns:
 *		1 if host has been active or responded within max_timeout secs,
 *		  or if heartbeating is disabled.
 *		0 if host is dead (no response for > max_timeout secs), has
 *		  rebooted, or the handle holds an unrecognized state.
 */

unsigned int
hbeat(hbeat_t hbh)
{
	struct hbeat_s *hbeatp = hbh;
	/* largest btime change still treated as the same boot */
	const unsigned int btime_slop = 5;
	unsigned int boot_time, recheck, drift;
	time_t current_time;
	unsigned int retval;

	/* User disabled heart beating */
	if (!hbeatp->max_timeout) {
		hbeatp->rhost_state = HOST_HBEAT_DISABLED;
		return 1;
	}

	boot_time = btime(hbeatp->host);
	/* quickly sanity check that we're getting the same
	 * hbeat every time we ask for it
	 */
	if (boot_time && hbeatp->last_rhost_btime 
			&& boot_time != hbeatp->last_rhost_btime) {
		recheck = btime(hbeatp->host);
		if (boot_time != recheck) {
			fprintf(stderr, "Got conflicting hbeat times (%u and %u), discarding both\n", boot_time, recheck);
			/* third reading is the one the state machine uses */
			boot_time = btime(hbeatp->host);
		}
	}
	current_time = time(NULL);

	if (boot_time) {
		/* Remote Host is Responding */
		switch (hbeatp->rhost_state) {
		case HOST_REBOOT:
		case HOST_TIMEOUT:
			/* already declared dead; a late answer does not revive it */
			retval = 0;
			break;

		case HOST_QUIET:
			hbeatp->rhost_state = HOST_ALIVE;
			hbeatp->start_quiet_time = 0;
			/* fallthrough: validate the btime like any ALIVE host */
		case HOST_ALIVE:
			if (hbeatp->last_rhost_btime == 0) {
				/* first response ever: nothing to compare against */
				hbeatp->last_rhost_btime = boot_time;
				retval = 1;
				break;
			}

			/* absolute difference without leaving unsigned arithmetic */
			if (boot_time > hbeatp->last_rhost_btime) {
				drift = boot_time - hbeatp->last_rhost_btime;
			} else {
				drift = hbeatp->last_rhost_btime - boot_time;
			}

			if (drift > btime_slop) {
				fprintf(stderr, "Host rebooted (%u, %u)\n",
						boot_time, hbeatp->last_rhost_btime);
				hbeatp->rhost_state = HOST_REBOOT;
				retval = 0;
			} else {
				hbeatp->last_rhost_btime = boot_time;
				retval = 1;
			}
			break;

		case HOST_HBEAT_DISABLED:
			retval = 1;
			break;

		default:
			/* unknown state (e.g. set via hbeat_setstate): report
			 * not-alive rather than return an indeterminate value
			 */
			retval = 0;
			break;
		}
	} else {
		/* Remote Host is NOT Responding */
		switch (hbeatp->rhost_state) {
		case HOST_ALIVE:
			/* first miss: start the quiet timer, still considered alive */
			hbeatp->rhost_state = HOST_QUIET;
			hbeatp->start_quiet_time = current_time;
			retval = 1;
			break;

		case HOST_QUIET:
			if (current_time - hbeatp->start_quiet_time 
					> hbeatp->max_timeout) {
				hbeatp->rhost_state = HOST_TIMEOUT;
				retval = 0;
			} else {
				retval = 1;
			}
			break;

		case HOST_TIMEOUT:
		case HOST_REBOOT:
			retval = 0;
			break;

		case HOST_HBEAT_DISABLED:
			retval = 1;
			break;

		default:
			/* unknown state: see the responding branch above */
			retval = 0;
			break;
		}
	}

	return retval;
}


/*
 * hbeat_getstate --
 *
 *	Report the state currently stored in the hbeat handle.
 *	The handle is not modified and the host is not contacted.
 */

hbeat_state_t 
hbeat_getstate(hbeat_t hbh)
{
	const struct hbeat_s *hbeatp = hbh;

	return hbeatp->rhost_state;
}


/*
 * hbeat_setstate --
 *
 *	Overwrite the state stored in the hbeat handle with the given
 *	value.  The value is stored as-is, without validation.
 */

void
hbeat_setstate(hbeat_t hbh, hbeat_state_t state)
{
	struct hbeat_s *hbeatp = hbh;

	hbeatp->rhost_state = state;
}

/*
 * hbeat_getmaxtimeout --
 *
 *	Report the max_timeout value the hbeat handle was created with.
 */

int
hbeat_getmaxtimeout(hbeat_t hbh)
{
	const struct hbeat_s *hbeatp = hbh;

	return hbeatp->max_timeout;
}