daemons/cmirrord/local.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424

/*
 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "logging.h"
#include "common.h"
#include "functions.h"
#include "link_mon.h"
#include "local.h"

#include <errno.h>
#include <sys/socket.h>
#include <linux/connector.h>
#include <linux/netlink.h>
#include <unistd.h>

#ifndef CN_IDX_DM
/* Kernel 2.6.31 is required to run this code */
#define CN_IDX_DM                       0x7     /* Device Mapper */
#define CN_VAL_DM_USERSPACE_LOG         0x1
#endif

static int cn_fd = -1;  /* Connector (netlink) socket fd */
static char recv_buf[2048];
static char send_buf[2048];


/* FIXME: merge this function with kernel_send_helper */
static int kernel_ack(uint32_t seq, int error)
{
	int r;
	struct nlmsghdr *nlh = (struct nlmsghdr *)send_buf;
	struct cn_msg *msg = NLMSG_DATA(nlh);

	if (error < 0) {
		LOG_ERROR("Programmer error: error codes must be positive");
		return -EINVAL;
	}

	memset(send_buf, 0, sizeof(send_buf));

	nlh->nlmsg_seq = 0;
	nlh->nlmsg_pid = getpid();
	nlh->nlmsg_type = NLMSG_DONE;
	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct cn_msg));
	nlh->nlmsg_flags = 0;

	msg->len = 0;
	msg->id.idx = CN_IDX_DM;
	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
	msg->seq = seq;
	msg->ack = error;

	r = send(cn_fd, nlh, NLMSG_LENGTH(sizeof(struct cn_msg)), 0);
	/* FIXME: do better error processing */
	if (r <= 0)
		return -EBADE;

	return 0;
}


/*
 * kernel_recv
 * @rq: the newly allocated request from kernel
 *
 * Read requests from the kernel and allocate space for the new request.
 * If there is no request from the kernel, *rq is NULL.
 *
 * This function is not thread safe due to returned stack pointer.  In fact,
 * the returned pointer must not be in-use when this function is called again.
 *
 * Returns: 0 on success, -EXXX on error
 */
static int kernel_recv(struct clog_request **rq)
{
	int r = 0;
	ssize_t len;
	char *foo;
	struct cn_msg *msg;
	struct dm_ulog_request *u_rq;
	struct nlmsghdr *nlmsg_h;

	*rq = NULL;
	memset(recv_buf, 0, sizeof(recv_buf));

	len = recv(cn_fd, recv_buf, sizeof(recv_buf), 0);
	if (len < 0) {
		LOG_ERROR("Failed to recv message from kernel");
		r = -errno;
		goto fail;
	}

	nlmsg_h = (struct nlmsghdr *)recv_buf;
	switch (nlmsg_h->nlmsg_type) {
	case NLMSG_ERROR:
		LOG_ERROR("Unable to recv message from kernel: NLMSG_ERROR");
		r = -EBADE;
		goto fail;
	case NLMSG_DONE:
		msg = (struct cn_msg *)NLMSG_DATA((struct nlmsghdr *)recv_buf);
		len -= (ssize_t)sizeof(struct nlmsghdr);

		if (len < (ssize_t)sizeof(struct cn_msg)) {
			LOG_ERROR("Incomplete request from kernel received");
			r = -EBADE;
			goto fail;
		}

		if (msg->len > DM_ULOG_REQUEST_SIZE) {
			LOG_ERROR("Not enough space to receive kernel request (%d/%d)",
				  msg->len, DM_ULOG_REQUEST_SIZE);
			r = -EBADE;
			goto fail;
		}

		if (!msg->len)
			LOG_ERROR("Zero length message received");

		len -= (ssize_t)sizeof(struct cn_msg);

		if (len < msg->len)
			LOG_ERROR("len = %zd, msg->len = %" PRIu16, len, msg->len);

		msg->data[msg->len] = '\0'; /* Cleaner way to ensure this? */
		u_rq = (struct dm_ulog_request *)msg->data;

		if (!u_rq->request_type) {
			LOG_DBG("Bad transmission, requesting resend [%u]",
				msg->seq);
			r = -EAGAIN;

			if (kernel_ack(msg->seq, EAGAIN)) {
				LOG_ERROR("Failed to NACK kernel transmission [%u]",
					  msg->seq);
				r = -EBADE;
			}
		}

		/*
		 * Now we've got sizeof(struct cn_msg) + sizeof(struct nlmsghdr)
		 * worth of space that precede the request structure from the
		 * kernel.  Since that space isn't going to be used again, we
		 * can take it for our purposes; rather than allocating a whole
		 * new structure and doing a memcpy.
		 *
		 * We should really make sure 'clog_request' doesn't grow
		 * beyond what is available to us, but we need only check it
		 * once... perhaps at compile time?
		 */
		foo = (char *)u_rq;
		foo -= (sizeof(struct clog_request) - sizeof(struct dm_ulog_request));
		*rq = (struct clog_request *) foo;

		/* Clear the wrapper container fields */
		memset(*rq, 0, (size_t)((char *)u_rq - (char *)(*rq)));
		break;
	default:
		LOG_ERROR("Unknown nlmsg_type");
		r = -EBADE;
	}

fail:
	if (r)
		*rq = NULL;

	return (r == -EAGAIN) ? 0 : r;
}

static int kernel_send_helper(void *data, uint16_t out_size)
{
	int r;
	struct nlmsghdr *nlh;
	struct cn_msg *msg;

	memset(send_buf, 0, sizeof(send_buf));

	nlh = (struct nlmsghdr *)send_buf;
	nlh->nlmsg_seq = 0;  /* FIXME: Is this used? */
	nlh->nlmsg_pid = getpid();
	nlh->nlmsg_type = NLMSG_DONE;
	nlh->nlmsg_len = NLMSG_LENGTH(out_size + sizeof(struct cn_msg));
	nlh->nlmsg_flags = 0;

	msg = NLMSG_DATA(nlh);
	memcpy(msg->data, data, out_size);
	msg->len = out_size;
	msg->id.idx = CN_IDX_DM;
	msg->id.val = CN_VAL_DM_USERSPACE_LOG;
	msg->seq = 0;

	r = send(cn_fd, nlh, NLMSG_LENGTH(out_size + sizeof(struct cn_msg)), 0);
	/* FIXME: do better error processing */
	if (r <= 0)
		return -EBADE;

	return 0;
}

/*
 * do_local_work
 *
 * Any processing errors are placed in the 'rq'
 * structure to be reported back to the kernel.
 * It may be pointless for this function to
 * return an int.
 *
 * Returns: 0 on success, -EXXX on failure
 */
static int do_local_work(void *data __attribute__((unused)))
{
	int r;
	struct clog_request *rq;
	struct dm_ulog_request *u_rq = NULL;

	r = kernel_recv(&rq);
	if (r)
		return r;

	if (!rq)
		return 0;

	u_rq = &rq->u_rq;
	LOG_DBG("[%s]  Request from kernel received: [%s/%u]",
		SHORT_UUID(u_rq->uuid), RQ_TYPE(u_rq->request_type),
		u_rq->seq);
	switch (u_rq->request_type) {
	case DM_ULOG_CTR:
	case DM_ULOG_DTR:
	case DM_ULOG_GET_REGION_SIZE:
	case DM_ULOG_IN_SYNC:
	case DM_ULOG_GET_SYNC_COUNT:
	case DM_ULOG_STATUS_TABLE:
	case DM_ULOG_PRESUSPEND:
		/* We do not specify ourselves as server here */
		r = do_request(rq, 0);
		if (r)
			LOG_DBG("Returning failed request to kernel [%s]",
				RQ_TYPE(u_rq->request_type));
		r = kernel_send(u_rq);
		if (r)
			LOG_ERROR("Failed to respond to kernel [%s]",
				  RQ_TYPE(u_rq->request_type));

		break;
	case DM_ULOG_RESUME:
		/*
		 * Resume is a special case that requires a local
		 * component to join the CPG, and a cluster component
		 * to handle the request.
		 */
		r = local_resume(u_rq);
		if (r) {
			LOG_DBG("Returning failed request to kernel [%s]",
				RQ_TYPE(u_rq->request_type));
			r = kernel_send(u_rq);
			if (r)
				LOG_ERROR("Failed to respond to kernel [%s]",
					  RQ_TYPE(u_rq->request_type));
			break;
		}
		/* ELSE, fall through */
	case DM_ULOG_IS_CLEAN:
	case DM_ULOG_FLUSH:
	case DM_ULOG_MARK_REGION:
	case DM_ULOG_GET_RESYNC_WORK:
	case DM_ULOG_SET_REGION_SYNC:
	case DM_ULOG_STATUS_INFO:
	case DM_ULOG_IS_REMOTE_RECOVERING:
	case DM_ULOG_POSTSUSPEND:
		r = cluster_send(rq);
		if (r) {
			u_rq->data_size = 0;
			u_rq->error = r;
			if (kernel_send(u_rq))
				LOG_ERROR("Failed to respond to kernel [%s]",
					  RQ_TYPE(u_rq->request_type));
		}

		break;
	case DM_ULOG_CLEAR_REGION:
		r = kernel_ack(u_rq->seq, 0);

		r = cluster_send(rq);
		if (r) {
			/*
			 * FIXME: store error for delivery on flush
			 *        This would allow us to optimize MARK_REGION
			 *        too.
			 */
		}

		break;
	default:
		LOG_ERROR("Invalid log request received (%u), ignoring.",
			  u_rq->request_type);

		return 0;
	}

	if (r && !u_rq->error)
		u_rq->error = r;

	return r;
}

/*
 * kernel_send
 * @u_rq: result to pass back to kernel
 *
 * This function returns the u_rq structure
 * (containing the results) to the kernel.
 * It then frees the structure.
 *
 * WARNING: should the structure be freed if
 * there is an error?  I vote 'yes'.  If the
 * kernel doesn't get the response, it should
 * resend the request.
 *
 * Returns: 0 on success, -EXXX on failure
 */
int kernel_send(struct dm_ulog_request *u_rq)
{
	int r;
	uint16_t size;

	if (!u_rq)
		return -EINVAL;

	size = (uint16_t)(sizeof(struct dm_ulog_request) + u_rq->data_size);

	if (!u_rq->data_size && !u_rq->error) {
		/* An ACK is all that is needed */

		/* FIXME: add ACK code */
	} else if (size > DM_ULOG_REQUEST_SIZE) {
		/*
		 * If we gotten here, we've already overrun
		 * our allotted space somewhere.
		 *
		 * We must do something, because the kernel
		 * is waiting for a response.
		 */
		LOG_ERROR("Not enough space to respond to server");
		u_rq->error = -ENOSPC;
		size = sizeof(struct dm_ulog_request);
	}

	r = kernel_send_helper(u_rq, size);
	if (r)
		LOG_ERROR("Failed to send msg to kernel.");

	return r;
}

/*
 * init_local
 *
 * Initialize kernel communication socket (netlink)
 *
 * Returns: 0 on success, values from common.h on failure
 */
int init_local(void)
{
	int r = 0;
	unsigned opt;
	struct sockaddr_nl addr;

	cn_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
	if (cn_fd < 0)
		return EXIT_KERNEL_SOCKET;

	/* memset to fix valgrind complaint */
	memset(&addr, 0, sizeof(struct sockaddr_nl));

	addr.nl_family = AF_NETLINK;
	addr.nl_groups = CN_IDX_DM;
	addr.nl_pid = 0;

	r = bind(cn_fd, (struct sockaddr *) &addr, sizeof(addr));
	if (r < 0) {
		if (close(cn_fd))
			LOG_ERROR("Failed to close socket: %s",
				  strerror(errno));
		return EXIT_KERNEL_BIND;
	}

	opt = addr.nl_groups;
	r = setsockopt(cn_fd, 270, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
	if (r) {
		if (close(cn_fd))
			LOG_ERROR("Failed to close socket: %s",
				  strerror(errno));
		return EXIT_KERNEL_SETSOCKOPT;
	}

	/*
	r = fcntl(cn_fd, F_SETFL, FNDELAY);
	*/

	links_register(cn_fd, "local", do_local_work, NULL);

	return 0;
}

/*
 * cleanup_local
 *
 * Clean up before exiting
 */
void cleanup_local(void)
{
	links_unregister(cn_fd);
	if (cn_fd >= 0 && close(cn_fd))
		LOG_ERROR("Failed to close socket: %s",
			  strerror(errno));
}