From dae0822698327e81f467c3594141d70cdafca331 Mon Sep 17 00:00:00 2001 From: Petr Rockai Date: Thu, 23 Feb 2012 13:11:07 +0000 Subject: The lvmetad client-side integration. Only active when use_lvmetad = 1 is set in lvm.conf *and* lvmetad is running. --- lib/cache/lvmetad.c | 633 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 lib/cache/lvmetad.c (limited to 'lib/cache/lvmetad.c') diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c new file mode 100644 index 00000000..a2308f49 --- /dev/null +++ b/lib/cache/lvmetad.c @@ -0,0 +1,633 @@ +#include "lib.h" +#include "toolcontext.h" +#include "metadata.h" +#include "device.h" +#include "lvmetad.h" +#include "lvmcache.h" +#include "lvmetad-client.h" +#include "format-text.h" // TODO for disk_locn, used as a DA representation +#include "filter.h" + +static int _using_lvmetad = 0; +static daemon_handle _lvmetad; + +void lvmetad_init(void) +{ + const char *socket = getenv("LVM_LVMETAD_SOCKET"); + if (_using_lvmetad) { /* configured by the toolcontext */ + _lvmetad = lvmetad_open(socket ?: DEFAULT_RUN_DIR "/lvmetad.socket"); + if (_lvmetad.socket_fd < 0) { + log_warn("Failed to connect to lvmetad. Falling back to scanning."); + _using_lvmetad = 0; + } + } +} + +/* + * Helper; evaluate the reply from lvmetad, check for errors, print diagnostics + * and return a summary success/failure exit code. Frees up the reply resources + * as well. + */ +static int _lvmetad_handle_reply(daemon_reply reply, const char *action, const char *object) { + if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + log_error("Request to %s %s in lvmetad has failed. Reason: %s", + action, object, reply.error ? strerror(reply.error) : + daemon_reply_str(reply, "reason", "Unknown.")); + daemon_reply_destroy(reply); + return 0; + } + + daemon_reply_destroy(reply); + return 1; +} + +static int _read_mda(struct lvmcache_info *info, + struct format_type *fmt, + const struct dm_config_node *cn) +{ + struct metadata_area_ops *ops; + struct metadata_area *mda = NULL; + dm_list_iterate_items(ops, &fmt->mda_ops) { + if (ops->mda_import_text && ops->mda_import_text(info, cn)) + return 1; + } + return 0; +} + +static struct lvmcache_info *_pv_populate_lvmcache( + struct cmd_context *cmd, struct dm_config_node *cn, dev_t fallback) +{ + const char *pvid_txt = dm_config_find_str(cn->child, "id", NULL), + *vgid_txt = dm_config_find_str(cn->child, "vgid", NULL), + *vgname = dm_config_find_str(cn->child, "vgname", NULL), + *fmt_name = dm_config_find_str(cn->child, "format", NULL); + dev_t devt = dm_config_find_int(cn->child, "device", 0); + uint64_t devsize = dm_config_find_int(cn->child, "dev_size", 0), + label_sector = dm_config_find_int(cn->child, "label_sector", 0); + + struct format_type *fmt = fmt_name ? get_format_by_name(cmd, fmt_name) : NULL; + + if (!fmt) { + log_warn("No format for PV %s. It is probably missing.", pvid_txt); + return_NULL; + } + + struct device *device = dev_cache_get_by_devt(devt, cmd->filter); + struct id pvid, vgid; + + if (!device && fallback) + device = dev_cache_get_by_devt(fallback, cmd->filter); + + if (!device) { + log_warn("No device for PV %s.", pvid_txt); + return_NULL; + } + + if (!pvid_txt || !id_read_format(&pvid, pvid_txt)) { + log_warn("Missing or ill-formatted PVID for PV: %s.", pvid_txt); + return_NULL; + } + + if (vgid_txt) + id_read_format(&vgid, vgid_txt); + else + strcpy((char*)&vgid, fmt->orphan_vg_name); + + if (!vgname) + vgname = fmt->orphan_vg_name; + + struct lvmcache_info *info = + lvmcache_add(fmt->labeller, (const char *)&pvid, device, + vgname, (const char *)&vgid, 0); + + lvmcache_get_label(info)->sector = label_sector; + lvmcache_set_device_size(info, devsize); + lvmcache_del_das(info); + lvmcache_del_mdas(info); + + int i = 0; + struct dm_config_node *mda = NULL; + do { + char mda_id[32]; + sprintf(mda_id, "mda%d", i); + mda = dm_config_find_node(cn->child, mda_id); + if (mda) + _read_mda(info, fmt, mda); + ++i; + } while (mda); + + i = 0; + struct dm_config_node *da = NULL; + do { + char da_id[32]; + sprintf(da_id, "da%d", i); + da = dm_config_find_node(cn->child, da_id); + if (da) { + uint64_t offset, size; + if (!dm_config_get_uint64(da->child, "offset", &offset)) return_0; + if (!dm_config_get_uint64(da->child, "size", &size)) return_0; + lvmcache_add_da(info, offset, size); + } + ++i; + } while (da); + + return info; +} + +struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgname, const char *vgid) +{ + if (!_using_lvmetad) + return NULL; + + struct volume_group *vg = NULL; + daemon_reply reply; + if (vgid) { + char uuid[64]; + id_write_format((struct id*)vgid, uuid, 64); + reply = daemon_send_simple(_lvmetad, "vg_lookup", "uuid = %s", uuid, NULL); + } else { + if (!vgname) + log_error(INTERNAL_ERROR "VG name required (VGID not available)"); + reply = daemon_send_simple(_lvmetad, "vg_lookup", "name = %s", vgname, NULL); + } + + if (!strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + + struct dm_config_node *top = dm_config_find_node(reply.cft->root, "metadata"); + const char *name = daemon_reply_str(reply, "name", NULL); + + struct format_instance *fid; + struct format_instance_ctx fic; + + /* fall back to lvm2 if we don't know better */ + const char *fmt_name = dm_config_find_str(top, "metadata/format", "lvm2"); + struct format_type *fmt = get_format_by_name(cmd, fmt_name); + if (!fmt) { + log_error(INTERNAL_ERROR + "We do not know the format (%s) reported by lvmetad.", + fmt_name); + return NULL; + } + + fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; + fic.context.vg_ref.vg_name = name; + fic.context.vg_ref.vg_id = vgid; + + if (!(fid = fmt->ops->create_instance(fmt, &fic))) + return_NULL; + + struct dm_config_node *pvcn = + dm_config_find_node(top, "metadata/physical_volumes")->child; + while (pvcn) { + _pv_populate_lvmcache(cmd, pvcn, 0); + pvcn = pvcn->sib; + } + + top->key = name; + vg = import_vg_from_config_tree(reply.cft, fid); + + struct pv_list *pvl; + dm_list_iterate_items(pvl, &vg->pvs) { + struct lvmcache_info *info = + lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0); + if (info) { + pvl->pv->label_sector = lvmcache_get_label(info)->sector; + pvl->pv->dev = lvmcache_device(info); + lvmcache_fid_add_mdas_pv(info, fid); + } /* else probably missing */ + } + + lvmcache_update_vg(vg, 0); + } + + daemon_reply_destroy(reply); + return vg; +} + +struct _fixup_baton { + int i; + int find; + int ignore; +}; + +static int _fixup_ignored(struct metadata_area *mda, void *baton) { + struct _fixup_baton *b = baton; + if (b->i == b->find) + mda_set_ignored(mda, b->ignore); + b->i ++; + return 1; +} + +int lvmetad_vg_update(struct volume_group *vg) +{ + char *buf = NULL; + if (!vg) + return 0; + if (!_using_lvmetad) + return 1; /* fake it */ + + /* TODO. This is not entirely correct, since export_vg_to_buffer + * adds trailing nodes to the buffer. We may need to use + * export_vg_to_config_tree and format the buffer ourselves. It + * does, however, work for now, since the garbage is well + * formatted and has no conflicting keys with the rest of the + * request. */ + if (!export_vg_to_buffer(vg, &buf)) { + log_error("Could not format VG metadata."); + return_0; + } + + daemon_reply reply; + + reply = daemon_send_simple(_lvmetad, "vg_update", "vgname = %s", vg->name, + "metadata = %b", strchr(buf, '{'), + NULL); + + if (!_lvmetad_handle_reply(reply, "update VG", vg->name)) + return 0; + + struct dm_hash_node *n = (vg->fid && vg->fid->metadata_areas_index) ? + dm_hash_get_first(vg->fid->metadata_areas_index) : NULL; + while (n) { + struct metadata_area *mda = dm_hash_get_data(vg->fid->metadata_areas_index, n); + char mda_id[128], *num; + strcpy(mda_id, dm_hash_get_key(vg->fid->metadata_areas_index, n)); + if ((num = strchr(mda_id, '_'))) { + *num = 0; + ++num; + struct lvmcache_info *info = + lvmcache_info_from_pvid(mda_id, 0); + struct _fixup_baton baton = { .i = 0, .find = atoi(num), + .ignore = mda_is_ignored(mda) }; + if (info) + lvmcache_foreach_mda(info, _fixup_ignored, &baton); + } + n = dm_hash_get_next(vg->fid->metadata_areas_index, n); + } + + struct pv_list *pvl; + dm_list_iterate_items(pvl, &vg->pvs) { + /* NB. the PV fmt pointer is sometimes wrong during vgconvert */ + if (pvl->pv->dev && !lvmetad_pv_found(pvl->pv->id, pvl->pv->dev, + vg->fid ? vg->fid->fmt : pvl->pv->fmt, + pvl->pv->label_sector, NULL)) + return 0; + } + + return 1; +} + +int lvmetad_vg_remove(struct volume_group *vg) +{ + if (!_using_lvmetad) + return 1; /* just fake it */ + char uuid[64]; + id_write_format(&vg->id, uuid, 64); + daemon_reply reply = + daemon_send_simple(_lvmetad, "vg_remove", "uuid = %s", uuid, NULL); + + return _lvmetad_handle_reply(reply, "remove VG", vg->name); +} + +int lvmetad_pv_lookup(struct cmd_context *cmd, struct id pvid) +{ + if (!_using_lvmetad) + return_0; + + int result = 1; + char uuid[64]; + id_write_format(&pvid, uuid, 64); + + daemon_reply reply = + daemon_send_simple(_lvmetad, "pv_lookup", "uuid = %s", uuid, NULL); + + if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + _lvmetad_handle_reply(reply, "lookup PVs", ""); + return_0; + } + + struct dm_config_node *cn = dm_config_find_node(reply.cft->root, "physical_volume"); + if (!_pv_populate_lvmcache(cmd, cn, 0)) + result = 0; + + daemon_reply_destroy(reply); + return result; +} + +int lvmetad_pv_lookup_by_devt(struct cmd_context *cmd, dev_t device) +{ + if (!_using_lvmetad) + return_0; + + int result = 1; + + daemon_reply reply = + daemon_send_simple(_lvmetad, "pv_lookup", "device = %d", device, NULL); + + if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + _lvmetad_handle_reply(reply, "lookup PVs", ""); + return_0; + } + + struct dm_config_node *cn = dm_config_find_node(reply.cft->root, "physical_volume"); + if (!_pv_populate_lvmcache(cmd, cn, device)) + result = 0; + + daemon_reply_destroy(reply); + return result; +} + +int lvmetad_pv_list_to_lvmcache(struct cmd_context *cmd) +{ + if (!_using_lvmetad) + return_0; + + daemon_reply reply = + daemon_send_simple(_lvmetad, "pv_list", NULL); + + if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + _lvmetad_handle_reply(reply, "list PVs", ""); + return_0; + } + + struct dm_config_node *cn = dm_config_find_node(reply.cft->root, "physical_volumes")->child; + while (cn) { + _pv_populate_lvmcache(cmd, cn, 0); + cn = cn->sib; + } + + daemon_reply_destroy(reply); + return 1; +} + +int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd) +{ + if (!_using_lvmetad) + return_0; + + daemon_reply reply = + daemon_send_simple(_lvmetad, "vg_list", NULL); + + if (reply.error || strcmp(daemon_reply_str(reply, "response", ""), "OK")) { + _lvmetad_handle_reply(reply, "list VGs", ""); + return_0; + } + + struct dm_config_node *cn = dm_config_find_node(reply.cft->root, "volume_groups")->child; + while (cn) { + struct id vgid; + const char *vgid_txt = cn->key, + *name = dm_config_find_str(cn->child, "name", NULL); + id_read_format(&vgid, vgid_txt); + + cn = cn->sib; + + /* the call to lvmetad_vg_lookup will poke the VG into lvmcache */ + struct volume_group *tmp = lvmetad_vg_lookup(cmd, NULL, (const char*)&vgid); + release_vg(tmp); + } + + daemon_reply_destroy(reply); + return 1; +} + +struct _print_mda_baton { + int i; + char *buffer; +}; + +static int _print_mda(struct metadata_area *mda, void *baton) +{ + int result = 0; + struct _print_mda_baton *b = baton; + + if (!mda->ops->mda_export_text) /* do nothing */ + return 1; + + char *buf = b->buffer; + char *mda_txt = mda->ops->mda_export_text(mda); + if (!dm_asprintf(&b->buffer, "%s mda%i { %s }", b->buffer ?: "", b->i, mda_txt)) + goto_out; + b->i ++; + result = 1; +out: + dm_free(mda_txt); + dm_free(buf); + return result; +} + +static int _print_da(struct disk_locn *da, void *baton) +{ + if (!da) + return 1; + + struct _print_mda_baton *b = baton; + + char *buf = b->buffer; + if (!dm_asprintf(&b->buffer, "%s da%i { offset = %lld size = %lld }", + b->buffer ?: "", b->i, da->offset, da->size)) + { + dm_free(buf); + return_0; + } + b->i ++; + dm_free(buf); + return 1; +} + +static const char *_print_mdas(struct lvmcache_info *info) +{ + struct _print_mda_baton baton = { .i = 0, .buffer = NULL }; + if (!lvmcache_foreach_mda(info, &_print_mda, &baton)) + return NULL; + baton.i = 0; + if (!lvmcache_foreach_da(info, &_print_da, &baton)) + return NULL; + return baton.buffer; +} + +int lvmetad_pv_found(struct id pvid, struct device *device, const struct format_type *fmt, + uint64_t label_sector, struct volume_group *vg) +{ + if (!_using_lvmetad) + return 1; + + char uuid[64]; + + id_write_format(&pvid, uuid, 64); + + /* FIXME A more direct route would be much preferable. */ + struct lvmcache_info *info = lvmcache_info_from_pvid((const char *)&pvid, 0); + const char *mdas = NULL; + if (info) + mdas = _print_mdas(info); + + char *pvmeta; + if (!dm_asprintf(&pvmeta, + "{ device = %lld\n" + " dev_size = %lld\n" + " format = \"%s\"\n" + " label_sector = %lld\n" + " id = \"%s\"\n" + " %s" + "}", device->dev, info ? lvmcache_device_size(info) : 0, + fmt->name, label_sector, uuid, mdas ?: "")) + return_0; + + daemon_reply reply; + + if (vg) { + char *buf = NULL; + /* + * TODO. This is not entirely correct, since export_vg_to_buffer + * adds trailing garbage to the buffer. We may need to use + * export_vg_to_config_tree and format the buffer ourselves. It + * does, however, work for now, since the garbage is well + * formatted and has no conflicting keys with the rest of the + * request. + */ + export_vg_to_buffer(vg, &buf); + reply = daemon_send_simple(_lvmetad, + "pv_found", + "pvmeta = %b", pvmeta, + "vgname = %s", vg->name, + "metadata = %b", strchr(buf, '{'), + NULL); + } else { + /* There are no MDAs on this PV. */ + reply = daemon_send_simple(_lvmetad, + "pv_found", + "pvmeta = %b", pvmeta, + NULL); + } + + dm_free(pvmeta); + return _lvmetad_handle_reply(reply, "update PV", uuid); +} + +int lvmetad_pv_gone(dev_t device) +{ + daemon_reply reply = + daemon_send_simple(_lvmetad, "pv_gone", "device = %d", device, NULL); + + return _lvmetad_handle_reply(reply, "drop PV", ""); +} + +int lvmetad_active() +{ + return _using_lvmetad; +} + +void lvmetad_set_active(int active) +{ + _using_lvmetad = active; +} + +/* + * The following code implements pvscan --lvmetad. + */ + +struct _pvscan_lvmetad_baton { + struct volume_group *vg; + struct format_instance *fid; +}; + +static int _pvscan_lvmetad_single(struct metadata_area *mda, void *baton) +{ + struct _pvscan_lvmetad_baton *b = baton; + struct volume_group *this = mda->ops->vg_read(b->fid, "", mda); + if ((this && !b->vg) || this->seqno > b->vg->seqno) + b->vg = this; + else release_vg(this); + return 1; +} + +static dev_t _parse_devt(const char *str) { /* Oh. */ + char *where = (char *) str; + int major = strtol(str, &where, 10); + if (where == str) + return -1; + if (*where != ':') + return -1; + ++where; + str = where; + int minor = strtol(str, &where, 10); + if (where == str) + return -1; + if (*where) + return -1; + + return MKDEV(major, minor); +} + +int pvscan_lvmetad(struct cmd_context *cmd, int argc, char **argv) +{ + if (argc != 1) { + log_error("Exactly one device parameter required."); + return 0; + } + + if (!lvmetad_active()) { + log_error("Cannot proceed since lvmetad is not active."); + return 0; + } + + struct device *dev = dev_cache_get(argv[0], NULL); + if (!dev && _parse_devt(argv[0]) != -1) + dev = dev_cache_get_by_devt(_parse_devt(argv[0]), NULL); + + if (!dev) { + if (_parse_devt(argv[0]) == -1) { + log_error("For devices that do not exist, we need a MAJOR:MINOR pair."); + return 0; + } + + if (!lvmetad_pv_gone(_parse_devt(argv[0]))) + goto fatal; + + log_info("Device %s not found and was wiped from lvmetad.", argv[0]); + return 1; + } + + struct label *label; + if (!label_read(dev, &label, 0)) { + log_warn("No PV label found on %s.", dev_name(dev)); + if (!lvmetad_pv_gone(dev->dev)) + goto fatal; + return 1; + } + + struct lvmcache_info *info = (struct lvmcache_info *) label->info; + struct physical_volume pv; + memset(&pv, 0, sizeof(pv)); + + struct _pvscan_lvmetad_baton baton; + baton.vg = NULL; + + /* Create a dummy instance. */ + struct format_instance_ctx fic = { .type = 0 }; + baton.fid = + lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic); + struct metadata_area *mda; + + lvmcache_foreach_mda(info, _pvscan_lvmetad_single, &baton); + + /* + * NB. If this command failed and we are relying on lvmetad to have an + * *exact* image of the system, the lvmetad instance that went out of + * sync needs to be killed. + */ + if (!lvmetad_pv_found(*(struct id *)dev->pvid, dev, lvmcache_fmt(info), + label->sector, baton.vg)) + goto fatal; + + release_vg(baton.vg); + return 1; +fatal: + release_vg(baton.vg); + /* FIXME kill lvmetad automatically if we can */ + log_error("Update of lvmetad failed. This is a serious problem.\n " + "It is strongly recommended that you restart lvmetad immediately."); + return 0; +} + -- cgit