diff options
author | Richard Jones <rjones@redhat.com> | 2009-10-26 11:03:07 +0000 |
---|---|---|
committer | Richard Jones <rjones@redhat.com> | 2009-10-29 15:58:23 +0000 |
commit | 792c5283009ed6753239a14df9a6e9c71bea35fd (patch) | |
tree | dfe3bf8136e85adedbb3c9d98ef748f4b61f2ec0 /hivex/hivex.c | |
parent | d609130cb65fe7795b168a796426e39d64ceda97 (diff) | |
download | libguestfs-792c5283009ed6753239a14df9a6e9c71bea35fd.tar.gz libguestfs-792c5283009ed6753239a14df9a6e9c71bea35fd.tar.xz libguestfs-792c5283009ed6753239a14df9a6e9c71bea35fd.zip |
Support for Windows Registry.
In hivex/: This mini-library allows us to extract Windows
Registry binary files ("hives").
There are also two tools: hivexml converts a hive to a
self-describing XML format. hivexget can be used to extract
single subkeys from a hive.
New tool: virt-win-reg. This is a wrapper around the library
functionality allowing you to pull out data from the registries
of Windows guests.
Diffstat (limited to 'hivex/hivex.c')
-rw-r--r-- | hivex/hivex.c | 1398 |
1 files changed, 1398 insertions, 0 deletions
diff --git a/hivex/hivex.c b/hivex/hivex.c new file mode 100644 index 00000000..16be753b --- /dev/null +++ b/hivex/hivex.c @@ -0,0 +1,1398 @@ +/* hivex - Windows Registry "hive" extraction library. + * Copyright (C) 2009 Red Hat Inc. + * Derived from code by Petter Nordahl-Hagen under a compatible license: + * Copyright (c) 1997-2007 Petter Nordahl-Hagen. + * Derived from code by Markus Stephany under a compatible license: + * Copyright (c) 2000-2004, Markus Stephany. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * See file LICENSE for the full license. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <endian.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <iconv.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <assert.h> + +#include "hivex.h" + +struct hive_h { + int fd; + size_t size; + int msglvl; + + /* Memory-mapped (readonly) registry file. */ + union { + char *addr; + struct ntreg_header *hdr; + }; + + /* Use a bitmap to store which file offsets are valid (point to a + * used block). We only need to store 1 bit per 32 bits of the file + * (because blocks are 4-byte aligned). We found that the average + * block size in a registry file is ~50 bytes. So roughly 1 in 12 + * bits in the bitmap will be set, making it likely a more efficient + * structure than a hash table. + */ + char *bitmap; +#define BITMAP_SET(bitmap,off) (bitmap[(off)>>5] |= 1 << (((off)>>2)&7)) +#define BITMAP_CLR(bitmap,off) (bitmap[(off)>>5] &= ~ (1 << (((off)>>2)&7))) +#define BITMAP_TST(bitmap,off) (bitmap[(off)>>5] & (1 << (((off)>>2)&7))) +#define IS_VALID_BLOCK(h,off) \ + (((off) & 3) == 0 && \ + (off) >= 0x1000 && \ + (off) < (h)->size && \ + BITMAP_TST((h)->bitmap,(off))) + + /* Fields from the header, extracted from little-endianness. */ + size_t rootoffs; /* Root key offset (always an nk-block). */ + + /* Stats. */ + size_t pages; /* Number of hbin pages read. */ + size_t blocks; /* Total number of blocks found. */ + size_t used_blocks; /* Total number of used blocks found. */ + size_t used_size; /* Total size (bytes) of used blocks. */ +}; + +/* NB. All fields are little endian. */ +struct ntreg_header { + char magic[4]; /* "regf" */ + uint32_t unknown1; + uint32_t unknown2; + char last_modified[8]; + uint32_t unknown3; /* 1 */ + uint32_t unknown4; /* 3 */ + uint32_t unknown5; /* 0 */ + uint32_t unknown6; /* 1 */ + uint32_t offset; /* offset of root key record - 4KB */ + uint32_t blocks; /* size in bytes of data (filesize - 4KB) */ + uint32_t unknown7; /* 1 */ + char name[0x1fc-0x2c]; + uint32_t csum; /* checksum: sum of 32 bit words 0-0x1fb. */ +} __attribute__((__packed__)); + +struct ntreg_hbin_page { + char magic[4]; /* "hbin" */ + uint32_t offset_first; /* offset from 1st block */ + uint32_t offset_next; /* offset of next (relative to this) */ + char unknown[20]; + /* Linked list of blocks follows here. */ +} __attribute__((__packed__)); + +struct ntreg_hbin_block { + int32_t seg_len; /* length of this block (-ve for used block) */ + char id[2]; /* the block type (eg. "nk" for nk record) */ + /* Block data follows here. */ +} __attribute__((__packed__)); + +#define BLOCK_ID_EQ(h,offs,eqid) \ + (strncmp (((struct ntreg_hbin_block *)((h)->addr + (offs)))->id, (eqid), 2) == 0) + +static size_t +block_len (hive_h *h, size_t blkoff, int *used) +{ + struct ntreg_hbin_block *block; + block = (struct ntreg_hbin_block *) (h->addr + blkoff); + + int32_t len = le32toh (block->seg_len); + if (len < 0) { + if (used) *used = 1; + len = -len; + } else { + if (used) *used = 0; + } + + return (size_t) len; +} + +struct ntreg_nk_record { + int32_t seg_len; /* length (always -ve because used) */ + char id[2]; /* "nk" */ + uint16_t flags; + char timestamp[12]; + uint32_t parent; /* offset of owner/parent */ + uint32_t nr_subkeys; /* number of subkeys */ + uint32_t unknown1; + uint32_t subkey_lf; /* lf record containing list of subkeys */ + uint32_t unknown2; + uint32_t nr_values; /* number of values */ + uint32_t vallist; /* value-list record */ + uint32_t sk; /* offset of sk-record */ + uint32_t classname; /* offset of classname record */ + char unknown3[16]; + uint32_t unknown4; + uint16_t name_len; /* length of name */ + uint16_t classname_len; /* length of classname */ + char name[1]; /* name follows here */ +} __attribute__((__packed__)); + +struct ntreg_lf_record { + int32_t seg_len; + char id[2]; /* "lf" */ + uint16_t nr_keys; /* number of keys in this record */ + struct { + uint32_t offset; /* offset of nk-record for this subkey */ + char name[4]; /* first 4 characters of subkey name */ + } keys[1]; +} __attribute__((__packed__)); + +struct ntreg_ri_record { + int32_t seg_len; + char id[2]; /* "ri" */ + uint16_t nr_offsets; /* number of pointers to lh records */ + uint32_t offset[1]; /* list of pointers to lh records */ +} __attribute__((__packed__)); + +/* This has no ID header. */ +struct ntreg_value_list { + int32_t seg_len; + uint32_t offset[1]; /* list of pointers to vk records */ +} __attribute__((__packed__)); + +struct ntreg_vk_record { + int32_t seg_len; /* length (always -ve because used) */ + char id[2]; /* "vk" */ + uint16_t name_len; /* length of name */ + /* length of the data: + * If data_len is <= 4, then it's stored inline. + * If data_len is 0x80000000, then it's an inline dword. + * Top bit may be set or not set at random. + */ + uint32_t data_len; + uint32_t data_offset; /* pointer to the data (or data if inline) */ + hive_type data_type; /* type of the data */ + uint16_t unknown1; /* possibly always 1 */ + uint16_t unknown2; + char name[1]; /* key name follows here */ +} __attribute__((__packed__)); + +hive_h * +hivex_open (const char *filename, int flags) +{ + hive_h *h = NULL; + + h = calloc (1, sizeof *h); + if (h == NULL) + goto error; + + h->msglvl = flags & HIVEX_OPEN_MSGLVL_MASK; + + const char *debug = getenv ("HIVEX_DEBUG"); + if (debug && strcmp (debug, "1") == 0) + h->msglvl = 2; + + if (h->msglvl >= 2) + printf ("hivex_open: created handle %p\n", h); + + h->fd = open (filename, O_RDONLY); + if (h->fd == -1) + goto error; + + struct stat statbuf; + if (fstat (h->fd, &statbuf) == -1) + goto error; + + h->size = statbuf.st_size; + + h->addr = mmap (NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0); + if (h->addr == MAP_FAILED) + goto error; + + if (h->msglvl >= 2) + printf ("hivex_open: mapped file at %p\n", h->addr); + + /* Check header. */ + if (h->hdr->magic[0] != 'r' || + h->hdr->magic[1] != 'e' || + h->hdr->magic[2] != 'g' || + h->hdr->magic[3] != 'f') { + fprintf (stderr, "hivex: %s: not a Windows NT Registry hive file\n", + filename); + errno = ENOTSUP; + goto error; + } + + h->bitmap = calloc (1 + h->size / 32, 1); + +#if 0 /* Doesn't work. */ + /* Header checksum. */ + uint32_t *daddr = h->addr; + size_t i; + uint32_t sum = 0; + for (i = 0; i < 0x1fc / 4; ++i) { + sum += le32toh (*daddr); + daddr++; + } + if (sum != le32toh (h->hdr->csum)) { + fprintf (stderr, "hivex: %s: bad checksum in hive header\n", filename); + errno = EINVAL; + goto error; + } +#endif + + h->rootoffs = le32toh (h->hdr->offset) + 0x1000; + + if (h->msglvl >= 2) + printf ("hivex_open: root offset = %zu\n", h->rootoffs); + + /* We'll set this flag when we see a block with the root offset (ie. + * the root block). + */ + int seen_root_block = 0, bad_root_block = 0; + + /* Read the pages and blocks. The aim here is to be robust against + * corrupt or malicious registries. So we make sure the loops + * always make forward progress. We add the address of each block + * we read to a hash table so pointers will only reference the start + * of valid blocks. + */ + size_t off; + struct ntreg_hbin_page *page; + for (off = 0x1000; off < h->size; off += le32toh (page->offset_next)) { + h->pages++; + + page = (struct ntreg_hbin_page *) (h->addr + off); + if (page->magic[0] != 'h' || + page->magic[1] != 'b' || + page->magic[2] != 'i' || + page->magic[3] != 'n') { + /* This error is seemingly common in uncorrupt registry files. */ + /* + fprintf (stderr, "hivex: %s: ignoring trailing garbage at end of file (at %zu, after %zu pages)\n", + filename, off, h->pages); + */ + break; + } + + if (h->msglvl >= 2) + printf ("hivex_open: page at %zu\n", off); + + if (le32toh (page->offset_next) <= sizeof (struct ntreg_hbin_page) || + (le32toh (page->offset_next) & 3) != 0) { + fprintf (stderr, "hivex: %s: pagesize %d at %zu, bad registry\n", + filename, le32toh (page->offset_next), off); + errno = ENOTSUP; + goto error; + } + + /* Read the blocks in this page. */ + size_t blkoff; + struct ntreg_hbin_block *block; + int32_t seg_len; + for (blkoff = off + 0x20; + blkoff < off + le32toh (page->offset_next); + blkoff += seg_len) { + h->blocks++; + + int is_root = blkoff == h->rootoffs; + if (is_root) + seen_root_block = 1; + + block = (struct ntreg_hbin_block *) (h->addr + blkoff); + int used; + seg_len = block_len (h, blkoff, &used); + if (seg_len <= 4 || (seg_len & 3) != 0) { + fprintf (stderr, "hivex: %s: block size %d at %zu, bad registry\n", + filename, le32toh (block->seg_len), blkoff); + errno = ENOTSUP; + goto error; + } + + if (h->msglvl >= 2) + printf ("hivex_open: %s block id %d,%d at %zu%s\n", + used ? "used" : "free", block->id[0], block->id[1], blkoff, + is_root ? " (root)" : ""); + + if (is_root && !used) + bad_root_block = 1; + + if (used) { + h->used_blocks++; + h->used_size += seg_len; + + /* Root block must be an nk-block. */ + if (is_root && (block->id[0] != 'n' || block->id[1] != 'k')) + bad_root_block = 1; + + /* Note this blkoff is a valid address. */ + BITMAP_SET (h->bitmap, blkoff); + } + } + } + + if (!seen_root_block) { + fprintf (stderr, "hivex: %s: no root block found\n", filename); + errno = ENOTSUP; + goto error; + } + + if (bad_root_block) { + fprintf (stderr, "hivex: %s: bad root block (free or not nk)\n", filename); + errno = ENOTSUP; + goto error; + } + + if (h->msglvl >= 1) + printf ("hivex_open: successfully read Windows Registry hive file:\n" + " pages: %zu\n" + " blocks: %zu\n" + " blocks used: %zu\n" + " bytes used: %zu\n", + h->pages, h->blocks, h->used_blocks, h->used_size); + + return h; + + error:; + int err = errno; + if (h) { + free (h->bitmap); + if (h->addr && h->size && h->addr != MAP_FAILED) + munmap (h->addr, h->size); + if (h->fd >= 0) + close (h->fd); + free (h); + } + errno = err; + return NULL; +} + +int +hivex_close (hive_h *h) +{ + int r; + + free (h->bitmap); + munmap (h->addr, h->size); + r = close (h->fd); + free (h); + + return r; +} + +hive_node_h +hivex_root (hive_h *h) +{ + hive_node_h ret = h->rootoffs; + if (!IS_VALID_BLOCK (h, ret)) { + errno = ENOKEY; + return 0; + } + return ret; +} + +char * +hivex_node_name (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return NULL; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + /* AFAIK the node name is always plain ASCII, so no conversion + * to UTF-8 is necessary. However we do need to nul-terminate + * the string. + */ + + /* nk->name_len is unsigned, 16 bit, so this is safe ... However + * we have to make sure the length doesn't exceed the block length. + */ + size_t len = le16toh (nk->name_len); + size_t seg_len = block_len (h, node, NULL); + if (sizeof (struct ntreg_nk_record) + len - 1 > seg_len) { + if (h->msglvl >= 2) + printf ("hivex_node_name: returning EFAULT because node name is too long (%zu, %zu)\n", + len, seg_len); + errno = EFAULT; + return NULL; + } + + char *ret = malloc (len + 1); + if (ret == NULL) + return NULL; + memcpy (ret, nk->name, len); + ret[len] = '\0'; + return ret; +} + +#if 0 +/* I think the documentation for the sk and classname fields in the nk + * record is wrong, or else the offset field is in the wrong place. + * Otherwise this makes no sense. Disabled this for now -- it's not + * useful for reading the registry anyway. + */ + +hive_security_h +hivex_node_security (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return 0; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + hive_node_h ret = le32toh (nk->sk); + ret += 0x1000; + if (!IS_VALID_BLOCK (h, ret)) { + errno = EFAULT; + return 0; + } + return ret; +} + +hive_classname_h +hivex_node_classname (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return 0; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + hive_node_h ret = le32toh (nk->classname); + ret += 0x1000; + if (!IS_VALID_BLOCK (h, ret)) { + errno = EFAULT; + return 0; + } + return ret; +} +#endif + +hive_node_h * +hivex_node_children (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return NULL; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys); + + /* Deal with the common "no subkeys" case quickly. */ + hive_node_h *ret; + if (nr_subkeys_in_nk == 0) { + ret = malloc (sizeof (hive_node_h)); + if (ret == NULL) + return NULL; + ret[0] = 0; + return ret; + } + + /* Arbitrarily limit the number of subkeys we will ever deal with. */ + if (nr_subkeys_in_nk > 1000000) { + errno = ERANGE; + return NULL; + } + + /* The subkey_lf field can point either to an lf-record, which is + * the common case, or if there are lots of subkeys, to an + * ri-record. + */ + size_t subkey_lf = le32toh (nk->subkey_lf); + subkey_lf += 0x1000; + if (!IS_VALID_BLOCK (h, subkey_lf)) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because subkey_lf is not a valid block (%zu)\n", + subkey_lf); + errno = EFAULT; + return NULL; + } + + struct ntreg_hbin_block *block = + (struct ntreg_hbin_block *) (h->addr + subkey_lf); + + /* Points to lf-record? (Note, also "lh" but that is basically the + * same as "lf" as far as we are concerned here). + */ + if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) { + struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block; + + /* Check number of subkeys in the nk-record matches number of subkeys + * in the lf-record. + */ + size_t nr_subkeys_in_lf = le16toh (lf->nr_keys); + + if (h->msglvl >= 2) + printf ("hivex_node_children: nr_subkeys_in_nk = %zu, nr_subkeys_in_lf = %zu\n", + nr_subkeys_in_nk, nr_subkeys_in_lf); + + if (nr_subkeys_in_nk != nr_subkeys_in_lf) { + errno = ENOTSUP; + return NULL; + } + + size_t len = block_len (h, subkey_lf, NULL); + if (8 + nr_subkeys_in_lf * 8 > len) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because too many subkeys (%zu, %zu)\n", + nr_subkeys_in_lf, len); + errno = EFAULT; + return NULL; + } + + /* Allocate space for the returned values. Note that + * nr_subkeys_in_lf is limited to a 16 bit value. + */ + ret = malloc ((1 + nr_subkeys_in_lf) * sizeof (hive_node_h)); + if (ret == NULL) + return NULL; + + size_t i; + for (i = 0; i < nr_subkeys_in_lf; ++i) { + hive_node_h subkey = lf->keys[i].offset; + subkey += 0x1000; + if (!IS_VALID_BLOCK (h, subkey)) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because subkey is not a valid block (%zu)\n", + subkey); + errno = EFAULT; + free (ret); + return NULL; + } + ret[i] = subkey; + } + ret[i] = 0; + return ret; + } + /* Points to ri-record? */ + else if (block->id[0] == 'r' && block->id[1] == 'i') { + struct ntreg_ri_record *ri = (struct ntreg_ri_record *) block; + + size_t nr_offsets = le16toh (ri->nr_offsets); + + /* Count total number of children. */ + size_t i, count = 0; + for (i = 0; i < nr_offsets; ++i) { + hive_node_h offset = ri->offset[i]; + offset += 0x1000; + if (!IS_VALID_BLOCK (h, offset)) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because ri-offset is not a valid block (%zu)\n", + offset); + errno = EFAULT; + return NULL; + } + if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { + errno = ENOTSUP; + return NULL; + } + + struct ntreg_lf_record *lf = + (struct ntreg_lf_record *) (h->addr + offset); + + count += le16toh (lf->nr_keys); + } + + if (h->msglvl >= 2) + printf ("hivex_node_children: nr_subkeys_in_nk = %zu, counted = %zu\n", + nr_subkeys_in_nk, count); + + if (nr_subkeys_in_nk != count) { + errno = ENOTSUP; + return NULL; + } + + /* Copy list of children. Note nr_subkeys_in_nk is limited to + * something reasonable above. + */ + ret = malloc ((1 + nr_subkeys_in_nk) * sizeof (hive_node_h)); + if (ret == NULL) + return NULL; + + count = 0; + for (i = 0; i < nr_offsets; ++i) { + hive_node_h offset = ri->offset[i]; + offset += 0x1000; + if (!IS_VALID_BLOCK (h, offset)) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because ri-offset is not a valid block (%zu)\n", + offset); + errno = EFAULT; + return NULL; + } + if (!BLOCK_ID_EQ (h, offset, "lf") && !BLOCK_ID_EQ (h, offset, "lh")) { + errno = ENOTSUP; + return NULL; + } + + struct ntreg_lf_record *lf = + (struct ntreg_lf_record *) (h->addr + offset); + + size_t j; + for (j = 0; j < le16toh (lf->nr_keys); ++j) { + hive_node_h subkey = lf->keys[j].offset; + subkey += 0x1000; + if (!IS_VALID_BLOCK (h, subkey)) { + if (h->msglvl >= 2) + printf ("hivex_node_children: returning EFAULT because indirect subkey is not a valid block (%zu)\n", + subkey); + errno = EFAULT; + free (ret); + return NULL; + } + ret[count++] = subkey; + } + } + ret[count] = 0; + + return ret; + } + else { + errno = ENOTSUP; + return NULL; + } +} + +/* Very inefficient, but at least having a separate API call + * allows us to make it more efficient in future. + */ +hive_node_h +hivex_node_get_child (hive_h *h, hive_node_h node, const char *nname) +{ + hive_node_h *children = NULL; + char *name = NULL; + hive_node_h ret = 0; + + children = hivex_node_children (h, node); + if (!children) goto error; + + size_t i; + for (i = 0; children[i] != 0; ++i) { + name = hivex_node_name (h, children[i]); + if (!name) goto error; + if (strcasecmp (name, nname) == 0) { + ret = children[i]; + break; + } + free (name); name = NULL; + } + + error: + free (children); + free (name); + return ret; +} + +hive_node_h +hivex_node_parent (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return 0; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + hive_node_h ret = le32toh (nk->parent); + ret += 0x1000; + printf ("parent = %zu\n", ret); + if (!IS_VALID_BLOCK (h, ret)) { + if (h->msglvl >= 2) + printf ("hivex_node_parent: returning EFAULT because parent is not a valid block (%zu)\n", + ret); + errno = EFAULT; + return 0; + } + return ret; +} + +hive_value_h * +hivex_node_values (hive_h *h, hive_node_h node) +{ + if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) { + errno = EINVAL; + return 0; + } + + struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node); + + size_t nr_values = le32toh (nk->nr_values); + + if (h->msglvl >= 2) + printf ("hivex_node_values: nr_values = %zu\n", nr_values); + + /* Deal with the common "no values" case quickly. */ + hive_node_h *ret; + if (nr_values == 0) { + ret = malloc (sizeof (hive_node_h)); + if (ret == NULL) + return NULL; + ret[0] = 0; + return ret; + } + + /* Arbitrarily limit the number of values we will ever deal with. */ + if (nr_values > 100000) { + errno = ERANGE; + return NULL; + } + + /* Get the value list and check it looks reasonable. */ + size_t vlist_offset = le32toh (nk->vallist); + vlist_offset += 0x1000; + if (!IS_VALID_BLOCK (h, vlist_offset)) { + if (h->msglvl >= 2) + printf ("hivex_node_values: returning EFAULT because value list is not a valid block (%zu)\n", + vlist_offset); + errno = EFAULT; + return NULL; + } + + struct ntreg_value_list *vlist = + (struct ntreg_value_list *) (h->addr + vlist_offset); + + size_t len = block_len (h, vlist_offset, NULL); + if (4 + nr_values * 4 > len) { + if (h->msglvl >= 2) + printf ("hivex_node_values: returning EFAULT because value list is too long (%zu, %zu)\n", + nr_values, len); + errno = EFAULT; + return NULL; + } + + /* Allocate return array and copy values in. */ + ret = malloc ((1 + nr_values) * sizeof (hive_node_h)); + if (ret == NULL) + return NULL; + + size_t i; + for (i = 0; i < nr_values; ++i) { + hive_node_h value = vlist->offset[i]; + value += 0x1000; + if (!IS_VALID_BLOCK (h, value)) { + if (h->msglvl >= 2) + printf ("hivex_node_values: returning EFAULT because value is not a valid block (%zu)\n", + value); + errno = EFAULT; + free (ret); + return NULL; + } + ret[i] = value; + } + + ret[i] = 0; + return ret; +} + +/* Very inefficient, but at least having a separate API call + * allows us to make it more efficient in future. + */ +hive_value_h +hivex_node_get_value (hive_h *h, hive_node_h node, const char *key) +{ + hive_value_h *values = NULL; + char *name = NULL; + hive_value_h ret = 0; + + values = hivex_node_values (h, node); + if (!values) goto error; + + size_t i; + for (i = 0; values[i] != 0; ++i) { + name = hivex_value_key (h, values[i]); + if (!name) goto error; + if (strcasecmp (name, key) == 0) { + ret = values[i]; + break; + } + free (name); name = NULL; + } + + error: + free (values); + free (name); + return ret; +} + +char * +hivex_value_key (hive_h *h, hive_value_h value) +{ + if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) { + errno = EINVAL; + return 0; + } + + struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value); + + /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is + * necessary. However we do need to nul-terminate the string. + */ + + /* vk->name_len is unsigned, 16 bit, so this is safe ... However + * we have to make sure the length doesn't exceed the block length. + */ + size_t len = le16toh (vk->name_len); + size_t seg_len = block_len (h, value, NULL); + if (sizeof (struct ntreg_vk_record) + len - 1 > seg_len) { + if (h->msglvl >= 2) + printf ("hivex_value_key: returning EFAULT because key length is too long (%zu, %zu)\n", + len, seg_len); + errno = EFAULT; + return NULL; + } + + char *ret = malloc (len + 1); + if (ret == NULL) + return NULL; + memcpy (ret, vk->name, len); + ret[len] = '\0'; + return ret; +} + +int +hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len) +{ + if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) { + errno = EINVAL; + return -1; + } + + struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value); + + if (t) + *t = le32toh (vk->data_type); + + if (len) { + *len = le32toh (vk->data_len); + if (*len == 0x80000000) { /* special case */ + *len = 4; + if (t) *t = hive_t_dword; + } + *len &= 0x7fffffff; + } + + return 0; +} + +char * +hivex_value_value (hive_h *h, hive_value_h value, + hive_type *t_rtn, size_t *len_rtn) +{ + if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) { + errno = EINVAL; + return NULL; + } + + struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value); + + hive_type t; + size_t len; + + t = le32toh (vk->data_type); + + len = le32toh (vk->data_len); + if (len == 0x80000000) { /* special case */ + len = 4; + t = hive_t_dword; + } + len &= 0x7fffffff; + + if (h->msglvl >= 2) + printf ("hivex_value_value: value=%zu, t=%d, len=%zu\n", + value, t, len); + + if (t_rtn) + *t_rtn = t; + if (len_rtn) + *len_rtn = len; + + /* Arbitrarily limit the length that we will read. */ + if (len > 1000000) { + errno = ERANGE; + return NULL; + } + + char *ret = malloc (len); + if (ret == NULL) + return NULL; + + /* If length is <= 4 it's always stored inline. */ + if (len <= 4) { + memcpy (ret, (char *) &vk->data_offset, len); + return ret; + } + + size_t data_offset = vk->data_offset; + data_offset += 0x1000; + if (!IS_VALID_BLOCK (h, data_offset)) { + if (h->msglvl >= 2) + printf ("hivex_value_value: returning EFAULT because data offset is not a valid block (%zu)\n", + data_offset); + errno = EFAULT; + free (ret); + return NULL; + } + + /* Check that the declared size isn't larger than the block its in. */ + size_t blen = block_len (h, data_offset, NULL); + if (blen < len) { + if (h->msglvl >= 2) + printf ("hivex_value_value: returning EFAULT because data is longer than its block (%zu, %zu)\n", + blen, len); + errno = EFAULT; + free (ret); + return NULL; + } + + char *data = h->addr + data_offset + 4; + memcpy (ret, data, len); + return ret; +} + +static char * +windows_utf16_to_utf8 (/* const */ char *input, size_t len) +{ + iconv_t ic = iconv_open ("UTF-8", "UTF-16"); + if (ic == (iconv_t) -1) + return NULL; + + /* iconv(3) has an insane interface ... */ + + /* Mostly UTF-8 will be smaller, so this is a good initial guess. */ + size_t outalloc = len; + + again:; + size_t inlen = len; + size_t outlen = outalloc; + char *out = malloc (outlen + 1); + if (out == NULL) { + int err = errno; + iconv_close (ic); + errno = err; + return NULL; + } + char *inp = input; + char *outp = out; + + size_t r = iconv (ic, &inp, &inlen, &outp, &outlen); + if (r == (size_t) -1) { + if (errno == E2BIG) { + /* Try again with a larger output buffer. */ + free (out); + outalloc *= 2; + goto again; + } + else { + /* Else some conversion failure, eg. EILSEQ, EINVAL. */ + int err = errno; + iconv_close (ic); + free (out); + errno = err; + return NULL; + } + } + + *outp = '\0'; + iconv_close (ic); + + return out; +} + +char * +hivex_value_string (hive_h *h, hive_value_h value) +{ + hive_type t; + size_t len; + char *data = hivex_value_value (h, value, &t, &len); + + if (data == NULL) + return NULL; + + if (t != hive_t_string && t != hive_t_expand_string && t != hive_t_link) { + free (data); + errno = EINVAL; + return NULL; + } + + char *ret = windows_utf16_to_utf8 (data, len); + free (data); + if (ret == NULL) + return NULL; + + return ret; +} + +static void +free_strings (char **argv) +{ + if (argv) { + size_t i; + + for (i = 0; argv[i] != NULL; ++i) + free (argv[i]); + free (argv); + } +} + +/* Get the length of a UTF-16 format string. Handle the string as + * pairs of bytes, looking for the first \0\0 pair. + */ +static size_t +utf16_string_len_in_bytes (const char *str) +{ + size_t ret = 0; + + while (str[0] || str[1]) { + str += 2; + ret += 2; + } + + return ret; +} + +/* http://blogs.msdn.com/oldnewthing/archive/2009/10/08/9904646.aspx */ +char ** +hivex_value_multiple_strings (hive_h *h, hive_value_h value) +{ + hive_type t; + size_t len; + char *data = hivex_value_value (h, value, &t, &len); + + if (data == NULL) + return NULL; + + if (t != hive_t_multiple_strings) { + free (data); + errno = EINVAL; + return NULL; + } + + size_t nr_strings = 0; + char **ret = malloc ((1 + nr_strings) * sizeof (char *)); + if (ret == NULL) { + free (data); + return NULL; + } + ret[0] = NULL; + + char *p = data; + size_t plen; + + while (p < data + len && (plen = utf16_string_len_in_bytes (p)) > 0) { + nr_strings++; + char **ret2 = realloc (ret, (1 + nr_strings) * sizeof (char *)); + if (ret2 == NULL) { + free_strings (ret); + free (data); + return NULL; + } + ret = ret2; + + ret[nr_strings-1] = windows_utf16_to_utf8 (p, plen); + ret[nr_strings] = NULL; + if (ret[nr_strings-1] == NULL) { + free_strings (ret); + free (data); + return NULL; + } + + p += plen + 2 /* skip over UTF-16 \0\0 at the end of this string */; + } + + free (data); + return ret; +} + +int32_t +hivex_value_dword (hive_h *h, hive_value_h value) +{ + hive_type t; + size_t len; + char *data = hivex_value_value (h, value, &t, &len); + + if (data == NULL) + return -1; + + if ((t != hive_t_dword && t != hive_t_dword_be) || len != 4) { + free (data); + errno = EINVAL; + return -1; + } + + int32_t ret = *(int32_t*)data; + free (data); + if (t == hive_t_dword) /* little endian */ + ret = le32toh (ret); + else + ret = be32toh (ret); + + return ret; +} + +int64_t +hivex_value_qword (hive_h *h, hive_value_h value) +{ + hive_type t; + size_t len; + char *data = hivex_value_value (h, value, &t, &len); + + if (data == NULL) + return -1; + + if (t != hive_t_qword || len != 8) { + free (data); + errno = EINVAL; + return -1; + } + + int64_t ret = *(int64_t*)data; + free (data); + ret = le64toh (ret); /* always little endian */ + + return ret; +} + +int +hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, + void *opaque, int flags) +{ + return hivex_visit_node (h, hivex_root (h), visitor, len, opaque, flags); +} + +static int hivex__visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *vtor, char *unvisited, void *opaque, int flags); + +int +hivex_visit_node (hive_h *h, hive_node_h node, + const struct hivex_visitor *visitor, size_t len, void *opaque, + int flags) +{ + struct hivex_visitor vtor; + memset (&vtor, 0, sizeof vtor); + + /* Note that len might be larger *or smaller* than the expected size. */ + size_t copysize = len <= sizeof vtor ? len : sizeof vtor; + memcpy (&vtor, visitor, copysize); + + /* This bitmap records unvisited nodes, so we don't loop if the + * registry contains cycles. + */ + char *unvisited = malloc (1 + h->size / 32); + if (unvisited == NULL) + return -1; + memcpy (unvisited, h->bitmap, 1 + h->size / 32); + + int r = hivex__visit_node (h, node, &vtor, unvisited, opaque, flags); + free (unvisited); + return r; +} + +static int +hivex__visit_node (hive_h *h, hive_node_h node, + const struct hivex_visitor *vtor, char *unvisited, + void *opaque, int flags) +{ + int skip_bad = flags & HIVEX_VISIT_SKIP_BAD; + char *name = NULL; + hive_value_h *values = NULL; + hive_node_h *children = NULL; + char *key = NULL; + char *str = NULL; + char **strs = NULL; + int i; + + /* Return -1 on all callback errors. However on internal errors, + * check if skip_bad is set and suppress those errors if so. + */ + int ret = -1; + + if (!BITMAP_TST (unvisited, node)) { + if (h->msglvl >= 2) + printf ("hivex__visit_node: contains cycle: visited node %zu already\n", + node); + + errno = ELOOP; + return skip_bad ? 0 : -1; + } + BITMAP_CLR (unvisited, node); + + name = hivex_node_name (h, node); + if (!name) return skip_bad ? 0 : -1; + if (vtor->node_start && vtor->node_start (h, opaque, node, name) == -1) + goto error; + + values = hivex_node_values (h, node); + if (!values) { + ret = skip_bad ? 0 : -1; + goto error; + } + + for (i = 0; values[i] != 0; ++i) { + hive_type t; + size_t len; + + if (hivex_value_type (h, values[i], &t, &len) == -1) { + ret = skip_bad ? 0 : -1; + goto error; + } + + key = hivex_value_key (h, values[i]); + if (key == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + + switch (t) { + case hive_t_none: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (t != hive_t_none) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_none && + vtor->value_none (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + + case hive_t_string: + case hive_t_expand_string: + case hive_t_link: + str = hivex_value_string (h, values[i]); + if (str == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; + } + if (vtor->value_string && + vtor->value_string (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + + case hive_t_dword: + case hive_t_dword_be: { + int32_t i32 = hivex_value_dword (h, values[i]); + if (vtor->value_dword && + vtor->value_dword (h, opaque, node, values[i], t, len, key, i32) == -1) + goto error; + break; + } + + case hive_t_qword: { + int64_t i64 = hivex_value_qword (h, values[i]); + if (vtor->value_qword && + vtor->value_qword (h, opaque, node, values[i], t, len, key, i64) == -1) + goto error; + break; + } + + case hive_t_binary: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (t != hive_t_binary) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_binary && + vtor->value_binary (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + + case hive_t_multiple_strings: + strs = hivex_value_multiple_strings (h, values[i]); + if (strs == NULL) { + if (errno != EILSEQ && errno != EINVAL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_string_invalid_utf16) { + str = hivex_value_value (h, values[i], &t, &len); + if (vtor->value_string_invalid_utf16 (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + } + break; + } + if (vtor->value_multiple_strings && + vtor->value_multiple_strings (h, opaque, node, values[i], t, len, key, strs) == -1) + goto error; + free_strings (strs); strs = NULL; + break; + + case hive_t_resource_list: + case hive_t_full_resource_description: + case hive_t_resource_requirements_list: + default: + str = hivex_value_value (h, values[i], &t, &len); + if (str == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + if (vtor->value_other && + vtor->value_other (h, opaque, node, values[i], t, len, key, str) == -1) + goto error; + free (str); str = NULL; + break; + } + + free (key); key = NULL; + } + + children = hivex_node_children (h, node); + if (children == NULL) { + ret = skip_bad ? 0 : -1; + goto error; + } + + for (i = 0; children[i] != 0; ++i) { + if (h->msglvl >= 2) + printf ("hivex__visit_node: %s: visiting subkey %d (%zu)\n", + name, i, children[i]); + + if (hivex__visit_node (h, children[i], vtor, unvisited, opaque, flags) == -1) + goto error; + } + + if (vtor->node_end && vtor->node_end (h, opaque, node, name) == -1) + goto error; + + ret = 0; + + error: + free (name); + free (values); + free (children); + free (key); + free (str); + free_strings (strs); + return ret; +} |