diff options
Diffstat (limited to 'hivex/hivex.pod')
-rw-r--r-- | hivex/hivex.pod | 655 |
1 files changed, 0 insertions, 655 deletions
diff --git a/hivex/hivex.pod b/hivex/hivex.pod deleted file mode 100644 index 275eb42..0000000 --- a/hivex/hivex.pod +++ /dev/null @@ -1,655 +0,0 @@ -=encoding utf8 - -=head1 NAME - -hivex - Windows Registry "hive" extraction library - -=head1 SYNOPSIS - - hive_h *hivex_open (const char *filename, int flags); - int hivex_close (hive_h *h); - -=head1 DESCRIPTION - -libhivex is a library for extracting the contents of Windows Registry -"hive" files. It is designed to be secure against buggy or malicious -registry files. - -Unlike many other tools in this area, it doesn't use the textual .REG -format for output, because parsing that is as much trouble as parsing -the original binary format. Instead it makes the file available -through a C API, or there is a separate program to export the hive as -XML (see L<hivexml(1)>), or to get individual keys (see -L<hivexget(1)>). - -=head2 OPENING AND CLOSING A HIVE - -=over 4 - -=item hive_h *hivex_open (const char *filename, int flags); - -Opens the hive named C<filename> for reading. - -Flags is an ORed list of the open flags (or C<0> if you don't -want to pass any flags). These flags are defined: - -=over 4 - -=item HIVEX_OPEN_VERBOSE - -Verbose messages. - -=item HIVEX_OPEN_DEBUG - -Very verbose messages, suitable for debugging problems in the library -itself. - -This is also selected if the C<HIVEX_DEBUG> environment variable -is set to 1. - -=item HIVEX_OPEN_WRITE - -Open the hive for writing. If omitted, the hive is read-only. - -See L</WRITING TO HIVE FILES>. - -=back - -C<hivex_open> returns a hive handle. On error this returns NULL and -sets C<errno> to indicate the error. - -=item int hivex_close (hive_h *h); - -Close a hive handle and free all associated resources. - -Note that any uncommitted writes are I<not> committed by this call, -but instead are lost. See L</WRITING TO HIVE FILES>. - -Returns 0 on success. On error this returns -1 and sets errno. 
- -=back - -=head2 NAVIGATING THE TREE OF HIVE SUBKEYS - -=over 4 - -=item hive_node_h - -This is a node handle, an integer but opaque outside the library. -Valid node handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_node_h hivex_root (hive_h *h); - -Return root node of the hive. All valid registries must contain -a root node. - -On error this returns 0 and sets errno. - -=item char *hivex_node_name (hive_h *h, hive_node_h node); - -Return the name of the node. The name is reencoded as UTF-8 -and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that the name of the root node is a dummy, such as -C<$$$PROTO.HIV> (other names are possible: it seems to depend on the -tool or program that created the hive in the first place). You can -only know the "real" name of the root node by knowing which registry -file this hive originally comes from, which is knowledge that is -outside the scope of this library. - -On error this returns NULL and sets errno. - -=item hive_node_h *hivex_node_children (hive_h *h, hive_node_h node); - -Return a 0-terminated array of nodes which are the subkeys -(children) of C<node>. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_node_h hivex_node_get_child (hive_h *h, hive_node_h node, const char *name); - -Return the child of node with the name C<name>, if it exists. - -The name is matched case insensitively. - -If the child node does not exist, this returns 0 without -setting errno. - -On error this returns 0 and sets errno. - -=item hive_node_h hivex_node_parent (hive_h *h, hive_node_h node); - -Return the parent of C<node>. - -On error this returns 0 and sets errno. - -The parent pointer of the root node in registry files that we -have examined seems to be invalid, and so this function will -return an error if called on the root node. 
- -=back - -=head2 GETTING VALUES AT A NODE - -The enum below describes the possible types for the value(s) -stored at each node. - - enum hive_type { - hive_t_none = 0, - hive_t_string = 1, - hive_t_expand_string = 2, - hive_t_binary = 3, - hive_t_dword = 4, - hive_t_dword_be = 5, - hive_t_link = 6, - hive_t_multiple_strings = 7, - hive_t_resource_list = 8, - hive_t_full_resource_description = 9, - hive_t_resource_requirements_list = 10, - hive_t_qword = 11 - }; - -=over 4 - -=item hive_value_h - -This is a value handle, an integer but opaque outside the library. -Valid value handles cannot be 0. The library returns 0 in some -situations to indicate an error. - -=item hive_value_h *hivex_node_values (hive_h *h, hive_node_h node); - -Return the 0-terminated array of (key, value) pairs attached to -this node. - -The array should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item hive_value_h hivex_node_get_value (hive_h *h, hive_node_h node, const char *key); - -Return the value attached to this node which has the name C<key>, -if it exists. - -The key name is matched case insensitively. - -Note that to get the default key, you should pass the empty -string C<""> here. The default key is often written C<"@">, but -inside hives that has no meaning and won't give you the -default key. - -If no such key exists, this returns 0 and does not set errno. - -On error this returns 0 and sets errno. - -=item char *hivex_value_key (hive_h *h, hive_value_h value); - -Return the key (name) of a (key, value) pair. The name -is reencoded as UTF-8 and returned as a C string. - -The string should be freed by the caller when it is no longer needed. - -Note that this function can return a zero-length string. In the -context of Windows Registries, this means that this value is the -default key for this node in the tree. This is usually written -as C<"@">. - -On error this returns NULL and sets errno. 
- -=item int hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the data type and length of the value in this (key, value) -pair. See also C<hivex_value_value> which returns all this -information, and the value itself. Also see the C<hivex_value_*> functions -below, which can be used to return the value in a more useful form when -you know the type in advance. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item char *hivex_value_value (hive_h *h, hive_value_h value, hive_type *t, size_t *len); - -Return the value of this (key, value) pair. The value should -be interpreted according to its type (see C<enum hive_type>). - -The value is returned in an array of bytes of length C<len>. - -The value should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char *hivex_value_string (hive_h *h, hive_value_h value); - -If this value is a string, return the string reencoded as UTF-8 -(as a C string). This only works for values which have type -C<hive_t_string>, C<hive_t_expand_string> or C<hive_t_link>. - -The string should be freed by the caller when it is no longer needed. - -On error this returns NULL and sets errno. - -=item char **hivex_value_multiple_strings (hive_h *h, hive_value_h value); - -If this value is a multiple-string, return the strings reencoded -as UTF-8 (as a NULL-terminated array of C strings). This only -works for values which have type C<hive_t_multiple_strings>. - -The string array and each string in it should be freed by the -caller when they are no longer needed. - -On error this returns NULL and sets errno. - -=item int32_t hivex_value_dword (hive_h *h, hive_value_h value); - -If this value is a DWORD (Windows int32), return it. This only works -for values which have type C<hive_t_dword> or C<hive_t_dword_be>. - -=item int64_t hivex_value_qword (hive_h *h, hive_value_h value); - -If this value is a QWORD (Windows int64), return it. 
This only -works for values which have type C<hive_t_qword>. - -=back - -=head2 VISITING ALL NODES - -The visitor pattern is useful if you want to visit all nodes -in the tree or all nodes below a certain point in the tree. - -First you set up your own C<struct hivex_visitor> with your -callback functions. - -Each of these callback functions should return 0 on success or -1 -on error. If any callback returns -1, then the entire visit -terminates immediately. If you don't need a callback function at -all, set the function pointer to NULL. - - struct hivex_visitor { - int (*node_start) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*node_end) (hive_h *, void *opaque, hive_node_h, const char *name); - int (*value_string) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *str); - int (*value_multiple_strings) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, char **argv); - int (*value_string_invalid_utf16) (hive_h *, void *opaque, hive_node_h, - hive_value_h, hive_type t, size_t len, const char *key, - const char *str); - int (*value_dword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int32_t); - int (*value_qword) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, int64_t); - int (*value_binary) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_none) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - int (*value_other) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - /* If value_any callback is not NULL, then the other value_* - * callbacks are not used, and value_any is called on all values. 
- */ - int (*value_any) (hive_h *, void *opaque, hive_node_h, hive_value_h, - hive_type t, size_t len, const char *key, const char *value); - }; - -=over 4 - -=item int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags); - -Visit all the nodes recursively in the hive C<h>. - -C<visitor> should be a C<hivex_visitor> structure with callback -fields filled in as required (unwanted callbacks can be set to -NULL). C<len> must be the length of the 'visitor' struct (you -should pass C<sizeof (struct hivex_visitor)> for this). - -This returns 0 if the whole recursive visit was completed -successfully. On error this returns -1. If one of the callback -functions returned an error then we don't touch errno. If the -error was generated internally then we set errno. - -You can skip bad registry entries by setting C<flags> to -C<HIVEX_VISIT_SKIP_BAD>. If this flag is not set, then a bad registry -causes the function to return an error immediately. - -This function is robust if the registry contains cycles or -pointers which are invalid or outside the registry. It detects -these cases and returns an error. - -=item int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque); - -Same as C<hivex_visit> but instead of starting out at the root, this -starts at C<node>. - -=back - -=head2 WRITING TO HIVE FILES - -The hivex library supports making limited modifications to hive files. -We have tried to implement this very conservatively in order to reduce -the chance of corrupting your registry. However you should be careful -and take back-ups, since Microsoft has never documented the hive -format, and so it is possible there are nuances in the -reverse-engineered format that we do not understand. - -To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE> -flag to C<hivex_open>, otherwise any write operation will return with -errno C<EROFS>. 
- -The write operations shown below do not modify the on-disk file -immediately. You must call C<hivex_commit> in order to write the -changes to disk. If you call C<hivex_close> without committing then -any writes are discarded. - -Hive files internally consist of a "memory dump" of binary blocks -(like the C heap), and some of these blocks can be unused. The hivex -library never reuses these unused blocks. Instead, to ensure -robustness in the face of the partially understood on-disk format, -hivex only allocates new blocks after the end of the file, and makes -minimal modifications to existing structures in the file to point to -these new blocks. This makes hivex slightly less disk-efficient than -it could be, but disk is cheap, and registry modifications tend to be -very small. - -When deleting nodes, it is possible that this library may leave -unreachable live blocks in the hive. This is because certain parts of -the hive disk format such as security (sk) records and big data (db) -records and classname fields are not well understood (and not -documented at all) and we play it safe by not attempting to modify -them. Apart from wasting a little bit of disk space, it is not -thought that unreachable blocks are a problem. - -=over 4 - -=item int hivex_commit (hive_h *h, const char *filename, int flags); - -Commit (write) any changes which have been made. - -C<filename> is the new file to write. If C<filename == NULL> then we -overwrite the original file (ie. the file name that was passed to -C<hivex_open>). C<flags> is not used, always pass 0. - -Returns 0 on success. On error this returns -1 and sets errno. - -Note this does not close the hive handle. You can perform further -operations on the hive after committing, including making more -modifications. If you no longer wish to use the hive, call -C<hivex_close> after this. 
- -=item hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name); - -Add a new child node named C<name> to the existing node C<parent>. -The new child initially has no subnodes and contains no keys or -values. The sk-record (security descriptor) is inherited from -the parent. - -The parent must not have an existing child called C<name>, so if you -want to overwrite an existing child, call C<hivex_node_delete_child> -first. - -Returns the node handle. On error this returns 0 and sets errno. - -=item int hivex_node_delete_child (hive_h *h, hive_node_h node); - -Delete the node C<node>. All values at the node and all subnodes are -deleted (recursively). The C<node> handle and the handles of all -subnodes become invalid. You cannot delete the root node. - -Returns 0 on success. On error this returns -1 and sets errno. - -=item hive_set_value - -The typedef C<hive_set_value> is used in conjunction with the -C<hivex_node_set_values> call described below. - - struct hive_set_value { - char *key; /* key - a UTF-8 encoded ASCIIZ string */ - hive_type t; /* type of value field */ - size_t len; /* length of value field in bytes */ - char *value; /* value field */ - }; - typedef struct hive_set_value hive_set_value; - -To set the default value for a node, you have to pass C<key = "">. - -Note that the C<value> field is just treated as a list of bytes, and -is stored directly in the hive. The caller has to ensure correct -encoding and endianness, for example converting dwords to little -endian. - -The correct type and encoding for values depends on the node and key -in the registry, the version of Windows, and sometimes even changes -between versions of Windows for the same key. We don't document it -here. Often it's not documented at all. - -=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags); - -This call can be used to set all the (key, value) pairs stored in C<node>. 
- -C<node> is the node to modify. C<values> is an array of (key, value) -pairs. There should be C<nr_values> elements in this array. C<flags> -is not used, always pass 0. - -Any existing values stored at the node are discarded, and their -C<hive_value_h> handles become invalid. Thus you can remove all -values stored at C<node> by passing C<nr_values = 0>. - -Returns 0 on success. On error this returns -1 and sets errno. - -Note that this library does not offer a way to modify just a single -key at a node. We don't implement a way to do this efficiently. - -=back - -=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED - -=over 4 - -=item * - -Changing the root node. - -=item * - -Creating a new hive file from scratch. This is impossible at present -because not all fields in the header are understood. - -=item * - -Modifying or deleting single values at a node. - -=item * - -Modifying security key (sk) records or classnames. -Previously we did not understand these records. However now they -are well-understood and we could add support if it was required -(but nothing much really uses them). - -=back - -=head1 THE STRUCTURE OF THE WINDOWS REGISTRY - -Note: To understand the relationship between hives and the common -Windows Registry keys (like C<HKEY_LOCAL_MACHINE>) please see the -Wikipedia page on the Windows Registry. - -The Windows Registry is split across various binary files, each -file being known as a "hive". This library only handles a single -hive file at a time. - -Hives are n-ary trees with a single root. Each node in the tree -has a name. - -Each node in the tree (including non-leaf nodes) may have an -arbitrary list of (key, value) pairs attached to it. It may -be the case that one of these pairs has an empty key. This -is referred to as the default key for the node. - -The (key, value) pairs are the place where the useful data is -stored in the registry. The key is always a string (possibly the -empty string for the default key). 
The value is a typed object -(eg. string, int32, binary, etc.). - -=head2 RELATIONSHIP TO .REG FILES - -Although this library does not care about or deal with Windows reg -files, it's useful to look at the relationship between the registry -itself and reg files because they are so common. - -A reg file is a text representation of the registry, or part of the -registry. The actual registry hives that Windows uses are binary -files. There are a number of Windows and Linux tools that let you -generate reg files, or merge reg files back into the registry hives. -Notable amongst them is Microsoft's REGEDIT program (formerly known as -REGEDT32). - -A typical reg file will contain many sections looking like this: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - "@"="Generic Stack" - "TileInfo"="prop:System.FileCount" - "TilePath"=str(2):"%systemroot%\\system32" - "ThumbnailCutoff"=dword:00000000 - "FriendlyTypeName"=hex(2):40,00,25,00,53,00,79,00,73,00,74,00,65,00,6d,00,52,00,6f,00,\ - 6f,00,74,00,25,00,5c,00,53,00,79,00,73,00,74,00,65,00,6d,00,\ - 33,00,32,00,5c,00,73,00,65,00,61,00,72,00,63,00,68,00,66,00,\ - 6f,00,6c,00,64,00,65,00,72,00,2e,00,64,00,6c,00,6c,00,2c,00,\ - 2d,00,39,00,30,00,32,00,38,00,00,00,d8 - -Taking this one piece at a time: - - [HKEY_LOCAL_MACHINE\SOFTWARE\Classes\Stack] - -This is the path to this node in the registry tree. The first part, -C<HKEY_LOCAL_MACHINE\SOFTWARE> means that this comes from a hive -(file) called C<SOFTWARE>. C<\Classes\Stack> is the real path part, -starting at the root node of the C<SOFTWARE> hive. - -Below the node name is a list of zero or more key-value pairs. Any -interior or leaf node in the registry may have key-value pairs -attached. - - "@"="Generic Stack" - -This is the "default key". In reality (ie. inside the binary hive) -the key string is the empty string. In reg files this is written as -C<@> but this has no meaning either in the hives themselves or in this -library. 
The value is a string (type 1 - see C<enum hive_type> -above). - - "TileInfo"="prop:System.FileCount" - -This is a regular (key, value) pair, with the value being a type 1 -string. Note that inside the binary file the string is likely to be -UTF-16 encoded. This library converts to and from UTF-8 strings -transparently. - - "TilePath"=str(2):"%systemroot%\\system32" - -The value in this case has type 2 (expanded string) meaning that some -%...% variables get expanded by Windows. (This library doesn't know -or care about variable expansion). - - "ThumbnailCutoff"=dword:00000000 - -The value in this case is a dword (type 4). - - "FriendlyTypeName"=hex(2):40,00,.... - -This value is an expanded string (type 2) represented in the reg file -as a series of hex bytes. In this case the string appears to be a -UTF-16 string. - -=head1 NOTE ON THE USE OF ERRNO - -Many functions in this library set errno to indicate errors. These -are the values of errno you may encounter (this list is not -exhaustive): - -=over 4 - -=item ENOTSUP - -Corrupt or unsupported Registry file format. - -=item ENOKEY - -Missing root key. - -=item EINVAL - -Passed an invalid argument to the function. - -=item EFAULT - -Followed a Registry pointer which goes outside -the registry or outside a registry block. - -=item ELOOP - -Registry contains cycles. - -=item ERANGE - -Field in the registry out of range. - -=item EEXIST - -Registry key already exists. - -=item EROFS - -Tried to write to a registry which is not opened for writing. - -=back - -=head1 ENVIRONMENT VARIABLES - -=over 4 - -=item HIVEX_DEBUG - -Setting HIVEX_DEBUG=1 will enable very verbose messages. This is -useful for debugging problems with the library itself. - -=back - -=head1 SEE ALSO - -L<hivexml(1)>, -L<hivexget(1)>, -L<virt-win-reg(1)>, -L<guestfs(3)>, -L<http://libguestfs.org/>, -L<virt-cat(1)>, -L<virt-edit(1)>, -L<http://en.wikipedia.org/wiki/Windows_Registry>. - -=head1 AUTHORS - -Richard W.M. 
Jones (C<rjones at redhat dot com>) - -=head1 COPYRIGHT - -Copyright (C) 2009-2010 Red Hat Inc. - -Derived from code by Petter Nordahl-Hagen under a compatible license: -Copyright (C) 1997-2007 Petter Nordahl-Hagen. - -Derived from code by Markus Stephany under a compatible license: -Copyright (C) 2000-2004 Markus Stephany. - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; -version 2.1 of the License. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -See file LICENSE for the full license. |