summary refs log tree commit diff stats
path: root/libreport/src/lib/problem_data.c
diff options
context:
space:
mode:
Diffstat (limited to 'libreport/src/lib/problem_data.c')
-rw-r--r-- libreport/src/lib/problem_data.c 424
1 files changed, 424 insertions, 0 deletions
diff --git a/libreport/src/lib/problem_data.c b/libreport/src/lib/problem_data.c
new file mode 100644
index 00000000..ed88f9d8
--- /dev/null
+++ b/libreport/src/lib/problem_data.c
@@ -0,0 +1,424 @@
+/*
+ Copyright (C) 2010 Denys Vlasenko (dvlasenk@redhat.com)
+ Copyright (C) 2010 RedHat inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+#include "libreport.h"
+
+/*
+ * Value destructor for the problem data hash table: releases the item's
+ * content string and then the item itself. A NULL pointer is ignored.
+ */
+static void free_problem_item(void *ptr)
+{
+    struct problem_item *item = ptr;
+
+    if (item == NULL)
+        return;
+
+    free(item->content);
+    free(item);
+}
+
+/*
+ * Produces a human-readable rendering of an item, when one exists.
+ *
+ * Returns:
+ *  - xstrdup("(nullitem)") if item is NULL;
+ *  - for CD_FLAG_UNIXTIME items whose content is a clean decimal number,
+ *    an xstrdup'ed local time string formatted with "%c";
+ *  - NULL in all other cases (no special formatting applies, or the
+ *    timestamp could not be parsed/converted).
+ */
+char *format_problem_item(struct problem_item *item)
+{
+    if (!item)
+        return xstrdup("(nullitem)");
+
+    if (item->flags & CD_FLAG_UNIXTIME)
+    {
+        errno = 0;
+        char *end;
+        time_t timestamp = strtol(item->content, &end, 10);
+        /* Accept only: no range error, no trailing junk, at least one digit */
+        if (!errno && !*end && end != item->content)
+        {
+            /* localtime() can fail and return NULL;
+             * strftime() must never be handed a NULL struct tm.
+             */
+            struct tm *broken_down = localtime(&timestamp);
+            if (broken_down)
+            {
+                char timeloc[256];
+                size_t len = strftime(timeloc, sizeof(timeloc), "%c", broken_down);
+                if (len)
+                    return xstrdup(timeloc);
+            }
+        }
+    }
+    return NULL;
+}
+
+/* problem_data["name"] = { "content", CD_FLAG_foo_bits } */
+
+/*
+ * Allocates an empty problem data table keyed by string.
+ * Keys are released with free(), values with free_problem_item().
+ */
+problem_data_t *new_problem_data(void)
+{
+    problem_data_t *table = g_hash_table_new_full(
+                g_str_hash, g_str_equal,
+                /*key_destroy:*/ free,
+                /*value_destroy:*/ free_problem_item);
+    return table;
+}
+
+/*
+ * Fills in elements which every problem report is expected to carry:
+ *  - FILENAME_ANALYZER: set to "libreport" if not present;
+ *  - FILENAME_DUPHASH: if not present, SHA1 over the contents of all
+ *    non-binary items, processed in sorted key order;
+ *  - FILENAME_EXECUTABLE: target of /proc/PID/exe of the current process
+ *    (replaces any existing value when the link is readable);
+ *  - FILENAME_COMPONENT: set to "abrt" if not present.
+ */
+void add_basics_to_problem_data(problem_data_t *pd)
+{
+    const char *analyzer = get_problem_item_content_or_NULL(pd, FILENAME_ANALYZER);
+    if (analyzer == NULL)
+        /* Store under the very same key we have just looked up */
+        add_to_problem_data(pd, FILENAME_ANALYZER, "libreport");
+
+    /* If application didn't provide dupe hash, we generate it
+     * from all components, so we at least eliminate the exact same
+     * reports
+     */
+    if (get_problem_item_content_or_NULL(pd, FILENAME_DUPHASH) == NULL)
+    {
+        /* start hash */
+        sha1_ctx_t sha1ctx;
+        sha1_begin(&sha1ctx);
+
+        /*
+         * To avoid spurious hash differences, sort keys so that elements are
+         * always processed in the same order:
+         */
+        GList *list = g_hash_table_get_keys(pd);
+        list = g_list_sort(list, (GCompareFunc)strcmp);
+        GList *l = list;
+        while (l)
+        {
+            const char *key = l->data;
+            l = l->next;
+            struct problem_item *item = g_hash_table_lookup(pd, key);
+            /* do not hash items which are binary (item->flags & CD_FLAG_BIN).
+             * Their ->content is full file name, with path. Path is always
+             * different and will make hash differ even if files are the same.
+             */
+            if (item->flags & CD_FLAG_BIN)
+                continue;
+            sha1_hash(&sha1ctx, item->content, strlen(item->content));
+        }
+        g_list_free(list);
+
+        /* end hash */
+        char hash_bytes[SHA1_RESULT_LEN];
+        sha1_end(&sha1ctx, hash_bytes);
+        char hash_str[SHA1_RESULT_LEN*2 + 1];
+        /* the pointer returned by bin2hex() is used to NUL-terminate the hex string */
+        bin2hex(hash_str, hash_bytes, SHA1_RESULT_LEN)[0] = '\0';
+
+        add_to_problem_data(pd, FILENAME_DUPHASH, hash_str);
+    }
+
+    pid_t pid = getpid();
+    if (pid > 0)
+    {
+        char buf[PATH_MAX+1];
+        /* %u takes an unsigned int, while pid_t is a signed type */
+        char *exe = xasprintf("/proc/%u/exe", (unsigned)pid);
+        /* readlink() does not NUL-terminate; we do it ourself below */
+        ssize_t len = readlink(exe, buf, PATH_MAX);
+        if (len > 0)
+        {
+            buf[len] = 0;
+            VERB2 log("reporting initiated from: %s", buf);
+            add_to_problem_data(pd, FILENAME_EXECUTABLE, buf);
+        }
+        free(exe);
+
+//#ifdef WITH_RPM
+        /* FIXME: component should be taken from rpm using librpm
+         * which means we need to link against it :(
+         * or run rpm -qf executable ??
+         */
+        /* Fedora/RHEL rpm specific piece of code */
+        const char *component = get_problem_item_content_or_NULL(pd, FILENAME_COMPONENT);
+        //FIXME: this REALLY needs to go away, or every report will be assigned to abrt
+        if (component == NULL) // application didn't specify component
+            add_to_problem_data(pd, FILENAME_COMPONENT, "abrt");
+//#endif
+    }
+}
+
+/*
+ * Stores a copy of (name, content) in problem_data with the given flags,
+ * replacing any item already stored under the same name.
+ * CD_FLAG_TXT is implied unless CD_FLAG_BIN is given, and
+ * CD_FLAG_ISNOTEDITABLE is implied unless CD_FLAG_ISEDITABLE is given.
+ */
+void add_to_problem_data_ext(problem_data_t *problem_data,
+                const char *name,
+                const char *content,
+                unsigned flags)
+{
+    /* Fill in the implied default bits */
+    if ((flags & CD_FLAG_BIN) == 0)
+        flags |= CD_FLAG_TXT;
+    if ((flags & CD_FLAG_ISEDITABLE) == 0)
+        flags |= CD_FLAG_ISNOTEDITABLE;
+
+    struct problem_item *new_item = xzalloc(sizeof(*new_item));
+    new_item->content = xstrdup(content);
+    new_item->flags = flags;
+
+    /* The table takes ownership of both the key copy and the item */
+    char *key_copy = xstrdup(name);
+    g_hash_table_replace(problem_data, key_copy, new_item);
+}
+
+/*
+ * Convenience wrapper: stores (name, content) as a non-editable text item.
+ */
+void add_to_problem_data(problem_data_t *problem_data,
+                const char *name,
+                const char *content)
+{
+    const unsigned text_not_editable = CD_FLAG_TXT + CD_FLAG_ISNOTEDITABLE;
+
+    add_to_problem_data_ext(problem_data, name, content, text_not_editable);
+}
+
+/*
+ * Returns the content string of the item stored under key.
+ * If there is no such item, reports it through error_msg_and_die().
+ * The returned string belongs to the item; callers must not free it.
+ */
+const char *get_problem_item_content_or_die(problem_data_t *problem_data, const char *key)
+{
+    struct problem_item *found = get_problem_data_item_or_NULL(problem_data, key);
+
+    if (found == NULL)
+        error_msg_and_die("Error accessing problem data: no ['%s']", key);
+
+    return found->content;
+}
+
+/*
+ * Returns the content string of the item stored under key,
+ * or NULL if there is no such item.
+ * The returned string belongs to the item; callers must not free it.
+ */
+const char *get_problem_item_content_or_NULL(problem_data_t *problem_data, const char *key)
+{
+    struct problem_item *found = get_problem_data_item_or_NULL(problem_data, key);
+
+    return found ? found->content : NULL;
+}
+
+
+/* Miscellaneous helpers */
+
+/* NULL-terminated list of element names which are treated as editable */
+static const char *const editable_files[] = {
+    FILENAME_COMMENT,
+    FILENAME_BACKTRACE,
+    NULL
+};
+
+/* Tells whether file_name is one of editable_files[] */
+static bool is_editable_file(const char *file_name)
+{
+    /* is_in_string_list() takes a non-const vector, hence the cast */
+    char **names = (char**)editable_files;
+
+    return is_in_string_list(file_name, names);
+}
+
+/* NULL-terminated list of file names which are text regardless of contents */
+static const char *const always_text_files[] = {
+    FILENAME_CMDLINE  ,
+    FILENAME_BACKTRACE,
+    NULL
+};
+
+/*
+ * Checks whether file 'name' looks like a (small) text file.
+ *
+ * On entry, *sz is the maximum number of bytes to read from the file.
+ * On success, returns a malloced buffer holding the bytes read and stores
+ * their count in *sz. The buffer is NUL-terminated only if fewer bytes than
+ * requested were read; if the returned *sz equals the requested size,
+ * the file may be longer than what is in the buffer.
+ *
+ * Returns NULL if the file can't be opened or read, is larger than 200k,
+ * contains a NUL byte, or has too many non-text bytes.
+ */
+static char* is_text_file(const char *name, ssize_t *sz)
+{
+    /* We were using magic.h API to check for file being text, but it thinks
+     * that file containing just "0" is not text (!!)
+     * So, we do it ourself.
+     */
+
+    int fd = open(name, O_RDONLY);
+    if (fd < 0)
+        return NULL; /* it's not text (because it does not exist! :) */
+
+    /* Maybe 64k limit is small. But _some_ limit is necessary:
+     * fields declared "text" may end up in editing fields and such.
+     * We don't want to accidentally end up with 100meg text in a textbox!
+     * So, don't remove this. If you really need to, raise the limit.
+     *
+     * Bumped up to 200k: saw 124740 byte /proc/PID/smaps file
+     */
+    off_t size = lseek(fd, 0, SEEK_END);
+    if (size < 0 || size > 200*1024)
+    {
+        close(fd);
+        return NULL; /* it's not a SMALL text */
+    }
+    lseek(fd, 0, SEEK_SET);
+
+    char *buf = (char*)xmalloc(*sz);
+    ssize_t r = full_read(fd, buf, *sz);
+    close(fd);
+    if (r < 0)
+    {
+        free(buf);
+        return NULL; /* it's not text (because we can't read it) */
+    }
+    if (r < *sz)
+        buf[r] = '\0';
+    *sz = r;
+
+    /* Some files in our dump directories are known to always be textual */
+    const char *base = strrchr(name, '/');
+    if (base)
+    {
+        base++;
+        if (is_in_string_list(base, (char**)always_text_files))
+            return buf;
+    }
+
+    /* Every once in a while, even a text file contains a few garbled
+     * or unexpected non-ASCII chars. We should not declare it "binary".
+     */
+    const unsigned RATIO = 50;
+    unsigned total_chars = r + RATIO;
+    unsigned bad_chars = 1; /* 1 prevents division by 0 later */
+    while (--r >= 0)
+    {
+        /* Examine the byte as unsigned char: plain char may be signed,
+         * in which case bytes >= 0x80 are negative, never compare >= 0x7f,
+         * and are not valid arguments for isspace().
+         */
+        unsigned char c = (unsigned char)buf[r];
+        if (c >= 0x7f
+         /* among control chars, only '\t','\n' etc are allowed */
+         || (c < ' ' && !isspace(c))
+        ) {
+            if (c == '\0')
+            {
+                /* We don't like NULs very much. Not text for sure! */
+                free(buf);
+                return NULL;
+            }
+            bad_chars++;
+        }
+    }
+
+    if ((total_chars / bad_chars) >= RATIO)
+        return buf; /* looks like text to me */
+
+    free(buf);
+    return NULL; /* it's binary */
+}
+
+/* NULL-terminated list of element names which get CD_FLAG_LIST */
+static const char *const list_flagged_files[] = {
+    FILENAME_UID       ,
+    FILENAME_PACKAGE   ,
+    FILENAME_EXECUTABLE,
+    FILENAME_TIME      ,
+    FILENAME_COUNT     ,
+    NULL
+};
+
+/*
+ * Loads one dump dir element into problem_data.
+ * Non-editable elements which do not pass is_text_file() are stored as
+ * binary items whose content is the full file name; everything else is
+ * stored as a text item holding the file's contents.
+ */
+static void load_one_dump_dir_element(problem_data_t *problem_data,
+                struct dump_dir *dd,
+                const char *short_name,
+                const char *full_name)
+{
+    enum { PROBE_SIZE = 4*1024 };
+
+    ssize_t sz = PROBE_SIZE;
+    char *text = NULL;
+    const bool editable = is_editable_file(short_name);
+
+    /* Editable elements are taken as text without probing */
+    if (!editable)
+    {
+        text = is_text_file(full_name, &sz);
+        if (text == NULL)
+        {
+            add_to_problem_data_ext(problem_data,
+                    short_name,
+                    full_name,
+                    CD_FLAG_BIN + CD_FLAG_ISNOTEDITABLE
+            );
+            return;
+        }
+    }
+
+    /* If is_text_file read fewer than PROBE_SIZE bytes, it has the entire
+     * file and its buffer is used as is. Otherwise, read it all anew.
+     */
+    char *content = text;
+    if (sz >= PROBE_SIZE)
+    {
+        free(text);
+        content = dd_load_text(dd, short_name);
+    }
+
+    /* Strip '\n' from one-line elements: */
+    char *first_nl = strchr(content, '\n');
+    if (first_nl != NULL && first_nl[1] == '\0')
+        first_nl[0] = '\0';
+
+    unsigned flags = CD_FLAG_TXT;
+    flags |= (editable ? CD_FLAG_ISEDITABLE : CD_FLAG_ISNOTEDITABLE);
+    if (is_in_string_list(short_name, (char**)list_flagged_files))
+        flags |= CD_FLAG_LIST;
+    if (strcmp(short_name, FILENAME_TIME) == 0)
+        flags |= CD_FLAG_UNIXTIME;
+
+    add_to_problem_data_ext(problem_data,
+            short_name,
+            content,
+            flags
+    );
+    free(content);
+}
+
+/*
+ * Walks all files of dump dir dd and adds each of them to problem_data,
+ * skipping those whose name is listed in excluding (a NULL-terminated
+ * vector; may be NULL to exclude nothing).
+ */
+void load_problem_data_from_dump_dir(problem_data_t *problem_data, struct dump_dir *dd, char **excluding)
+{
+    char *short_name;
+    char *full_name;
+
+    dd_init_next_file(dd);
+    while (dd_get_next_file(dd, &short_name, &full_name))
+    {
+        bool excluded = (excluding && is_in_string_list(short_name, excluding));
+        if (!excluded)
+            load_one_dump_dir_element(problem_data, dd, short_name, full_name);
+
+        /* Both names are released on every iteration */
+        free(short_name);
+        free(full_name);
+    }
+}
+
+/*
+ * Creates a new problem data table and fills it with all elements
+ * of dump dir dd (nothing is excluded).
+ */
+problem_data_t *create_problem_data_from_dump_dir(struct dump_dir *dd)
+{
+    problem_data_t *loaded = new_problem_data();
+
+    load_problem_data_from_dump_dir(loaded, dd, /*excluding:*/ NULL);
+
+    return loaded;
+}
+
+/*
+ * Splits comma_separated_list into a NULL-terminated char *vector[].
+ * The result is a single allocation: free(result) releases everything,
+ * and individual elements must never be freed (no free(result[i])!).
+ * If comma_separated_list is NULL or "", returns NULL.
+ */
+static char **build_exclude_vector(const char *comma_separated_list)
+{
+    if (!comma_separated_list || !comma_separated_list[0])
+        return NULL;
+
+    /* Even w/o commas we need two slots: { "name", NULL }.
+     * Every comma adds one more.
+     */
+    unsigned slots = 2;
+    size_t len = 0;
+    for (; comma_separated_list[len] != '\0'; len++)
+    {
+        if (comma_separated_list[len] == ',')
+            slots++;
+    }
+
+    /* Layout: char *vector[slots], immediately followed by a string copy */
+    char **vector = xzalloc(slots * sizeof(vector[0]) + len + 1);
+    char *str = strcpy((char*)&vector[slots], comma_separated_list);
+
+    /* Cut the copy at each comma, recording where every piece starts.
+     * The final slot stays NULL thanks to zeroed allocation.
+     */
+    char **next_slot = vector;
+    *next_slot++ = str;
+    for (char *comma = strchr(str, ','); comma != NULL; comma = strchr(comma + 1, ','))
+    {
+        *comma = '\0';
+        *next_slot++ = comma + 1;
+    }
+
+    return vector;
+}
+
+/*
+ * Opens dump dir dump_dir_name and loads its elements into a new problem
+ * data table, leaving out the elements named in the comma-separated
+ * EXCLUDE_FROM_REPORT environment variable (if it is set and not empty).
+ *
+ * Returns the new table, or NULL if the dump dir can't be opened.
+ */
+problem_data_t *create_problem_data_for_reporting(const char *dump_dir_name)
+{
+    struct dump_dir *dd = dd_opendir(dump_dir_name, /*flags:*/ 0);
+    if (!dd)
+        return NULL; /* dd_opendir already emitted error msg */
+
+    /* Built only after dd_opendir succeeded, so that the early return
+     * above has nothing to free.
+     */
+    char **exclude_items = build_exclude_vector(getenv("EXCLUDE_FROM_REPORT"));
+
+    problem_data_t *problem_data = new_problem_data();
+    load_problem_data_from_dump_dir(problem_data, dd, exclude_items);
+    dd_close(dd);
+
+    /* One free() is enough: the vector and its strings are one allocation */
+    free(exclude_items);
+    return problem_data;
+}
+
+/*
+ * Logs every item of problem_data, one log line per item:
+ * prefix, name, content and flags (in hex).
+ */
+void log_problem_data(problem_data_t *problem_data, const char *pfx)
+{
+    GHashTableIter iter;
+    gpointer key;
+    gpointer val;
+
+    g_hash_table_iter_init(&iter, problem_data);
+    while (g_hash_table_iter_next(&iter, &key, &val))
+    {
+        const char *name = key;
+        const struct problem_item *item = val;
+
+        log("%s[%s]:'%s' 0x%x", pfx, name, item->content, item->flags);
+    }
+}