From 4285dc9a58fbdae1516c4117a3f9297b822f27ff Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Wed, 20 May 2009 15:24:02 +0200 Subject: Fetch and store both debug_frame and eh_frame tables. * runtime/sym.h (_stp_module): Remove unwind_data, unwind_data_len and unwind_is_ehframe fields. Add debug_frame, eh_frame, debug_frame_len, eh_frame_len and eh_frame_addr fields. * runtime/unwind.c: Use debug_frame and debug_frame_len instead of unwind_data and unwind_data_len throughout. (cie_for_fde): Take unwind_data and is_ehframe as direct arguments. * runtime/unwind/unwind.h (cie_for_fde): New function declaration. * translate.cxx (get_unwind_data): Fetch and return both debug_frame and eh_frame tables. (dump_unwindsyms): Dump both debug_frame and eh_frame tables. --- runtime/sym.h | 8 +-- runtime/unwind.c | 29 +++++------ runtime/unwind/unwind.h | 4 +- translate.cxx | 130 +++++++++++++++++++++++++++++++++--------------- 4 files changed, 113 insertions(+), 58 deletions(-) diff --git a/runtime/sym.h b/runtime/sym.h index 80c334fb..7e28ebe6 100644 --- a/runtime/sym.h +++ b/runtime/sym.h @@ -42,11 +42,13 @@ struct _stp_module { unsigned long dwarf_module_base; /* the stack unwind data for this module */ - void *unwind_data; + void *debug_frame; + void *eh_frame; void *unwind_hdr; - uint32_t unwind_data_len; + uint32_t debug_frame_len; + uint32_t eh_frame_len; uint32_t unwind_hdr_len; - uint32_t unwind_is_ehframe; /* unwind data comes from .eh_frame */ + unsigned long eh_frame_addr; /* Orig load address (offset) .eh_frame */ /* build-id information */ unsigned char *build_id_bits; unsigned long build_id_offset; diff --git a/runtime/unwind.c b/runtime/unwind.c index f03534bd..8ba3cf76 100644 --- a/runtime/unwind.c +++ b/runtime/unwind.c @@ -87,7 +87,8 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) } /* given an FDE, find its CIE */ -static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *m) +static const u32 *cie_for_fde(const u32 *fde, void 
*unwind_data, + int is_ehframe) { const u32 *cie; @@ -96,7 +97,7 @@ static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *m) return &bad_cie; /* CIE id for eh_frame is 0, otherwise 0xffffffff */ - if (m->unwind_is_ehframe && fde[1] == 0) + if (is_ehframe && fde[1] == 0) return &not_fde; else if (fde[1] == 0xffffffff) return &not_fde; @@ -104,18 +105,18 @@ static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *m) /* OK, must be an FDE. Now find its CIE. */ /* CIE_pointer must be a proper offset */ - if ((fde[1] & (sizeof(*fde) - 1)) || fde[1] > (unsigned long)(fde + 1) - (unsigned long)m->unwind_data) { + if ((fde[1] & (sizeof(*fde) - 1)) || fde[1] > (unsigned long)(fde + 1) - (unsigned long)unwind_data) { dbug_unwind(1, "fde[1]=%lx fde+1=%lx, unwind_data=%lx %lx\n", (unsigned long)fde[1], (unsigned long)(fde + 1), - (unsigned long)m->unwind_data, (unsigned long)(fde + 1) - (unsigned long)m->unwind_data); + (unsigned long)unwind_data, (unsigned long)(fde + 1) - (unsigned long)unwind_data); return NULL; /* this is not a valid FDE */ } /* cie pointer field is different in eh_frame vs debug_frame */ - if (m->unwind_is_ehframe) + if (is_ehframe) cie = fde + 1 - fde[1] / sizeof(*fde); else - cie = m->unwind_data + fde[1]; + cie = unwind_data + fde[1]; if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde) || (*cie & (sizeof(*cie) - 1)) @@ -488,7 +489,7 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, ptr = hdr + 4; end = hdr + m->unwind_hdr_len; - if (read_pointer(&ptr, end, hdr[1]) != (unsigned long)m->unwind_data) { + if (read_pointer(&ptr, end, hdr[1]) != (unsigned long)m->debug_frame) { dbug_unwind(1, "eh_frame_ptr not valid"); return NULL; } @@ -592,8 +593,8 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) return -EINVAL; } - if (unlikely(m->unwind_data_len == 0 || m->unwind_data_len & (sizeof(*fde) - 1))) { - dbug_unwind(1, "Module %s: unwind_data_len=%d", m->name, m->unwind_data_len); + if 
(unlikely(m->debug_frame_len == 0 || m->debug_frame_len & (sizeof(*fde) - 1))) { + dbug_unwind(1, "Module %s: unwind_data_len=%d", m->name, m->debug_fram_len); goto err; } @@ -602,7 +603,7 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) /* found the fde, now set startLoc and endLoc */ if (fde != NULL) { - cie = cie_for_fde(fde, m); + cie = cie_for_fde(fde, m->debug_frame, false); if (likely(cie != NULL && cie != &bad_cie && cie != &not_fde)) { ptr = (const u8 *)(fde + 2); ptrType = fde_pointer_type(cie); @@ -625,10 +626,10 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) /* did not a good fde find with binary search, so do slow linear search */ if (fde == NULL) { - for (fde = m->unwind_data, tableSize = m->unwind_data_len; cie = NULL, tableSize > sizeof(*fde) - && tableSize - sizeof(*fde) >= *fde; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + for (fde = m->debug_frame, tableSize = m->debug_frame_len; cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { dbug_unwind(3, "fde=%lx tableSize=%d\n", (long)*fde, (int)tableSize); - cie = cie_for_fde(fde, m); + cie = cie_for_fde(fde, m->debug_frame, false); if (cie == &bad_cie) { cie = NULL; break; @@ -651,7 +652,7 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) } } - dbug_unwind(1, "cie=%lx fde=%lx startLoc=%lx endLoc=%lx\n", cie, fde, startLoc, endLoc); + dbug_unwind(1, "cie=%lx fde=%lx startLoc=%lx endLoc=%lx, pc=%lx\n", cie, fde, startLoc, endLoc, pc); if (cie == NULL || fde == NULL) goto err; diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h index 3b6d0de0..285a3a34 100644 --- a/runtime/unwind/unwind.h +++ b/runtime/unwind/unwind.h @@ -1,7 +1,7 @@ /* -*- linux-c -*- * * dwarf unwinder header file - * Copyright (C) 2008 Red Hat Inc. + * Copyright (C) 2008, 2009 Red Hat Inc. 
* Copyright (C) 2002-2006 Novell, Inc. * * This file is part of systemtap, and is free software. You can @@ -143,7 +143,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType); static const u32 bad_cie, not_fde; -static const u32 *cie_for_fde(const u32 *fde, const struct _stp_module *); +static const u32 *cie_for_fde(const u32 *fde, void *table, int is_ehframe); static signed fde_pointer_type(const u32 *cie); diff --git a/translate.cxx b/translate.cxx index 87811e9f..eaa2e942 100644 --- a/translate.cxx +++ b/translate.cxx @@ -4413,41 +4413,56 @@ struct unwindsym_dump_context }; -// Get the .debug_frame section for the given module. -// l will be set to the length of the size of the unwind data if found. -static void *get_unwind_data (Dwfl_Module *m, size_t *l) +// Get the .debug_frame and .eh_frame sections for the given module. +// Also returns the length of both sections when found, plus the section +// address of the eh_frame data. +static void get_unwind_data (Dwfl_Module *m, + void **debug_frame, void **eh_frame, + size_t *debug_len, size_t *eh_len, + Dwarf_Addr *eh_addr) { Dwarf_Addr bias = 0; - Dwarf *dw; GElf_Ehdr *ehdr, ehdr_mem; GElf_Shdr *shdr, shdr_mem; - Elf_Scn *scn = NULL; - Elf_Data *data = NULL; - - dw = dwfl_module_getdwarf(m, &bias); - if (dw != NULL) + Elf_Scn *scn; + Elf_Data *data; + Elf *elf; + + // fetch .eh_frame info preferably from main elf file. 
+ elf = dwfl_module_getelf(m, &bias); + ehdr = gelf_getehdr(elf, &ehdr_mem); + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { - Elf *elf = dwarf_getelf(dw); - ehdr = gelf_getehdr(elf, &ehdr_mem); - while ((scn = elf_nextscn(elf, scn))) + shdr = gelf_getshdr(scn, &shdr_mem); + if (strcmp(elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name), + ".eh_frame") == 0) { - shdr = gelf_getshdr(scn, &shdr_mem); - if (strcmp(elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name), - ".debug_frame") == 0) - { - data = elf_rawdata(scn, NULL); - break; - } + data = elf_rawdata(scn, NULL); + *eh_frame = data->d_buf; + *eh_len = data->d_size; + *eh_addr = shdr->sh_addr; + break; } } - if (data != NULL) + // fetch .debug_frame info preferably from dwarf debuginfo file. + elf = (dwarf_getelf (dwfl_module_getdwarf (m, &bias)) + ?: dwfl_module_getelf (m, &bias)); + ehdr = gelf_getehdr(elf, &ehdr_mem); + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { - *l = data->d_size; - return data->d_buf; + shdr = gelf_getshdr(scn, &shdr_mem); + if (strcmp(elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name), + ".debug_frame") == 0) + { + data = elf_rawdata(scn, NULL); + *debug_frame = data->d_buf; + *debug_len = data->d_size; + break; + } } - - return NULL; } static int @@ -4680,17 +4695,21 @@ dump_unwindsyms (Dwfl_Module *m, } // Add unwind data to be included if it exists for this module. 
- size_t len = 0; - void *unwind = get_unwind_data (m, &len); - if (unwind != NULL) + void *debug_frame = NULL; + size_t debug_len = 0; + void *eh_frame = NULL; + size_t eh_len = 0; + Dwarf_Addr eh_addr = NULL; + get_unwind_data (m, &debug_frame, &eh_frame, &debug_len, &eh_len, &eh_addr); + if (debug_frame != NULL && debug_len > 0) { c->output << "#if defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)\n"; c->output << "static uint8_t _stp_module_" << stpmod_idx - << "_unwind_data[] = \n"; + << "_debug_frame[] = \n"; c->output << " {"; - for (size_t i = 0; i < len; i++) + for (size_t i = 0; i < debug_len; i++) { - int h = ((uint8_t *)unwind)[i]; + int h = ((uint8_t *)debug_frame)[i]; c->output << "0x" << hex << h << dec << ","; if ((i + 1) % 16 == 0) c->output << "\n" << " "; @@ -4698,7 +4717,25 @@ dump_unwindsyms (Dwfl_Module *m, c->output << "};\n"; c->output << "#endif /* STP_USE_DWARF_UNWINDER && STP_NEED_UNWIND_DATA */\n"; } - else + + if (eh_frame != NULL && eh_len > 0) + { + c->output << "#if defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)\n"; + c->output << "static uint8_t _stp_module_" << stpmod_idx + << "_eh_frame[] = \n"; + c->output << " {"; + for (size_t i = 0; i < debug_len; i++) + { + int h = ((uint8_t *)debug_frame)[i]; + c->output << "0x" << hex << h << dec << ","; + if ((i + 1) % 16 == 0) + c->output << "\n" << " "; + } + c->output << "};\n"; + c->output << "#endif /* STP_USE_DWARF_UNWINDER && STP_NEED_UNWIND_DATA */\n"; + } + + if (debug_frame == NULL && eh_frame == NULL) { // There would be only a small benefit to warning. 
A user // likely can't do anything about this; backtraces for the @@ -4755,25 +4792,40 @@ dump_unwindsyms (Dwfl_Module *m, c->output << ".path = " << lex_cast_qstring (mainfile) << ",\n"; c->output << ".dwarf_module_base = 0x" << hex << base << dec << ", \n"; + c->output << ".eh_frame_addr = 0x" << hex << eh_addr << dec << ", \n"; + + if (debug_frame != NULL) + { + c->output << "#if defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)\n"; + c->output << ".debug_frame = " + << "_stp_module_" << stpmod_idx << "_debug_frame, \n"; + c->output << ".debug_frame_len = " << debug_len << ", \n"; + c->output << "#else\n"; + } + + c->output << ".debug_frame = NULL,\n"; + c->output << ".debug_frame_len = 0,\n"; + + if (debug_frame != NULL) + c->output << "#endif /* STP_USE_DWARF_UNWINDER && STP_NEED_UNWIND_DATA*/\n"; - if (unwind != NULL) + if (eh_frame != NULL) { c->output << "#if defined(STP_USE_DWARF_UNWINDER) && defined(STP_NEED_UNWIND_DATA)\n"; - c->output << ".unwind_data = " - << "_stp_module_" << stpmod_idx << "_unwind_data, \n"; - c->output << ".unwind_data_len = " << len << ", \n"; + c->output << ".eh_frame = " + << "_stp_module_" << stpmod_idx << "_eh_frame, \n"; + c->output << ".eh_frame_len = " << eh_len << ", \n"; c->output << "#else\n"; } - c->output << ".unwind_data = NULL,\n"; - c->output << ".unwind_data_len = 0,\n"; + c->output << ".eh_frame = NULL,\n"; + c->output << ".eh_frame_len = 0,\n"; - if (unwind != NULL) + if (eh_frame != NULL) c->output << "#endif /* STP_USE_DWARF_UNWINDER && STP_NEED_UNWIND_DATA*/\n"; c->output << ".unwind_hdr = NULL,\n"; c->output << ".unwind_hdr_len = 0,\n"; - c->output << ".unwind_is_ehframe = 0,\n"; c->output << ".sections = _stp_module_" << stpmod_idx << "_sections" << ",\n"; c->output << ".num_sections = sizeof(_stp_module_" << stpmod_idx << "_sections)/" -- cgit From 6d079c65967609c416afc3092241219482507784 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Wed, 20 May 2009 15:40:29 +0200 Subject: Pass and use 
ptrType and is_ehframe to unwind adjustStartLoc. * runtime/unwind.c (adjustStartLoc): Add ptrType and is_ehframe as arguments. Use these to adjust location when necessary. (DEBUG_UNWIND): Move block before adjustStartLoc. Pass false for is_ehframe throughout. --- runtime/unwind.c | 119 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 66 insertions(+), 53 deletions(-) diff --git a/runtime/unwind.c b/runtime/unwind.c index 8ba3cf76..1fff3c61 100644 --- a/runtime/unwind.c +++ b/runtime/unwind.c @@ -428,21 +428,80 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc, s return result && ptr.p8 == end && (targetLoc == 0 || state->label == NULL); } +#ifdef DEBUG_UNWIND +static const char *_stp_enc_hi_name[] = { + "DW_EH_PE", + "DW_EH_PE_pcrel", + "DW_EH_PE_textrel", + "DW_EH_PE_datarel", + "DW_EH_PE_funcrel", + "DW_EH_PE_aligned" +}; +static const char *_stp_enc_lo_name[] = { + "_absptr", + "_uleb128", + "_udata2", + "_udata4", + "_udata8", + "_sleb128", + "_sdata2", + "_sdata4", + "_sdata8" +}; +static char *_stp_eh_enc_name(signed type) +{ + static char buf[64]; + int hi, low; + if (type == DW_EH_PE_omit) + return "DW_EH_PE_omit"; + + hi = (type & DW_EH_PE_ADJUST) >> 4; + low = type & DW_EH_PE_FORM; + if (hi > 5 || low > 4 || (low == 0 && (type & DW_EH_PE_signed))) { + sprintf(buf, "ERROR:encoding=0x%x", type); + return buf; + } + + buf[0] = 0; + if (type & DW_EH_PE_indirect) + strlcpy(buf, "DW_EH_PE_indirect|", sizeof(buf)); + strlcat(buf, _stp_enc_hi_name[hi], sizeof(buf)); + + if (type & DW_EH_PE_signed) + low += 4; + strlcat(buf, _stp_enc_lo_name[low], sizeof(buf)); + return buf; +} +#endif /* DEBUG_UNWIND */ + // If this is an address inside a module, adjust for section relocation // and the elfutils base relocation done during loading of the .dwarf_frame // in translate.cxx. 
static unsigned long adjustStartLoc (unsigned long startLoc, struct _stp_module *m, - struct _stp_section *s) + struct _stp_section *s, + unsigned ptrType, int is_ehframe) { /* XXX - some, or all, of this should really be done by - _stp_module_relocate. */ + _stp_module_relocate and/or read_pointer. */ + dbug_unwind(2, "adjustStartLoc=%lx, ptrType=%s, m=%s, s=%s\n", + startLoc, _stp_eh_enc_name(ptrType), m->name, s->name); if (startLoc == 0 || strcmp (m->name, "kernel") == 0 - || strcmp (s->name, ".absolute") == 0) + || (strcmp (s->name, ".absolute") == 0 && !is_ehframe)) return startLoc; + /* eh_frame data has been loaded in the kernel, so readjust offset. */ + if (is_ehframe) { + if ((ptrType & DW_EH_PE_ADJUST) == DW_EH_PE_pcrel) { + startLoc -= (unsigned long) m->eh_frame; + startLoc += m->eh_frame_addr; + } + if (strcmp (s->name, ".absolute") == 0) + return startLoc; + } + if (strcmp (s->name, ".dynamic") == 0) return startLoc + s->addr; @@ -503,7 +562,7 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, do { const u8 *cur = ptr + (num / 2) * (2 * tableSize); startLoc = read_pointer(&cur, cur + tableSize, hdr[3]); - startLoc = adjustStartLoc(startLoc, m, s); + startLoc = adjustStartLoc(startLoc, m, s, hdr[3], false); if (pc < startLoc) num /= 2; else { @@ -512,59 +571,13 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, } } while (startLoc && num > 1); - if (num == 1 && (startLoc = adjustStartLoc(read_pointer(&ptr, ptr + tableSize, hdr[3]), m, s)) != 0 && pc >= startLoc) + if (num == 1 && (startLoc = adjustStartLoc(read_pointer(&ptr, ptr + tableSize, hdr[3]), m, s, hdr[3], false)) != 0 && pc >= startLoc) fde = (void *)read_pointer(&ptr, ptr + tableSize, hdr[3]); dbug_unwind(1, "returning fde=%lx startLoc=%lx", fde, startLoc); return fde; } -#ifdef DEBUG_UNWIND -static const char *_stp_enc_hi_name[] = { - "DW_EH_PE", - "DW_EH_PE_pcrel", - "DW_EH_PE_textrel", - "DW_EH_PE_datarel", - "DW_EH_PE_funcrel", - "DW_EH_PE_aligned" -}; -static const char 
*_stp_enc_lo_name[] = { - "_absptr", - "_uleb128", - "_udata2", - "_udata4", - "_udata8", - "_sleb128", - "_sdata2", - "_sdata4", - "_sdata8" -}; -static char *_stp_eh_enc_name(signed type) -{ - static char buf[64]; - int hi, low; - if (type == DW_EH_PE_omit) - return "DW_EH_PE_omit"; - - hi = (type & DW_EH_PE_ADJUST) >> 4; - low = type & DW_EH_PE_FORM; - if (hi > 5 || low > 4 || (low == 0 && (type & DW_EH_PE_signed))) { - sprintf(buf, "ERROR:encoding=0x%x", type); - return buf; - } - - buf[0] = 0; - if (type & DW_EH_PE_indirect) - strlcpy(buf, "DW_EH_PE_indirect|", sizeof(buf)); - strlcat(buf, _stp_enc_hi_name[hi], sizeof(buf)); - - if (type & DW_EH_PE_signed) - low += 4; - strlcat(buf, _stp_enc_lo_name[low], sizeof(buf)); - return buf; -} -#endif /* DEBUG_UNWIND */ - /* Unwind to previous to frame. Returns 0 if successful, negative * number in case of an error. A positive return means unwinding is finished; * don't try to fallback to dumping addresses on the stack. */ @@ -608,7 +621,7 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) ptr = (const u8 *)(fde + 2); ptrType = fde_pointer_type(cie); startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); - startLoc = adjustStartLoc(startLoc, m, s); + startLoc = adjustStartLoc(startLoc, m, s, ptrType, false); dbug_unwind(2, "startLoc=%lx, ptrType=%s\n", startLoc, _stp_eh_enc_name(ptrType)); if (!(ptrType & DW_EH_PE_indirect)) @@ -639,7 +652,7 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) ptr = (const u8 *)(fde + 2); startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); - startLoc = adjustStartLoc(startLoc, m, s); + startLoc = adjustStartLoc(startLoc, m, s, ptrType, false); dbug_unwind(2, "startLoc=%lx, ptrType=%s\n", startLoc, _stp_eh_enc_name(ptrType)); if (!startLoc) continue; -- cgit From 27b8459045b2276a8bb9ec5f8697cf2931291c4c Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Wed, 20 May 2009 16:51:24 +0200 Subject: Use 
debug_frame table, then fallback to eh_frame when necessary. * runtime/unwind.c (unwind): Call new unwind_frame() first with debug_frame data, then if that wasn't able to unwind again with eh_frame data. (unwind_frame): Adapted version of old unwind() function that takes a table, table length and whether it is an eh_frame table. --- runtime/unwind.c | 63 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/runtime/unwind.c b/runtime/unwind.c index 1fff3c61..97a99fda 100644 --- a/runtime/unwind.c +++ b/runtime/unwind.c @@ -8,9 +8,9 @@ * * This code is released under version 2 of the GNU GPL. * - * This code currently does stack unwinding in the - * kernel and modules. It will need some extension to handle - * userspace unwinding. + * This code currently does stack unwinding in the kernel and modules. + * It has been extended to handle userspace unwinding using systemtap + * data structures. */ #include "unwind/unwind.h" @@ -511,7 +511,7 @@ adjustStartLoc (unsigned long startLoc, } /* If we previously created an unwind header, then use it now to binary search */ -/* for the FDE corresponding to pc. */ +/* for the FDE corresponding to pc. XXX FIXME not currently supported. */ static u32 *_stp_search_unwind_hdr(unsigned long pc, struct _stp_module *m, @@ -562,7 +562,7 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, do { const u8 *cur = ptr + (num / 2) * (2 * tableSize); startLoc = read_pointer(&cur, cur + tableSize, hdr[3]); - startLoc = adjustStartLoc(startLoc, m, s, hdr[3], false); + startLoc = adjustStartLoc(startLoc, m, s, hdr[3], true); if (pc < startLoc) num /= 2; else { @@ -581,7 +581,9 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, /* Unwind to previous to frame. Returns 0 if successful, negative * number in case of an error. A positive return means unwinding is finished; * don't try to fallback to dumping addresses on the stack. 
*/ -static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) +static int unwind_frame(struct unwind_frame_info *frame, + struct _stp_module *m, struct _stp_section *s, + void *table, uint32_t table_len, int is_ehframe) { #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde, *cie = NULL; @@ -591,23 +593,10 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) unsigned i; signed ptrType = -1; uleb128_t retAddrReg = 0; - struct _stp_module *m; - struct _stp_section *s = NULL; struct unwind_state state; - dbug_unwind(1, "pc=%lx, %lx", pc, UNW_PC(frame)); - - if (UNW_PC(frame) == 0) - return -EINVAL; - - m = _stp_mod_sec_lookup (pc, tsk, &s); - if (unlikely(m == NULL)) { - dbug_unwind(1, "No module found for pc=%lx", pc); - return -EINVAL; - } - - if (unlikely(m->debug_frame_len == 0 || m->debug_frame_len & (sizeof(*fde) - 1))) { - dbug_unwind(1, "Module %s: unwind_data_len=%d", m->name, m->debug_fram_len); + if (unlikely(table_len == 0 || table_len & (sizeof(*fde) - 1))) { + dbug_unwind(1, "Module %s: frame_len=%d", m->name, table_len); goto err; } @@ -616,7 +605,7 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) /* found the fde, now set startLoc and endLoc */ if (fde != NULL) { - cie = cie_for_fde(fde, m->debug_frame, false); + cie = cie_for_fde(fde, table, is_ehframe); if (likely(cie != NULL && cie != &bad_cie && cie != &not_fde)) { ptr = (const u8 *)(fde + 2); ptrType = fde_pointer_type(cie); @@ -639,10 +628,10 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) /* did not a good fde find with binary search, so do slow linear search */ if (fde == NULL) { - for (fde = m->debug_frame, tableSize = m->debug_frame_len; cie = NULL, tableSize > sizeof(*fde) + for (fde = table, tableSize = table_len; cie = NULL, tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { dbug_unwind(3, 
"fde=%lx tableSize=%d\n", (long)*fde, (int)tableSize); - cie = cie_for_fde(fde, m->debug_frame, false); + cie = cie_for_fde(fde, table, is_ehframe); if (cie == &bad_cie) { cie = NULL; break; @@ -869,5 +858,31 @@ done: #undef FRAME_REG } +static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) +{ + struct _stp_module *m; + struct _stp_section *s = NULL; + unsigned long pc = UNW_PC(frame) - frame->call_frame; + int res; + + dbug_unwind(1, "pc=%lx, %lx", pc, UNW_PC(frame)); + + if (UNW_PC(frame) == 0) + return -EINVAL; + + m = _stp_mod_sec_lookup (pc, tsk, &s); + if (unlikely(m == NULL)) { + dbug_unwind(1, "No module found for pc=%lx", pc); + return -EINVAL; + } + + res = unwind_frame (frame, m, s, m->debug_frame, + m->debug_frame_len, false); + if (res != 0) + res = unwind_frame (frame, m, s, m->eh_frame, + m->eh_frame_len, true); + + return res; +} #endif /* STP_USE_DWARF_UNWINDER */ -- cgit From 7872a5b9d76dc78d8956de3d2a11757783121674 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Wed, 20 May 2009 23:11:43 +0200 Subject: Properly read eh_frame and pass is_ehframe correctly. * runtime/unwind.c (adjustStartLoc): Add extra dbug_unwind. (_stp_search_unwind_hdr): Always pass true for is_ehframe. (unwind_frame): Properly pass through is_ehframe to adjustStartLoc(). (unwind): Add extra dbug_unwind. * translate.cxx (dump_unwindsyms): Output and use correct eh_frame and eh_len. --- runtime/unwind.c | 16 ++++++++++------ translate.cxx | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/runtime/unwind.c b/runtime/unwind.c index 97a99fda..43bda717 100644 --- a/runtime/unwind.c +++ b/runtime/unwind.c @@ -485,8 +485,8 @@ adjustStartLoc (unsigned long startLoc, { /* XXX - some, or all, of this should really be done by _stp_module_relocate and/or read_pointer. 
*/ - dbug_unwind(2, "adjustStartLoc=%lx, ptrType=%s, m=%s, s=%s\n", - startLoc, _stp_eh_enc_name(ptrType), m->name, s->name); + dbug_unwind(2, "adjustStartLoc=%lx, ptrType=%s, m=%s, s=%s eh=%d\n", + startLoc, _stp_eh_enc_name(ptrType), m->name, s->name, is_ehframe); if (startLoc == 0 || strcmp (m->name, "kernel") == 0 || (strcmp (s->name, ".absolute") == 0 && !is_ehframe)) @@ -494,6 +494,7 @@ adjustStartLoc (unsigned long startLoc, /* eh_frame data has been loaded in the kernel, so readjust offset. */ if (is_ehframe) { + dbug_unwind(2, "eh_frame=%lx, eh_frame_addr=%lx\n", (unsigned long) m->eh_frame, m->eh_frame_addr); if ((ptrType & DW_EH_PE_ADJUST) == DW_EH_PE_pcrel) { startLoc -= (unsigned long) m->eh_frame; startLoc += m->eh_frame_addr; @@ -571,7 +572,7 @@ static u32 *_stp_search_unwind_hdr(unsigned long pc, } } while (startLoc && num > 1); - if (num == 1 && (startLoc = adjustStartLoc(read_pointer(&ptr, ptr + tableSize, hdr[3]), m, s, hdr[3], false)) != 0 && pc >= startLoc) + if (num == 1 && (startLoc = adjustStartLoc(read_pointer(&ptr, ptr + tableSize, hdr[3]), m, s, hdr[3], true)) != 0 && pc >= startLoc) fde = (void *)read_pointer(&ptr, ptr + tableSize, hdr[3]); dbug_unwind(1, "returning fde=%lx startLoc=%lx", fde, startLoc); @@ -610,7 +611,7 @@ static int unwind_frame(struct unwind_frame_info *frame, ptr = (const u8 *)(fde + 2); ptrType = fde_pointer_type(cie); startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); - startLoc = adjustStartLoc(startLoc, m, s, ptrType, false); + startLoc = adjustStartLoc(startLoc, m, s, ptrType, is_ehframe); dbug_unwind(2, "startLoc=%lx, ptrType=%s\n", startLoc, _stp_eh_enc_name(ptrType)); if (!(ptrType & DW_EH_PE_indirect)) @@ -641,7 +642,7 @@ static int unwind_frame(struct unwind_frame_info *frame, ptr = (const u8 *)(fde + 2); startLoc = read_pointer(&ptr, (const u8 *)(fde + 1) + *fde, ptrType); - startLoc = adjustStartLoc(startLoc, m, s, ptrType, false); + startLoc = adjustStartLoc(startLoc, m, s, ptrType, 
is_ehframe); dbug_unwind(2, "startLoc=%lx, ptrType=%s\n", startLoc, _stp_eh_enc_name(ptrType)); if (!startLoc) continue; @@ -876,11 +877,14 @@ static int unwind(struct unwind_frame_info *frame, struct task_struct *tsk) return -EINVAL; } + dbug_unwind(1, "trying debug_frame\n"); res = unwind_frame (frame, m, s, m->debug_frame, m->debug_frame_len, false); - if (res != 0) + if (res != 0) { + dbug_unwind(1, "debug_frame failed: %d, trying eh_frame\n", res); res = unwind_frame (frame, m, s, m->eh_frame, m->eh_frame_len, true); + } return res; } diff --git a/translate.cxx b/translate.cxx index eaa2e942..62c71aeb 100644 --- a/translate.cxx +++ b/translate.cxx @@ -4724,9 +4724,9 @@ dump_unwindsyms (Dwfl_Module *m, c->output << "static uint8_t _stp_module_" << stpmod_idx << "_eh_frame[] = \n"; c->output << " {"; - for (size_t i = 0; i < debug_len; i++) + for (size_t i = 0; i < eh_len; i++) { - int h = ((uint8_t *)debug_frame)[i]; + int h = ((uint8_t *)eh_frame)[i]; c->output << "0x" << hex << h << dec << ","; if ((i + 1) % 16 == 0) c->output << "\n" << " "; -- cgit From 34029cd3afe690f8481f8921047ec39dc325d945 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Wed, 20 May 2009 17:52:44 -0400 Subject: Add the schedtimes.stp and associated schedtimes.meta files to the examples. 
--- testsuite/systemtap.examples/index.html | 3 + testsuite/systemtap.examples/index.txt | 10 ++ testsuite/systemtap.examples/keyword-index.html | 12 ++ testsuite/systemtap.examples/keyword-index.txt | 40 ++++++ .../systemtap.examples/process/schedtimes.meta | 13 ++ .../systemtap.examples/process/schedtimes.stp | 154 +++++++++++++++++++++ 6 files changed, 232 insertions(+) create mode 100644 testsuite/systemtap.examples/process/schedtimes.meta create mode 100755 testsuite/systemtap.examples/process/schedtimes.stp diff --git a/testsuite/systemtap.examples/index.html b/testsuite/systemtap.examples/index.html index 0df681ac..e5673138 100644 --- a/testsuite/systemtap.examples/index.html +++ b/testsuite/systemtap.examples/index.html @@ -112,6 +112,9 @@ keywords: SYSCALL process/pf2.stp - Profile kernel functions
keywords: PROFILING

The pf2.stp script sets up time-based sampling. Every five seconds it prints out a sorted list with the top ten kernel functions with samples.

+
  • process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints
    +keywords: PROCESS SCHEDULER TIME TRACEPOINT
    +

    The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID.

  • process/sig_by_pid.stp - Signal Counts by Process ID
    keywords: SIGNALS

    Print signal counts by process ID in descending order.

  • diff --git a/testsuite/systemtap.examples/index.txt b/testsuite/systemtap.examples/index.txt index fa344933..4eef904c 100644 --- a/testsuite/systemtap.examples/index.txt +++ b/testsuite/systemtap.examples/index.txt @@ -224,6 +224,16 @@ keywords: profiling samples. +process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints +keywords: process scheduler time tracepoint + + The schedtimes.stp script instruments the scheduler to track the + amount of time that each process spends running, sleeping, queued, + and waiting for io. On exit the script prints out the accumulated + time for each state of processes observed. Optionally, this script + can be used with the '-c' or '-x' options to focus on a specific PID. + + process/sig_by_pid.stp - Signal Counts by Process ID keywords: signals diff --git a/testsuite/systemtap.examples/keyword-index.html b/testsuite/systemtap.examples/keyword-index.html index 7edbec21..b7f52246 100644 --- a/testsuite/systemtap.examples/keyword-index.html +++ b/testsuite/systemtap.examples/keyword-index.html @@ -189,6 +189,9 @@ keywords: NETWORK process/errsnoop.stp - tabulate system call errors
    keywords: PROCESS SYSCALL

    The script prints a periodic tabular report about failing system calls, by process and by syscall failure. The first optional argument specifies the reporting interval (in seconds, default 5); the second optional argument gives a screen height (number of lines in the report, default 20).

    +
  • process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints
    +keywords: PROCESS SCHEDULER TIME TRACEPOINT
    +

    The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID.

  • PROFILING

      @@ -219,6 +222,9 @@ keywords: SYSCALL SCHEDULER
        +
      • process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints
        +keywords: PROCESS SCHEDULER TIME TRACEPOINT
        +

        The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID.

      • process/sleepingBeauties.stp - Generating Backtraces of Threads Waiting for IO Operations
        keywords: IO SCHEDULER BACKTRACE

        The script monitors the time that threads spend waiting for IO operations (in "D" state) in the wait_for_completion function. If a thread spends over 10ms, its name and backtrace is printed, and later so is the total delay.

      • @@ -294,6 +300,9 @@ keywords: NETWORK io/iotime.stp - Trace Time Spent in Read and Write for Files
        keywords: SYSCALL READ WRITE TIME IO

        The script watches each open, close, read, and write syscalls on the system. For each file the scripts observes opened it accumulates the amount of wall clock time spend in read and write operations and the number of bytes read and written. When a file is closed the script prints out a pair of lines for the file. Both lines begin with a timestamp in microseconds, the PID number, and the executable name in parenthesese. The first line with the "access" keyword lists the file name, the attempted number of bytes for the read and write operations. The second line with the "iotime" keyword list the file name and the number of microseconds accumulated in the read and write syscalls.

        +
      • process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints
        +keywords: PROCESS SCHEDULER TIME TRACEPOINT
        +

        The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID.

      TRACE

        @@ -306,6 +315,9 @@ keywords: TRACE network/dropwatch.stp - Watch Where Socket Buffers are Freed in the Kernel
        keywords: NETWORK TRACEPOINT BUFFER FREE

        Every five seconds the dropwatch.stp script lists the number of socket buffers freed at locations in the kernel.

        +
      • process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints
        +keywords: PROCESS SCHEDULER TIME TRACEPOINT
        +

        The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID.

      TRAFFIC

        diff --git a/testsuite/systemtap.examples/keyword-index.txt b/testsuite/systemtap.examples/keyword-index.txt index b53e776f..c0082e36 100644 --- a/testsuite/systemtap.examples/keyword-index.txt +++ b/testsuite/systemtap.examples/keyword-index.txt @@ -337,6 +337,16 @@ keywords: process syscall in the report, default 20). +process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints +keywords: process scheduler time tracepoint + + The schedtimes.stp script instruments the scheduler to track the + amount of time that each process spends running, sleeping, queued, + and waiting for io. On exit the script prints out the accumulated + time for each state of processes observed. Optionally, this script + can be used with the '-c' or '-x' options to focus on a specific PID. + + = PROFILING = io/iostats.stp - List Executables Reading and Writing the Most Data @@ -417,6 +427,16 @@ keywords: syscall read write time io = SCHEDULER = +process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints +keywords: process scheduler time tracepoint + + The schedtimes.stp script instruments the scheduler to track the + amount of time that each process spends running, sleeping, queued, + and waiting for io. On exit the script prints out the accumulated + time for each state of processes observed. Optionally, this script + can be used with the '-c' or '-x' options to focus on a specific PID. + + process/sleepingBeauties.stp - Generating Backtraces of Threads Waiting for IO Operations keywords: io scheduler backtrace @@ -606,6 +626,16 @@ keywords: syscall read write time io syscalls. +process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints +keywords: process scheduler time tracepoint + + The schedtimes.stp script instruments the scheduler to track the + amount of time that each process spends running, sleeping, queued, + and waiting for io. 
On exit the script prints out the accumulated + time for each state of processes observed. Optionally, this script + can be used with the '-c' or '-x' options to focus on a specific PID. + + = TRACE = general/para-callgraph.stp - Callgraph tracing with arguments @@ -628,6 +658,16 @@ keywords: network tracepoint buffer free socket buffers freed at locations in the kernel. +process/schedtimes.stp - Track Time Processes Spend in Various States using Tracepoints +keywords: process scheduler time tracepoint + + The schedtimes.stp script instruments the scheduler to track the + amount of time that each process spends running, sleeping, queued, + and waiting for io. On exit the script prints out the accumulated + time for each state of processes observed. Optionally, this script + can be used with the '-c' or '-x' options to focus on a specific PID. + + = TRAFFIC = network/nettop.stp - Periodic Listing of Processes Using Network Interfaces diff --git a/testsuite/systemtap.examples/process/schedtimes.meta b/testsuite/systemtap.examples/process/schedtimes.meta new file mode 100644 index 00000000..0074731f --- /dev/null +++ b/testsuite/systemtap.examples/process/schedtimes.meta @@ -0,0 +1,13 @@ +title: Track Time Processes Spend in Various States using Tracepoints +name: schedtimes.stp +version: 1.0 +author: Jason Baron +keywords: process scheduler time tracepoint +subsystem: scheduler +status: production +exit: user-controlled +output: sorted-list +scope: system-wide +description: The schedtimes.stp script instruments the scheduler to track the amount of time that each process spends running, sleeping, queued, and waiting for io. On exit the script prints out the accumulated time for each state of processes observed. Optionally, this script can be used with the '-c' or '-x' options to focus on a specific PID. 
+test_check: stap -p4 schedtimes.stp +test_installcheck: stap schedtimes.stp -c "sleep 1" diff --git a/testsuite/systemtap.examples/process/schedtimes.stp b/testsuite/systemtap.examples/process/schedtimes.stp new file mode 100755 index 00000000..e964dd58 --- /dev/null +++ b/testsuite/systemtap.examples/process/schedtimes.stp @@ -0,0 +1,154 @@ +#! /usr/bin/env stap + +############################################################ +# Schedtimes.stp +# Author: Jason Baron +# profiles threads and displays their run times, queued times, +# wait times, including i/o wait times. +# Has two modes. When no arguments are given it profiles all +# threads. Alternatively, you can pass -c "program name" +############################################################ + +//constants +global RUNNING=0, QUEUED=1, SLEEPING=2 + +global traced_pid +global run_time, queued_time, sleep_time, io_wait_time +global pid_state, pid_names +global previous_timestamp +global io_wait_count +global io_wait_incremented + +function get_iowait:long(queue:long) +{ + return @cast(queue,"rq","kernel")->nr_iowait->counter; +} + +probe kernel.trace("sched_switch") { + previous_pid = $prev->pid; + next_pid = $next->pid; + if (traced_pid) { + if (previous_pid != traced_pid) { + previous_pid = 0; + } + if (next_pid != traced_pid) { + next_pid = 0; + } + } + if (previous_pid) { + if (!([previous_pid] in pid_state)) { + //use this state as entry into state machine + previous_timestamp[previous_pid] = gettimeofday_us(); + pid_names[previous_pid] = kernel_string($prev->comm); + if ($prev->state > 0) { + pid_state[previous_pid] = SLEEPING; + } else if ($prev->state == 0) { + pid_state[previous_pid] = QUEUED; + } else { + printf("unknown transition:\n"); + printf("pid state: %d our state: %d\n", + $prev->state, pid_state[previous_pid]); + } + } else if (pid_state[previous_pid] == RUNNING) { + pid_names[previous_pid] = kernel_string($prev->comm); + t = gettimeofday_us(); + run_time[previous_pid] += (t - 
previous_timestamp[previous_pid]); + previous_timestamp[previous_pid] = t; + if ($prev->state > 0) { + if ((get_iowait($rq) - io_wait_count[previous_pid]) > 0) + io_wait_incremented[previous_pid] = 1; + pid_state[previous_pid] = SLEEPING; + } else if ($prev->state == 0) { + pid_state[previous_pid] = QUEUED; + } else { + printf("unknown transition:\n"); + printf("pid state: %d our state: %d\n", + $prev->state, pid_state[previous_pid]); + } + } else { + printf("unknown transition:\n"); + printf("%s pid state: %d our state: %d\n", + pid_names[previous_pid], + $prev->state, pid_state[previous_pid]); + } + } + if (next_pid) { + io_wait_count[next_pid] = get_iowait($rq); + if (!([next_pid] in pid_state)) { + //use this state as entry into state machine + previous_timestamp[next_pid] = gettimeofday_us(); + pid_state[next_pid] = RUNNING; + pid_names[next_pid] = kernel_string($next->comm); + } else if (pid_state[next_pid] == QUEUED) { + t = gettimeofday_us(); + queued_time[next_pid] += (t - previous_timestamp[next_pid]); + previous_timestamp[next_pid] = t; + pid_state[next_pid] = RUNNING; + pid_names[next_pid] = kernel_string($next->comm); + } else { + printf("unknown transition:\n"); + printf("%s pid state: %d our state: %d\n", + pid_names[next_pid], + $next->state, pid_state[next_pid]); + } + } +} + +probe kernel.trace("sched_wakeup") { + wakeup_pid = $p->pid; + if (traced_pid && (wakeup_pid != traced_pid)) next + if ((!$success) && (pid_state[wakeup_pid] != SLEEPING)) next + if (!wakeup_pid) next + + if (!([wakeup_pid] in pid_state)) { + //use this state as entry into state machine + previous_timestamp[wakeup_pid] = gettimeofday_us(); + pid_state[wakeup_pid] = QUEUED; + pid_names[wakeup_pid] = kernel_string($p->comm); + } else if (pid_state[wakeup_pid] == SLEEPING) { + t = gettimeofday_us(); + sleep_time[wakeup_pid] += (t - previous_timestamp[wakeup_pid]); + if (io_wait_incremented[wakeup_pid] == 1) { + io_wait_time[wakeup_pid] += (t - previous_timestamp[wakeup_pid]); + 
io_wait_incremented[wakeup_pid] = 0; + } + previous_timestamp[wakeup_pid] = t; + pid_state[wakeup_pid] = QUEUED; + pid_names[wakeup_pid] = kernel_string($p->comm); + } else { + printf("unknown transition:\n"); + printf("pid state: %d our state: %d\n", + $p->state, pid_state[wakeup_pid]); + } +} + +probe begin { + traced_pid = target(); + if (traced_pid == 0) { + printf("all mode\n"); + } else { + printf("target mode\n"); + printf("traced pid: %d\n", traced_pid); + } +} + +probe end { + t = gettimeofday_us(); + foreach (pid in pid_state) { + if (pid_state[pid] == SLEEPING) + sleep_time[pid] += (t - previous_timestamp[pid]); + if (pid_state[pid] == QUEUED) + queued_time[pid] += (t - previous_timestamp[pid]); + if (pid_state[pid] == RUNNING) + run_time[pid] += (t - previous_timestamp[pid]); + } + printf ("%16s: %6s %10s %10s %10s %10s %10s\n\n", + "execname", "pid", "run(us)", "sleep(us)", "io_wait(us)", + "queued(us)", "total(us)") + foreach (pid+ in run_time) { + printf("%16s: %6d %10d %10d %10d %10d %10d\n", + pid_names[pid], pid, run_time[pid], sleep_time[pid], + io_wait_time[pid], queued_time[pid], + (run_time[pid] + sleep_time[pid] + queued_time[pid])); + } +} -- cgit From 29e2616aeeb82605a6efe1dbc574b499781eafbe Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 20 May 2009 14:46:25 -0700 Subject: PR10177: init/kill time in sleepy context only Previously, _stp_init_time and _stp_kill_time were being called from begin/end/error probes, which will run with preemption disabled. The BUG reported on RT kernels showed that cpufreq_unregister_notifier can end up sleeping, which violates our preemption block. This patch moves the init/kill into systemtap_module_init/exit, where it is safe to sleep. The code maintains a new predicate with the define STAP_NEED_GETTIMEOFDAY, so we don't still incur any timer overhead if it's not used. 
--- tapset/timestamp_gtod.stp | 17 ++--------------- testsuite/buildok/gtod_init.stp | 13 +++++++++++++ testsuite/buildok/gtod_noinit.stp | 13 +++++++++++++ testsuite/systemtap.base/gtod_init.exp | 29 ----------------------------- translate.cxx | 14 ++++++++++++++ 5 files changed, 42 insertions(+), 44 deletions(-) create mode 100755 testsuite/buildok/gtod_init.stp create mode 100755 testsuite/buildok/gtod_noinit.stp delete mode 100644 testsuite/systemtap.base/gtod_init.exp diff --git a/tapset/timestamp_gtod.stp b/tapset/timestamp_gtod.stp index 43b127dc..b916a3b1 100644 --- a/tapset/timestamp_gtod.stp +++ b/tapset/timestamp_gtod.stp @@ -7,23 +7,10 @@ // Public License (GPL); either version 2, or (at your option) any // later version. -function _gettimeofday_init:long() %{ - THIS->__retvalue = _stp_init_time(); /* Kick off the Big Bang. */ +%{ +#define STAP_NEED_GETTIMEOFDAY 1 %} -probe begin(-0x8000000000000000) { - if (_gettimeofday_init() != 0) - error("couldn't initialize gettimeofday") -} - -function _gettimeofday_kill() %{ - _stp_kill_time(); /* Go to a beach. Drink a beer. */ -%} - -probe end(0x7FFFFFFFFFFFFFFF), error(0x7FFFFFFFFFFFFFFF) { - _gettimeofday_kill() -} - /** * sfunction gettimeofday_ns - Number of nanoseconds since UNIX epoch. diff --git a/testsuite/buildok/gtod_init.stp b/testsuite/buildok/gtod_init.stp new file mode 100755 index 00000000..1d76aeab --- /dev/null +++ b/testsuite/buildok/gtod_init.stp @@ -0,0 +1,13 @@ +#! stap -gp4 + +# check that STAP_NEED_GETTIMEOFDAY is defined with a gettimeofday +function check() %{ +#ifndef STAP_NEED_GETTIMEOFDAY +#error "gettimeofday should define STAP_NEED_GETTIMEOFDAY!" +#endif +%} + +probe begin { + check() + println(gettimeofday_s()) +} diff --git a/testsuite/buildok/gtod_noinit.stp b/testsuite/buildok/gtod_noinit.stp new file mode 100755 index 00000000..94a9dfdc --- /dev/null +++ b/testsuite/buildok/gtod_noinit.stp @@ -0,0 +1,13 @@ +#! 
stap -gp4 + +# check that STAP_NEED_GETTIMEOFDAY is NOT defined without a gettimeofday +function check() %{ +#ifdef STAP_NEED_GETTIMEOFDAY +#error "STAP_NEED_GETTIMEOFDAY should not be defined!" +#endif +%} + +probe begin { + check() + println(get_cycles()) +} diff --git a/testsuite/systemtap.base/gtod_init.exp b/testsuite/systemtap.base/gtod_init.exp deleted file mode 100644 index 48616b1f..00000000 --- a/testsuite/systemtap.base/gtod_init.exp +++ /dev/null @@ -1,29 +0,0 @@ -# test for checking initialization of the time subsystem -set test "gtod_init" - -# check that init and kill are both present with a gettimeofday -set time_init 0 -set time_kill 0 -spawn stap -p2 -e {probe begin { println(gettimeofday_s()) }} -expect { - -timeout 120 - -re {\n_gettimeofday_init:} { incr time_init; exp_continue } - -re {\n_gettimeofday_kill:} { incr time_kill; exp_continue } - timeout { fail "$test (timeout)" } - eof { - if {$time_init == 1} { pass "$test (init)" } { fail "$test (init $time_init)" } - if {$time_kill == 1} { pass "$test (kill)" } { fail "$test (kill $time_kill)" } - } -} -wait - -# check that init and kill are both NOT present without a gettimeofday -spawn stap -p2 -e {probe begin { println(get_cycles()) }} -expect { - -timeout 120 - -re {\n_gettimeofday_init:} { fail "$test (bad init)" } - -re {\n_gettimeofday_kill:} { fail "$test (bad kill)" } - timeout { fail "$test (timeout)" } - eof { pass "$test (no init/kill)" } -} -wait diff --git a/translate.cxx b/translate.cxx index 62c71aeb..9f45f5d1 100644 --- a/translate.cxx +++ b/translate.cxx @@ -1133,6 +1133,15 @@ c_unparser::emit_module_init () o->newline(-1) << "}"; o->newline() << "if (rc) goto out;"; + // initialize gettimeofday (if needed) + o->newline() << "#ifdef STAP_NEED_GETTIMEOFDAY"; + o->newline() << "rc = _stp_init_time();"; // Kick off the Big Bang. 
+ o->newline() << "if (rc) {"; + o->newline(1) << "_stp_error (\"couldn't initialize gettimeofday\");"; + o->newline() << "goto out;"; + o->newline(-1) << "}"; + o->newline() << "#endif"; + o->newline() << "(void) probe_point;"; o->newline() << "(void) i;"; o->newline() << "(void) j;"; @@ -1359,6 +1368,11 @@ c_unparser::emit_module_exit () o->newline() << "#endif"; } + // teardown gettimeofday (if needed) + o->newline() << "#ifdef STAP_NEED_GETTIMEOFDAY"; + o->newline() << " _stp_kill_time();"; // Go to a beach. Drink a beer. + o->newline() << "#endif"; + // print final error/skipped counts if non-zero o->newline() << "if (atomic_read (& skipped_count) || " << "atomic_read (& error_count) || " -- cgit From 4acb6884cdfa8205a60b203aa9e48ab79efd9ea2 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Wed, 20 May 2009 23:59:26 +0200 Subject: Fix gcc warning about Dwarf_Addr initialization. * translate.cxx (dump_unwindsyms): Initialize eh_frame to 0, not NULL. --- translate.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/translate.cxx b/translate.cxx index 9f45f5d1..505c9fc6 100644 --- a/translate.cxx +++ b/translate.cxx @@ -4713,7 +4713,7 @@ dump_unwindsyms (Dwfl_Module *m, size_t debug_len = 0; void *eh_frame = NULL; size_t eh_len = 0; - Dwarf_Addr eh_addr = NULL; + Dwarf_Addr eh_addr = 0; get_unwind_data (m, &debug_frame, &eh_frame, &debug_len, &eh_len, &eh_addr); if (debug_frame != NULL && debug_len > 0) { -- cgit From d79591b3274ba961600d387f41277626608b02af Mon Sep 17 00:00:00 2001 From: "Frank Ch. 
Eigler" Date: Thu, 21 May 2009 00:38:46 -0400 Subject: some versions of bundled elfutils needs m4 for building --- systemtap.spec | 1 + 1 file changed, 1 insertion(+) diff --git a/systemtap.spec b/systemtap.spec index 332fedf8..d3b3f237 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -36,6 +36,7 @@ BuildRequires: nss-devel nss-tools pkgconfig %if %{with_bundled_elfutils} Source1: elfutils-%{elfutils_version}.tar.gz Patch1: elfutils-portability.patch +BuildRequires: m4 %define setup_elfutils -a1 %else BuildRequires: elfutils-devel >= %{elfutils_version} -- cgit From 1208cc21f63fff917e7817487d727d4cbe12d0ea Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Thu, 21 May 2009 10:28:55 -0400 Subject: PR10182: clean uprobes.ko during rpm upgrade --- systemtap.spec | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/systemtap.spec b/systemtap.spec index d3b3f237..c0864657 100644 --- a/systemtap.spec +++ b/systemtap.spec @@ -252,6 +252,13 @@ exit 0 chkconfig --del systemtap exit 0 +%post +# Remove any previously-built uprobes.ko materials +(make -C /usr/share/systemtap/runtime/uprobes clean) >/dev/null 3>&1 || true + +%preun +# Ditto +(make -C /usr/share/systemtap/runtime/uprobes clean) >/dev/null 3>&1 || true %files %defattr(-,root,root) -- cgit