# An implementation of generating whitelist for safe-probes 
# in systemtap based on the discussion in thread:
#   http://sourceware.org/ml/systemtap/2006-q3/msg00574.html
#  
# Copyright (C) 2006 IBM Corp.
# 
# This file is part of systemtap, and is free software.  You can
# redistribute it and/or modify it under the terms of the GNU General
# Public License (GPL); either version 2, or (at your option) any
# later version.
#
# Main ideas:
# 1)Fetch a group of probe points from probes.pending, probe them,
#   and run some workloads (e.g. runltp) in parallel meanwhile.
#
# 2)If the probe test ends without crash, those actually triggered
#   probe points are moved into probes.passed and those 
#   untriggered are into probes.untriggered; if the probe test 
#   crashes the system, it will be resumed automatically after 
#   system reboot. Those probe points which have been triggered are
#   also moved into probes.passed, but those untriggered ones are
#   moved into probes.failed.
#
# 3)Repeat the above until probes.pending becomes empty, then:
#   Normally, probes.pending is reinitialized from probes.failed
#   and probes.untriggered, then start the next iteration; 
#   But if the max running level (MAX_RUNNING_LEVEL, now 2) is exceeded, or probes.pending,
#   probes.failed and probes.untriggered are all empty, stop the
#   whole test.
#
# http://sourceware.org/ml/systemtap/2006-q4/msg00143.html
# http://sourceware.org/ml/systemtap/2006-q4/msg00435.html
#
# Name used when reporting this test's result to DejaGnu.
set TESTNAME whitelist

# Disabled by default: a full run takes several hours (depending on the
# workloads and on MAX_RUNNING_LEVEL) and is likely to crash the system
# more than once. Run it manually as a single test case when needed.
# Comment out the following two lines to enable it.
untested "${TESTNAME} is disabled"
return

#################################################################
#            supporting constants and procedures                #
#################################################################
# Highest iteration level that is still run; the main loop stops once the
# running level becomes greater than this value.
set MAX_RUNNING_LEVEL 2

# File holding the current running level (read by get_running_level).
set STP_GENWHITELIST_RUNNING /stp_genwhitelist_running
# Output file of the stap run for the current group of probe points.
set STAP_RESULT probe.out
# Work lists of probe points, one probe point per line.
set PROBES_ALL probes.all
set PROBES_PENDING probes.pending
set PROBES_CURRENT probes.current
set PROBES_PASSED probes.passed
set PROBES_FAILED probes.failed
set PROBES_UNTRIGGERED probes.untriggered

# Template of the systemtap script used for one probe test. It is a
# [format] string: the single %s is replaced by the comma-separated list
# of probe points (see do_current_test), and each %% becomes a literal %
# in the generated script. The generated script counts hits per probe
# point (keyed by pp()) and prints "<count> <probe point>" lines every
# 30 seconds and once more when the script ends.
set systemtap_script {
    global stat
    probe %s {
        stat[pp()] <<< 1
    }
    probe timer.ms(30000) {
        foreach (pp in stat)
            printf("%%d %%s\n", @count(stat[pp]), pp)
    }
    probe end {
        foreach (pp in stat)
            printf("%%d %%s\n", @count(stat[pp]), pp)
    }
}

# Workloads started by runbenchs while the probes are active. Each entry
# is a list: the executable followed by its arguments. When the
# executable is not present, runbenchs spawns "sleep 30" in its place.
set benchs { 
        {/usr/local/ltp/runltp -t 300s -f syscalls}
        {/usr/local/ltp/runltp -t 300s -f nfs}
        {/usr/local/ltp/runltp -t 300s -f ipc}
        {/usr/local/ltp/runltp -t 300s -f dio}
        {/usr/local/ltp/runltp -t 300s -f fs}
        {/usr/local/ltp/runltp -t 300s -f mm}
        {/usr/local/ltp/runltp -t 300s -f tcp_cmds}
        {/usr/local/ltp/runltp -t 300s -f ltp-aio-stress.part1}
}

# Systemtap script handed to "stap -p2" by init_probes_all. Its output is
# filtered for lines starting with "kernel.function" to build the initial
# probe point list (probes.all). The probe body is intentionally empty;
# only the list of probe aliases matters.
set init_probes_all_script {
    probe scheduler.*, 
        ioscheduler.*, 
        ioscheduler.*.return, 
        syscall.*, 
        syscall.*.return, 
        ioblock.*, 
        netdev.*, 
        vm.*, 
        signal.*, 
        signal.*.return, 
        udp.*, 
        udp.*.return, 
        tcp.*, 
        tcp.*.return, 
        kprocess.*, 
        process.*, 
        nfs.fop.*, 
        nfs.aop.*,
        nfsd.proc.*, 
        nfsd.*, 
        nfs.proc.*, 
        scsi.*, 
        sunrpc.*, 
        generic.fop.*, 
        generic.fop.*.return, 
        vfs.*, 
        vfs.*.return {}
}

# Run one probe test over every probe point listed in $PROBES_CURRENT.
#
# All non-empty lines of the file are joined into a single comma-separated
# probe point list. If "stap -p2" accepts that list (probe_ok), the
# systemtap_script template is instantiated with it and executed through
# whitelist_run, with output going to $STAP_RESULT. Otherwise an error is
# printed and the generated script is saved to sc.stp for inspection.
#
# Takes no arguments and returns nothing useful.
proc do_current_test {} {
    global PROBES_CURRENT
    global STAP_RESULT
    global systemtap_script
    if [catch {open $PROBES_CURRENT r} Infile] {
        puts "Failed to open $PROBES_CURRENT"
        return
    }
    # Collect every probe point. The previous loop issued a second [gets]
    # after each successful concat and threw the result away, so every
    # other line of the group was silently left out of the probe test.
    set points {}
    while {[gets $Infile line] >= 0} {
        set line [string trim $line]
        if {[string length $line] > 0} {
            lappend points $line
        }
    }
    catch {close $Infile}

    if {[llength $points] == 0} {
        puts "No probe points found in $PROBES_CURRENT"
        return
    }
    set probepoint [join $points ", "]

    set testname "L[get_running_level]_Grp_[string range $probepoint 0 32]..."
    set script [format $systemtap_script $probepoint]
    if [probe_ok $probepoint] {
        whitelist_run $testname runbenchs -e $script -o $STAP_RESULT
    } else {
        puts "ERROR no match in probe_ok() $testname no match"
        # Keep the rejected script around for debugging.
        exec echo $script > sc.stp
    }
}

# Return the number of lines in $filename as reported by "wc -l".
# Returns 0 when the file is not readable, when the count cannot be
# parsed, or when the count is not positive.
proc get_linesize {filename} {
    if {![file readable $filename]} {
        return 0
    }
    set wc_output [exec wc -l $filename]
    # scan yields fewer than one conversion when no leading integer exists.
    if {[scan $wc_output "%d" count] < 1} {
        return 0
    }
    if {$count > 0} {
        return $count
    }
    return 0
}

# Return the running level stored in $STP_GENWHITELIST_RUNNING.
# Returns 0 when the file is not readable or does not start with a
# positive integer.
proc get_running_level {} {
    global STP_GENWHITELIST_RUNNING
    set level 0
    if {[file readable $STP_GENWHITELIST_RUNNING]} {
        set contents [exec cat $STP_GENWHITELIST_RUNNING]
        if {[scan $contents "%d" parsed] >= 1 && $parsed > 0} {
            set level $parsed
        }
    }
    return $level
}

set NO_CRASH 0
set ALREADY_CRASHED 1

# Sort the probe points of the last probe test into the result lists and
# remove $PROBES_CURRENT and $STAP_RESULT.
#
# already_crashed - boolean; pass $ALREADY_CRASHED when recovering after a
#                   crash, $NO_CRASH (the default, 0) after a normal run.
#
# Probe points found in $STAP_RESULT (see is_probed) are appended to
# $PROBES_PASSED. The remaining ones go to $PROBES_FAILED when
# already_crashed is true and to $PROBES_UNTRIGGERED otherwise.
#
# The default is written as a literal 0: a "$NO_CRASH" placed inside the
# braced argument list is never substituted, so calling the proc without
# an argument used to fail in the boolean tests below.
proc garbage_collect {{already_crashed 0}} {
    global STAP_RESULT
    global PROBES_CURRENT
    global PROBES_PASSED
    global PROBES_FAILED
    global PROBES_UNTRIGGERED

    if {[get_linesize $PROBES_CURRENT] == 0} {
        exec rm -f $PROBES_CURRENT $STAP_RESULT
        return
    }

    # Destination for probe points that were not seen in the stap output.
    if {$already_crashed} {
        set leftover $PROBES_FAILED
    } else {
        set leftover $PROBES_UNTRIGGERED
    }

    if {[get_linesize $STAP_RESULT] == 0} {
        # Nothing was reported at all: the whole group is left over.
        exec cat $PROBES_CURRENT >> $leftover
        exec rm -f $PROBES_CURRENT $STAP_RESULT
        return
    }

    # both probes.current and probe.out are non-empty
    if [catch {open $PROBES_CURRENT r} Infile] {
        puts "Failed to open $PROBES_CURRENT"
        exec rm -f $PROBES_CURRENT $STAP_RESULT
        return
    }
    # [gets chan var] returns -1 only at end of file, so a last line that
    # lacks a trailing newline is still processed.
    while {[gets $Infile probepoint] >= 0} {
        if {[string length [string trim $probepoint]] == 0} {
            # Blank lines are not probe points; do not re-file them.
            continue
        }
        if {[is_probed $probepoint $STAP_RESULT]} {
            exec echo $probepoint >> $PROBES_PASSED
        } else {
            exec echo $probepoint >> $leftover
        }
    }
    catch {close $Infile}
    exec rm -f $PROBES_CURRENT $STAP_RESULT
    return
}

# Store (current running level + 1) in $STP_GENWHITELIST_RUNNING.
# A non-positive result is written as 0.
proc incr_running_level {} {
    global STP_GENWHITELIST_RUNNING
    set next_level [expr {[get_running_level] + 1}]
    if {$next_level <= 0} {
        set next_level 0
    }
    exec echo $next_level > $STP_GENWHITELIST_RUNNING
    return
}

# Build $PROBES_ALL from the probe points that "stap -p2" resolves for
# init_probes_all_script, keeping only lines that start with
# "kernel.function". Errors of the individual commands are ignored.
#
# Returns 1 when the resulting file has no lines, 0 otherwise.
proc init_probes_all {} {
    global PROBES_ALL
    global init_probes_all_script
    # Intermediate file shared with the main loop's temporary file name.
    set tmpfile /tmp/whitelist_tmpfile
    catch {exec stap -p2 -e $init_probes_all_script > $tmpfile}
    catch {exec grep "^kernel.function" $tmpfile > $PROBES_ALL}
    catch {exec rm -f $tmpfile}
    return [expr {[get_linesize $PROBES_ALL] == 0}]
}

# Mark the start of a fresh test by writing running level 1 to
# $STP_GENWHITELIST_RUNNING.
proc init_running_level {} {
    global STP_GENWHITELIST_RUNNING
    set first_level 1
    exec echo $first_level > $STP_GENWHITELIST_RUNNING
}

# Tell whether $probepoint was reported in the stap output file.
#
# probepoint  - probe point text, compared literally.
# stap_result - path of the stap output file, whose lines have the form
#               "<count> <probe point>".
#
# Returns 1 when some line of the file ends with a space followed by
# exactly $probepoint, 0 otherwise (including an empty probe point and an
# empty or unreadable file).
proc is_probed {probepoint stap_result} {
    if {[string length $probepoint] == 0 || [get_linesize $stap_result] == 0 } {
        return 0
    }
    if [catch {open $stap_result r} Resfile] {
         puts "Failed to open $stap_result in is_probed() proc"
         return 0
    }
    # Compare the tail of each line literally. A glob match would give
    # characters such as *, ?, [ and \ inside the probe point a special
    # meaning and could report wrong results.
    set wanted " $probepoint"
    set tail_start "end-[expr {[string length $wanted] - 1}]"
    set probed 0
    # [gets chan var] returns -1 only at end of file, so an unterminated
    # last line (e.g. output cut short by a crash) is still examined.
    while {[gets $Resfile resline] >= 0} {
        if {[string equal [string range $resline $tail_start end] $wanted]} {
            set probed 1
            break
        }
    }
    catch {close $Resfile}
    return $probed
}

# Check whether systemtap accepts the given probe point list.
#
# probepoint - one or more comma-separated probe points.
#
# Runs "stap -p2 -e {probe <probepoint> {}}" and returns 1 when the
# command succeeds, 0 otherwise. The command line is also written to the
# file "cmdfile" in the current directory for debugging.
proc probe_ok {probepoint} {
    set cmd [list exec stap -p2 -e "probe $probepoint {}"]
    exec echo $cmd > cmdfile
    # Negate inside expr: a bare "![catch ...]" word is not evaluated by
    # Tcl and would return the string "!0" or "!1" instead of a boolean.
    return [expr {![catch $cmd]}]
}

# Choose how many probe points go into one probe test.
#
# level     - current running (iteration) level.
# inputfile - file whose line count is the total number of probe points.
#
# Level 1 uses total/9, level 2 total/49, level 3 a fixed 50 and level 4
# a fixed 10. Any other level prints a message and uses the total. A
# result that is not positive is replaced by 5.
proc proper_current_size {level inputfile} {
    set total [get_linesize $inputfile]
    switch $level {
        1 { set group_size [expr {$total / 9}] }
        2 { set group_size [expr {$total / 49}] }
        3 { set group_size 50 }
        4 { set group_size 10 }
        default {
            puts "Reached unexpected iteration level: $level"
            set group_size $total
        }
    }
    if {$group_size > 0} {
        return $group_size
    }
    return 5
}

# Directory from which the test is restarted after a reboot. Prefer the
# shell's PWD (as before); fall back to Tcl's [pwd] when PWD is not in
# the environment, which previously raised a "no such element" error.
if {[info exists env(PWD)]} {
    set whitelist_startdir $env(PWD)
} else {
    set whitelist_startdir [pwd]
}
# Line added to rc.local so that the test resumes after a crash/reboot.
set startup_line_in_RCLOCAL "cd $whitelist_startdir; runtest whitelist.exp&"
# Boot script that receives the startup line.
set RCLOCAL "/etc/rc.d/rc.local"

# Make the test restart at boot: drop any existing line of $RCLOCAL that
# matches "runtest whitelist.exp", then append the startup line.
proc register_service {} {
    global startup_line_in_RCLOCAL RCLOCAL
    # sed -n with "!p" prints only the lines that do NOT match.
    set drop_old_entry {/runtest whitelist.exp/!p}
    exec sed -i -n -e $drop_old_entry $RCLOCAL
    exec echo $startup_line_in_RCLOCAL >> $RCLOCAL
}

# Stop the test from restarting at boot by deleting every line of
# $RCLOCAL that matches "runtest whitelist.exp".
proc unregister_service {} {
    global RCLOCAL
    # sed -n with "!p" prints only the lines that do NOT match.
    set drop_entry {/runtest whitelist.exp/!p}
    exec sed -i -n -e $drop_entry $RCLOCAL
}

# Run stap with the given arguments and drive a workload while the probes
# are active.
#
# TEST_NAME         - label of the probe test (not used in the output).
# LOAD_GEN_FUNCTION - command that generates load once stap reports
#                     "Pass 5: starting run."; runbenchs is used when it
#                     is empty.
# args              - extra arguments appended to
#                     "stap -DMAXSKIPPED=200000 -v".
#
# After the workload finishes, stap's process group is sent SIGINT so the
# script ends. A parse/semantic error or a timeout (1800 s without a
# match) is reported with puts; nothing useful is returned.
proc whitelist_run { TEST_NAME {LOAD_GEN_FUNCTION ""} args } {
    set cmd [concat {stap -DMAXSKIPPED=200000 -v } $args]
    # Without a successful spawn there is no spawn_id to work with.
    if {[catch {eval spawn $cmd} spawn_err]} {
        puts "whitelist_run failure \(unable to spawn stap: $spawn_err\)"
        return
    }
    set stap_id $spawn_id
    if {[string length $LOAD_GEN_FUNCTION] == 0} {
        set LOAD_GEN_FUNCTION runbenchs
    }
    set failed 1
    # Defensive default so the failure report below can never hit an
    # unset variable.
    set detail "unknown"
    expect {
        -timeout 1800
        -i $stap_id -re {^Pass\ ([1234]):\ [^\r]*\r\n} {
            # Progress line of passes 1-4: keep waiting.
            exp_continue
        }
        -re {Pass\ 5:\ starting\ run.\r\n} {
            eval $LOAD_GEN_FUNCTION
            # stap may already be gone; a failing kill must not abort.
            catch {exec kill -INT -[exp_pid -i $stap_id]}
            exp_continue
        }
        -re {Pass\ 5:\ run\ completed} {
            set failed 0
        }
        -re {parse\ error|semantic\ error} { set detail "$expect_out(0,string)" }
        timeout {
            set detail "stap timeout"
            catch {exec kill -INT -[exp_pid -i $stap_id]}
        }
        eof { set failed 0 }
    }
    catch {close -i $stap_id}
    wait -i $stap_id
    if {$failed} { puts "whitelist_run failure \($detail\)" }
}

# Start every workload listed in the global benchs list in parallel and
# wait until all of them have finished.
#
# A workload whose executable is missing is replaced by "sleep 30". A
# workload that cannot be spawned is skipped, so a stale or unset
# spawn_id is never added to the wait list. Waiting stops early when no
# workload produces a match or exits within 900 seconds.
# NOTE(review): on that timeout the remaining workloads are left running
# and are not waited for - confirm whether they should be killed.
proc runbenchs {} {
    global benchs
    set idlist {}

    foreach bench $benchs {
        set benchexec [lindex $bench 0]
        set benchargs [join [lrange $bench 1 end]]
        if {[file executable $benchexec]} {
            set spawn_failed [catch {eval spawn $benchexec $benchargs}]
        } else {
            set spawn_failed [catch {eval spawn sleep 30}]
        }
        if {$spawn_failed} {
            continue
        }
        lappend idlist $spawn_id
    }
    set runningcount [llength $idlist]

    while {$runningcount > 0} {
        expect {
            -timeout 900
            -i $idlist -re {LTP\ Version:\ LTP-([0-9])+\r\n$} {
                # Banner of a workload; consumed, nothing else to do.
            } eof {
                # One workload finished: drop it from the wait list.
                set donepos [lsearch $idlist $expect_out(spawn_id)]
                set idlist [lreplace $idlist $donepos $donepos]
                incr runningcount -1
                wait -i $expect_out(spawn_id)
            } timeout {
                break
            }
        }
    }
}

###################################################################
#                 Main routine of the whole test                  #
###################################################################
# Skip the test when the harness provides installtest_p and it reports
# false.
if {[llength [info procs installtest_p]] > 0 && ![installtest_p]} {
    untested $TESTNAME
    return
}

if {[get_running_level] == 0} {
    # Not started yet, start the whole test from scratch.
    # Add the startup line to rc.local so the test resumes after a crash.
    register_service
    # Build probes.all when it is missing or empty.
    if {[get_linesize $PROBES_ALL] == 0} {
        if {[init_probes_all] != 0} {
           # Giving up: remove the rc.local entry again, otherwise the
           # test would be relaunched on every boot.
           catch {unregister_service}
           fail "$TESTNAME unable to initialize the probe point list"
           return
        }
    }
    # Set current_running_level as 1 to indicate a new test started
    init_running_level
    # Initialize the intermediate files based on probes.all
    exec rm -f $PROBES_PENDING $PROBES_CURRENT
    exec rm -f $PROBES_PASSED $PROBES_UNTRIGGERED $PROBES_FAILED
    file copy $PROBES_ALL $PROBES_PENDING
    exec touch $PROBES_PASSED $PROBES_UNTRIGGERED $PROBES_FAILED
    puts "Start a fresh stp_genwhitelist test."
} else {
    # A running level is already recorded, so a previous run was
    # interrupted: classify the probe points of the interrupted test.
    garbage_collect $ALREADY_CRASHED
    puts "Recovered from last maybe crashed probe test."
}

# Number of probe points per probe test, derived from the running level
# and the size of probes.all.
set current_size_const [proper_current_size [get_running_level] $PROBES_ALL]
puts "current_size_const is initialized as $current_size_const"

# Main loop: take groups of probe points from probes.pending and test
# them until an iteration ends with nothing left to retry or the running
# level exceeds MAX_RUNNING_LEVEL.
while {1} {
    puts "Current size of probes.pending is [get_linesize $PROBES_PENDING]"
    if {[get_linesize $PROBES_PENDING] == 0} {
        # The current iteration is finished.
        # Check whether we need the next iteration or not
        # NOTE(review): presumably a no-op when this file is evaluated at
        # global scope - confirm.
        global MAX_RUNNING_LEVEL
        # incr running_level for the start of a new iteration
        incr_running_level
        puts "Running level increased to [get_running_level]"
        if {[get_running_level] > $MAX_RUNNING_LEVEL} {
            puts "Exceed max running level limit."
            break
        } else {
            puts "Current running level is [get_running_level]"
            # Rebuild probes.pending from the failed and untriggered
            # lists, emptying each list after it has been moved.
            exec rm -f $PROBES_PENDING
            if {[get_linesize $PROBES_FAILED] > 0} {
                # Append probes.failed to probes.pending
                exec cat $PROBES_FAILED >> $PROBES_PENDING
                file delete $PROBES_FAILED
                exec touch $PROBES_FAILED
                puts "Append $PROBES_FAILED to $PROBES_PENDING"
            } 
            if {[get_linesize $PROBES_UNTRIGGERED] > 0} {
                # Append probes.untriggered to probes.pending
                exec cat $PROBES_UNTRIGGERED >> $PROBES_PENDING
                file delete $PROBES_UNTRIGGERED
                exec touch $PROBES_UNTRIGGERED
                puts "Append $PROBES_UNTRIGGERED to $PROBES_PENDING"
            } 
            if {[get_linesize $PROBES_PENDING] == 0} {
                # No more pending probe points
                puts "No more iterations needed. Stopped."
                break
            }
            # set new value of current_size_const for new iteration level
            set current_size_const [proper_current_size [get_running_level] $PROBES_ALL]
            puts "current_size_const is set as $current_size_const now"
            continue
        }
    } 
    # Now, non-empty probes.pending should be ready
    # Generate probes.current: the first current_size_const lines of
    # probes.pending become probes.current, the rest stays pending.
    exec rm -f $PROBES_CURRENT
    exec head -n $current_size_const $PROBES_PENDING > $PROBES_CURRENT
    exec tail -n+[expr $current_size_const+1] $PROBES_PENDING > /tmp/whitelist_tmpfile
    exec mv /tmp/whitelist_tmpfile $PROBES_PENDING

    puts "Start a probe test..."
    # Do actual probe test
    do_current_test
    puts "Completed one probe test."

    # No crash fortunately, so do some cleaning to prepare for next test
    garbage_collect $NO_CRASH
}

# All iterations are done: delete the working files (the passed, failed
# and untriggered lists are kept), remove the rc.local entry and report
# the test as passed.
puts "Remove all temporary files, unregister the service and return."
foreach leftover [list $PROBES_PENDING $PROBES_CURRENT \
                       $STP_GENWHITELIST_RUNNING $STAP_RESULT \
                       /tmp/whitelist_tmpfile] {
    exec rm -f $leftover
}
unregister_service

pass "${TESTNAME} completed"
return