summaryrefslogtreecommitdiffstats
path: root/vircmd
blob: ae20ac681ec6c8f28820dff3bb4c71182c88e9f3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
# Make running virsh commands on a cluster easier.

####################
# show program usage
#
# Writes the help text to stderr and terminates the script with exit
# status 1.  Takes no arguments and never returns to the caller.
usage ()
{
    # Quoted delimiter: the help text is literal, nothing in it is expanded.
    cat >&2 <<'EOF'
Usage: vircmd [OPTION] ... <COMMAND>
  options:
    -h, --help               show this help text
    -x                       enable script debugging
    -n                       don't include the TSM node (if any)
    -w[SECS], --wait[=SECS]  for start/shutdown, wait for the desired state
                             change; give up after SECS seconds if given
                             (SECS must be attached: -w30 or --wait=30)

  commands:
     start CLUSTERNAME
           start cluster nodes

     destroy CLUSTERNAME
           power off cluster nodes (may cause data loss)

     shutdown CLUSTERNAME
           shutdown cluster nodes

     undefine CLUSTERNAME
           remove cluster

  CLUSTERNAME can be a glob-style pattern that specifies cluster nodes
EOF
    exit 1
}

############################
# parse command line options
#
# Sets:
#   no_tsm   1 when -n was given, otherwise 0
#   wait     "true" when -w/--wait was given, otherwise "false"
#   timeout  argument of -w/--wait, or "" when none was supplied
# On return the positional parameters hold only the non-option arguments.

# Name used by getopt as the prefix of its error messages.
prog="${0##*/}"

# "w::" / "wait::" declare an OPTIONAL argument; getopt only recognises it
# when attached to the option (-w30, --wait=30) and otherwise emits an
# empty string in its place, so "shift 2" below is always correct.
temp=$(getopt -n "$prog" -o "hxnw::" -l help -l wait:: -- "$@") || usage

eval set -- "$temp"

no_tsm=0
wait=false
timeout=""

while true ; do
    case "$1" in
	-x) set -x; shift ;;
	-n) no_tsm=1; shift ;;
	-w|--wait) wait=true ; timeout="$2" ; shift 2 ;;
	--) shift ; break ;;
	-h|--help|*) usage ;; # "*" shouldn't happen, so this is reasonable.
    esac
done

# A command and a cluster name are both mandatory.
if (( $# < 2 )) ; then
    echo "Usage: vircmd COMMAND CLUSTERNAME"
    exit 1
fi

cmd="$1"
cluster="$2"
count=0

# Waiting is only meaningful for commands that lead to a known domain
# state; map the command to the state string we will later look for.
if $wait ; then
    if [ "$cmd" = "start" ] ; then
	desired_state="running"
    elif [ "$cmd" = "shutdown" ] ; then
	desired_state="shut off"
    else
	echo "waiting not supported with \"$cmd\""
	echo
	usage
    fi
fi

# All virsh invocations below talk to the system QEMU instance.
VIRSH_DEFAULT_CONNECT_URI=qemu:///system
export VIRSH_DEFAULT_CONNECT_URI

# Append to $nodes every domain whose name matches a glob pattern.
#
# Globals:   domains (read)  - whitespace-separated list of domain names
#            no_tsm  (read)  - when not "0", names containing "tsm" are skipped
#            nodes   (write) - each match is appended as " <name>"
# Arguments: $1 - glob-style pattern matched against each whole domain name
get_nodes ()
{
    local pattern="$1"
    local dom

    for dom in $domains ; do
	# $pattern is deliberately unquoted so that it acts as a glob.
	case "$dom" in
	    ($pattern) ;;
	    (*) continue ;;
	esac

	# If we're not skipping the TSM node or this isn't the TSM node...
	if [[ "$no_tsm" = 0 || "$dom" != *tsm* ]] ; then
	    nodes="${nodes} ${dom}"
	fi
    done
}

# Collect the second column of "virsh list --all", ignoring the first two
# lines of its output (presumably the table header and separator).
domains=$(virsh list --all | awk 'NR > 2 {print $2}')
nodes=""

# A cluster name containing a wildcard character is used as the pattern
# verbatim; a plain name is extended so that it matches the names of the
# cluster's nodes (name, then a letter, then anything ending in a digit).
case "$cluster" in
    *[\[\]\?\*]*)
	get_nodes "$cluster"
	;;
    *)
	get_nodes "${cluster}[a-z]*[0-9]"
	;;
esac

if [ -z "$nodes" ] ; then
    echo "No nodes in cluster $2"
    exit 1
fi

rc=0

# Run "virsh $cmd" on every selected node.  A failure whose output mentions
# an internal error is retried after a short pause, up to 5 attempts in
# total; any other outcome ends the attempts for that node.  rc ends up as
# the status of the last node whose final attempt failed (0 if none did).
for node in $nodes ; do
    attempt=1
    while [ "$attempt" -le 5 ] ; do
	out=$(virsh $cmd "$node" 2>&1)
	ret=$? # Captured before echo so the output can always be shown.
	echo "$out"

	[ $ret -eq 0 ] && break
	case "$out" in
	    *internal\ error*) ;;
	    *) break ;;
	esac

	echo "Retrying \"virsh $cmd $node\" due to internal error"
	sleep 3
	attempt=$((attempt + 1))
    done

    if [ $ret != 0 ] ; then
	rc=$ret
    fi
done

# Now comes the waiting... but we don't wait if there was an error.
if [ "$rc" -ne 0 ] || ! $wait ; then
    exit "$rc"
fi

# Poll once per second until every node reports the desired state (exit 0)
# or, when a timeout was given, until that many polls have elapsed
# (exit 62 after printing the state of each node).
pat="^State:[[:space:]]+${desired_state}\$"
count=0
while : ; do
    if [ -n "$timeout" ] && [ $count -ge "$timeout" ] ; then
	echo "Timed out after ${timeout}s waiting for nodes to enter state \"${desired_state}\":"
	echo
	fmt='%-20s %s\n'
	printf "$fmt" "Domain" "State"
	printf "$fmt" "------" "-----"
	for i in $nodes ; do
	    state=$(virsh dominfo "$i" | sed -nr -e 's@^State:[[:space:]]+@@p')
	    printf "$fmt" "$i" "$state"
	done
	exit 62  # ETIME
    fi

    all_good=true
    for i in $nodes ; do
	# Often "virsh dominfo" returns 1 and prints rubbish like this:
	#  error: operation failed: could not query memory balloon allocation
	# so we capture the output instead of letting it clutter ours.  A
	# node whose state could not be read is NOT known to be in the
	# desired state, so it counts as "not ready yet" and is polled again.
	if ! virsh_out=$(virsh dominfo "$i" 2>&1) ||
	   ! grep -E -q "$pat" <<<"$virsh_out" ; then
	    all_good=false
	    break  # One unready node is enough; no need to query the rest.
	fi
    done
    $all_good && exit 0

    sleep 1
    count=$((count + 1))
done
# Not reached: the loop above only ever leaves via "exit".