summaryrefslogtreecommitdiffstats
path: root/playbooks/vhost_reboot.yml
blob: ad80aaf7a4663de2632d92c8c7c00308f11388c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#
# This playbook lets you safely reboot a virthost and all its guests.
#
# requires --extra-vars="target=somevhost fqdn"
# Might add nodns=true or nonagios=true to the extra vars

#General overview:
# talk to the vhost
# get back list of instances
# add each of their hostnames to an ad hoc group
# halt each of them in a second play
# wait for them to die
# third play, reboot the vhost
#     wait for vhost to come back

# TODO: Figure out how to compare virt info pre and post boot.

# Play 1: query the virthost named by the "target" extra var for its guests
# and put every guest into the in-memory group "myvms_new", which the later
# plays in this file use as their host list.
- name: find instances
  hosts: "{{ target }}"
  user: root
  gather_facts: false

  tasks:
  # The result is registered so the next task can loop over vmlist.list_vms.
  - name: get list of guests
    virt:
      command: list_vms
    register: vmlist

#  - name: get info on guests (prereboot)
#    virt: command=info
#    register: vminfo_pre

  # Runs as a local action, once per guest name returned above.
  - name: add them to myvms_new group
    local_action:
      module: add_host
      hostname: "{{ item }}"
      groupname: myvms_new
    with_items: "{{ vmlist.list_vms }}"

# Call out to another playbook.  Disable any proxies that may live here
- include: update-proxy-dns.yml status=disable proxies=myvms_new:&proxies
  # Run unless the caller passed a truthy nodns. "| bool" normalises both the
  # string form produced by key=value extra vars ("true"/"false") and a real
  # boolean from JSON/YAML extra vars; the former substring test raised on a
  # real boolean and disagreed with the matching "enable" include.
  when: nodns is not defined or not (nodns | bool)

# Play 2: shut down every guest collected in myvms_new, after scheduling
# Nagios downtime for it.
- name: halt instances
  hosts: myvms_new
  user: root
  gather_facts: false
  # serial: 1 makes the play process one guest at a time.
  serial: 1

  tasks:
  # Best effort: a Nagios failure must not block the shutdown (ignore_errors).
  # "| bool" makes nonagios=false (a non-empty, hence truthy, string when
  # passed as key=value extra vars) behave as false instead of skipping.
  - name: schedule regular host downtime
    nagios:
      action: downtime
      minutes: 30
      service: host
      host: "{{ inventory_hostname_short }}{{ env_suffix }}"
    delegate_to: noc01.phx2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not (nonagios | bool)

  # "-h 1" schedules the halt one minute out, so the command itself returns.
  # if one of them is down we don't care, hence ignore_errors.
  - name: halt the vm instances - to poweroff
    command: /sbin/shutdown -h 1
    ignore_errors: true

# Play 3: block until every guest in myvms_new has stopped answering on the
# SSH port. The check is made from the control machine (local_action).
- name: wait for the whole set to die.
  hosts: myvms_new
  user: root
  gather_facts: false

  tasks:
  # Waits 30s before the first probe and gives up after 300s.
  - name: wait for them to die
    local_action:
      module: wait_for
      host: "{{ inventory_hostname }}"
      port: 22
      state: stopped
      delay: 30
      timeout: 300

# Play 4: reboot the virthost itself, wait for it and libvirtd to come back,
# then run the post-boot fixups and lift the Nagios downtime.
- name: reboot vhost
  hosts: "{{ target }}"
  user: root
  gather_facts: false

  tasks:
  # Best effort (ignore_errors). "| bool" makes nonagios=false, which is a
  # truthy string when passed as key=value extra vars, behave as false.
  - name: tell nagios to shush
    nagios:
      action: downtime
      minutes: 60
      service: host
      host: "{{ inventory_hostname_short }}{{ env_suffix }}"
    delegate_to: noc01.phx2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not (nonagios | bool)

  # "-r 1" schedules the reboot one minute out, so the command itself returns.
  - name: reboot the virthost
    command: /sbin/shutdown -r 1

  # Probe the host this play is running against rather than the raw "target"
  # pattern: identical for a single FQDN, and still valid if target ever
  # matches more than one host. Waits 120s first, then up to 900s for an
  # SSH banner containing "OpenSSH".
  - name: wait for virthost to come back - up to 15 minutes
    local_action:
      module: wait_for
      host: "{{ inventory_hostname }}"
      port: 22
      delay: 120
      timeout: 900
      search_regex: OpenSSH

  - name: wait for libvirtd to come back on the virthost
    wait_for:
      path: /var/run/libvirtd.pid
      state: present
      delay: 10

  # Registered but not used in this file; see the TODO in the file header
  # about comparing virt info before and after the reboot.
  - name: look up vmlist
    virt:
      command: list_vms
    register: newvmlist

  - name: sync time
    command: ntpdate -u 1.rhel.pool.ntp.org

  - name: serverbeach hosts need a special iptables config
    command: /root/fix-iptables.sh
    when: inventory_hostname_short.startswith('serverbeach')

  - name: tell nagios to unshush
    nagios:
      action: unsilence
      service: host
      host: "{{ inventory_hostname_short }}{{ env_suffix }}"
    delegate_to: noc01.phx2.fedoraproject.org
    ignore_errors: true
    when: nonagios is not defined or not (nonagios | bool)

# Call out to that dns playbook.  Put proxies back in now that they're back
- include: update-proxy-dns.yml status=enable proxies=myvms_new:&proxies
  # Run unless the caller passed a truthy nodns. "| bool" is needed because
  # key=value extra vars arrive as strings, and a bare "not nodns" treats the
  # non-empty string "false" as true.
  when: nodns is not defined or not (nodns | bool)

# Final play: run the shared restart_unbound task file on the affected guests.
- name: Fix unbound if necessary
  # intersection - only hosts that are in our dynamic myvms_new group and
  # also in the unbound group
  hosts: "myvms_new:&unbound"
  user: root

  # Fact gathering is not disabled in this play; presumably that is because
  # the second vars file name is built from ansible_distribution.
  vars_files:
  - /srv/web/infra/ansible/vars/global.yml
  - "/srv/web/infra/ansible/vars/{{ ansible_distribution }}.yml"

  tasks:
  # tasks_path is not defined in this file; presumably it comes from the
  # vars files loaded above - verify.
  - include: "{{ tasks_path }}/restart_unbound.yml"

#  - name: get info on guests (postreboot)
#    virt: command=info
#    register: vminfo_post