Statistics
| Branch: | Tag: | Revision:

one / share / hooks / host_error.rb @ c2159551

History | View | Annotate | Download (7.52 KB)

1
#!/usr/bin/env ruby
2

    
3
# -------------------------------------------------------------------------- #
4
# Copyright 2002-2016, OpenNebula Project, OpenNebula Systems                #
5
#                                                                            #
6
# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
7
# not use this file except in compliance with the License. You may obtain    #
8
# a copy of the License at                                                   #
9
#                                                                            #
10
# http://www.apache.org/licenses/LICENSE-2.0                                 #
11
#                                                                            #
12
# Unless required by applicable law or agreed to in writing, software        #
13
# distributed under the License is distributed on an "AS IS" BASIS,          #
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
15
# See the License for the specific language governing permissions and        #
16
# limitations under the License.                                             #
17
#--------------------------------------------------------------------------- #
18

    
19
##############################################################################
20
# Script to implement host failure tolerance
21
#   One of the following modes must be chosen
22
#           -m resched VMs to another host. (Only for images in shared storage!)
23
#           -r recreate VMs running in the host. State will be lost.
24
#           -d delete VMs running in the host
25
#
26
#   Additional flags
27
#           -f resubmit suspended and powered off VMs (only for recreate)
28
#           -p <n> avoid resubmission if host comes back after n monitoring
29
#                 cycles. 0 to disable it. Default is 2.
30
#           -u disables fencing. Fencing is enabled by default. Don't disable it
31
#                 unless you are very sure about what you're doing
32
##############################################################################
33

    
34
# Root of the OpenNebula installation as given by the environment; nil when
# the ONE_LOCATION variable is not set.
ONE_LOCATION = ENV["ONE_LOCATION"]

if ONE_LOCATION
    # ONE_LOCATION is set: every path is derived from that root
    RUBY_LIB_LOCATION = "#{ONE_LOCATION}/lib/ruby"
    VMDIR             = "#{ONE_LOCATION}/var"
    CONFIG_FILE       = "#{ONE_LOCATION}/var/config"
    LOG_FILE          = "#{ONE_LOCATION}/var/host_error.log"
else
    # ONE_LOCATION is not set: fall back to fixed absolute paths
    RUBY_LIB_LOCATION = "/usr/lib/one/ruby"
    VMDIR             = "/var/lib/one"
    CONFIG_FILE       = "/var/lib/one/config"
    LOG_FILE          = "/var/log/one/host_error.log"
end

# The fencing script is looked up in the same directory as this file
FENCE_HOST = "#{File.dirname(__FILE__)}/fence_host.sh"
49

    
50
$: << RUBY_LIB_LOCATION
51

    
52
require 'opennebula'
53
include OpenNebula
54

    
55
require 'getoptlong'
56
require 'base64'
57
require 'open3'
58

    
59
################################################################################
60
# Arguments
61
################################################################################
62

    
63
# The first command line argument is taken as the ID of the host
HOST_ID = ARGV.first

# Nothing can be done without a host ID: abort right away
exit(-1) unless HOST_ID
68

    
69
################################################################################
70
# Methods
71
################################################################################
72

    
73
# Appends a message to LOG_FILE, writing one entry per line of the message.
# Each entry is prefixed with the current time, the host ID and the level.
#
#   msg   - text to log (converted with to_s); may contain several lines
#   level - tag written in the entry prefix, "I" by default
def log(msg, level="I")
    File.open(LOG_FILE, 'a') do |f|
        # each_line replaces String#lines with a block, which is deprecated
        # and emits a warning on every call. to_s lets callers pass
        # non-String objects without raising NoMethodError.
        msg.to_s.each_line do |l|
            f.puts "[#{Time.now}][HOST #{HOST_ID}][#{level}] #{l}"
        end
    end
end
80

    
81
# Shorthand for logging a message tagged with the "E" level.
def log_error(msg)
    log msg, "E"
end
84

    
85
# Logs the reason for the failure and terminates the script with status -1.
#
#   msg - optional description of the error; when given it is logged before
#         the generic exit notice. Calling without arguments behaves as
#         before, so callers that pass a message no longer raise
#         ArgumentError.
def exit_error(msg=nil)
    log_error(msg) if msg
    log_error("Exiting due to previous error.")
    exit(-1)
end
89

    
90
# Builds an expression of the form "STATE=a or STATE=b or ..." from the
# given values. Returns an empty string when no value is given.
def states_xpath(*arr)
    conditions = arr.collect do |state|
        "STATE=#{state}"
    end

    conditions.join(" or ")
end
93

    
94
################################################################################
95
# Options
96
################################################################################
97

    
98
mode    = nil    # **must** be set to something other than nil using the options
force   = false  # By default, don't recreate/delete suspended and poweroff VMs
repeat  = 2      # By default, wait for 2 monitoring cycles
fencing = true   # Fencing stays enabled unless --no-fencing is given

opts = GetoptLong.new(
            ['--migrate',     '-m', GetoptLong::NO_ARGUMENT],
            ['--delete',      '-d', GetoptLong::NO_ARGUMENT],
            ['--recreate',    '-r', GetoptLong::NO_ARGUMENT],
            ['--force',       '-f', GetoptLong::NO_ARGUMENT],
            ['--pause',       '-p', GetoptLong::REQUIRED_ARGUMENT],
            ['--no-fencing',  '-u', GetoptLong::NO_ARGUMENT]
        )

begin
    opts.each do |opt, arg|
        case opt
            when '--migrate'
                mode = :migrate
            when '--delete'
                mode = :delete
            when '--recreate'
                mode = :recreate
            when '--force'
                force = true
            when '--pause'
                # String#to_i would turn a malformed value into 0, and 0
                # silently disables the wait before fencing. Integer()
                # rejects such input; base 10 avoids octal parsing of
                # values with a leading zero.
                begin
                    repeat = Integer(arg, 10)
                rescue ArgumentError
                    raise ArgumentError,
                        "Invalid value '#{arg}' for --pause, an integer is required."
                end
            when '--no-fencing'
                fencing = false
        end
    end
rescue StandardError => e
    # StandardError covers option parsing errors without also trapping
    # signals or SystemExit as a rescue of Exception would
    log_error e.to_s
    exit_error
end

# None of the mode options was given
if mode.nil?
    log_error "Exiting. A mode must be supplied."
    exit_error
end
138

    
139
################################################################################
140
# Main
141
################################################################################
142

    
143
log "Hook launched"

# Create the client used for every OpenNebula call below
begin
    client = Client.new()
rescue StandardError => e
    log_error e.to_s
    exit_error
end

sys  = OpenNebula::System.new(client)
conf = sys.get_configuration

# Check the call result explicitly so the real failure reason is logged,
# instead of relying on the lookup below blowing up on an error object
if OpenNebula.is_error?(conf)
    log_error "Could not get MONITORING_INTERVAL"
    log_error conf.message.to_s
    exit_error
end

begin
    # Fall back to 60 when the configuration holds no MONITORING_INTERVAL
    MONITORING_INTERVAL = conf['MONITORING_INTERVAL'] || 60
rescue StandardError => e
    log_error "Could not get MONITORING_INTERVAL"
    log_error e.to_s
    exit_error
end
162

    
163
# Retrieve hostname
host = OpenNebula::Host.new_with_id(HOST_ID, client)
rc   = nil

# Refreshes the host information and aborts the hook when the call fails
fetch_host_info = lambda do
    rc = host.info

    next unless OpenNebula.is_error?(rc)

    log_error "Could not get host info"
    exit_error
end

fetch_host_info.call

log "hostname: #{host.name}"

if repeat > 0
    log "Wait #{repeat} cycles."

    # Sleep through the desired number of monitor interval
    wait_seconds = MONITORING_INTERVAL.to_i * repeat

    log "Sleeping #{wait_seconds} seconds."
    sleep wait_seconds

    fetch_host_info.call

    # If the host came back, exit! avoid duplicated VMs
    # NOTE(review): 3 is presumably the host ERROR state - confirm against
    # the OpenNebula host state list
    unless host.state == 3
        log "Exiting. Host came back after waiting."
        exit 0
    end
end
195

    
196
# Do fencing
if fencing
    # The host XML is handed to the fence script as a base64 argument
    host64 = Base64::strict_encode64(host.to_xml)

    log "Fencing enabled"

    begin
        # capture2e reads the merged stdout/stderr while the script runs and
        # closes every pipe when it finishes. Waiting for the exit status
        # before reading can block forever once the script fills the pipe
        # buffer, and leaves the pipes open.
        fence_output, fence_status = Open3.capture2e(FENCE_HOST, host64)

        if fence_status.success?
            log fence_output
            log "Fencing success"
        else
            raise "#{fence_output}\nFencing error"
        end
    rescue StandardError => e
        # Also reached when the fence script cannot be executed at all
        log_error e.to_s
        exit_error
    end
else
    log "WARNING: Fencing disabled"
end
217

    
218
# Loop through all vms
vms = VirtualMachinePool.new(client)
rc  = vms.info_all

if OpenNebula.is_error?(rc)
    # Log the reason here and call exit_error without arguments; passing the
    # message to a zero-argument exit_error raises ArgumentError
    log_error "Could not get vm pool"
    exit_error
end

# STATE=3: ACTIVE (LCM unknown)
# STATE=5: SUSPENDED
# STATE=8: POWEROFF

if mode == :recreate && !force
    log "states: 3"
    state = states_xpath(3)
else
    log "states: 3, 5, 8"
    state = states_xpath(3, 5, 8)
end

# Select the IDs of the VMs in the chosen states that have a history record
# with the name of this host
xpath = "/VM_POOL/VM[#{state}]/HISTORY_RECORDS/HISTORY[HOSTNAME=\"#{host.name}\" and last()]"
vm_ids_array = vms.retrieve_elements("#{xpath}/../../ID")

if vm_ids_array
    log "vms: #{vm_ids_array}"

    vm_ids_array.each do |vm_id|
        vm = OpenNebula::VirtualMachine.new_with_id(vm_id, client)
        rc = vm.info

        if OpenNebula.is_error?(rc)
            log_error "Could not get info of VM #{vm_id}"
            next
        end

        case mode
        when :recreate
            log "recreate #{vm_id}"
            rc = vm.delete(true)
        when :delete
            log "delete #{vm_id}"
            rc = vm.delete
        when :migrate
            log "resched #{vm_id}"
            rc = vm.resched
        else
            log_error "unkown mode '#{mode}'"
            exit_error
        end

        # Report failed actions instead of dropping the result; the
        # remaining VMs are still processed
        if OpenNebula.is_error?(rc)
            log_error "Action failed on VM #{vm_id}: #{rc.message}"
        end
    end
else
    log "No VMs found."
end

log "Hook finished"
exit 0