summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark McLoughlin <markmc@redhat.com>2013-02-23 15:24:20 +0000
committerMark McLoughlin <markmc@redhat.com>2013-02-23 16:18:47 +0000
commit38997fc916904e47dcb847e1737eb26cbd38f6e1 (patch)
tree3fe6c78b291d15d1fd4b658acae58a95a12b5524
parenta42845e455c74f41852babbbd09a3514021ea71d (diff)
downloadnova-38997fc916904e47dcb847e1737eb26cbd38f6e1.tar.gz
nova-38997fc916904e47dcb847e1737eb26cbd38f6e1.tar.xz
nova-38997fc916904e47dcb847e1737eb26cbd38f6e1.zip
Clean unused kernels and ramdisks from image cache
Fixes bug #1132138 Only unused disk images are currently cleaned up by the image cache manager but it seems logical to clean up unused kernels and ramdisks too. Achieve that by writing kernels and ramdisks to disk using the sha1 sum of their ID as the filename. This is the same scheme as used for disk image filenames and causes the image cache manager to consider them for cleanup. We also make the cache manager take note of in use kernels and ramdisks when iterating over the list of instances. A nasty upgrade concern is that if we immediately switch to writing kernels to disk using this scheme then, where shared storage is used, we can have older image cache managers on remote compute nodes cleaning up kernels because they appear unused. To mitigate that, turn off this behaviour by default and allow it to be enabled using a new config option. This option will be removed in future and the behaviour enabled by default. DocImpact: new remove_unused_kernels option Change-Id: I56bba9fa6596601104498e262c2e657f0eae2fa0
-rw-r--r--nova/tests/test_imagecache.py27
-rwxr-xr-xnova/virt/libvirt/driver.py7
-rw-r--r--nova/virt/libvirt/imagecache.py59
3 files changed, 71 insertions, 22 deletions
diff --git a/nova/tests/test_imagecache.py b/nova/tests/test_imagecache.py
index 0c5c6d02c..b225a2116 100644
--- a/nova/tests/test_imagecache.py
+++ b/nova/tests/test_imagecache.py
@@ -163,6 +163,8 @@ class ImageCacheManagerTestCase(test.TestCase):
'vm_state': '',
'task_state': ''},
{'image_ref': '2',
+ 'kernel_id': '21',
+ 'ramdisk_id': '22',
'host': 'remotehost',
'name': 'inst-3',
'uuid': '789',
@@ -174,18 +176,24 @@ class ImageCacheManagerTestCase(test.TestCase):
# The argument here should be a context, but it's mocked out
image_cache_manager._list_running_instances(None, all_instances)
- self.assertEqual(len(image_cache_manager.used_images), 2)
+ self.assertEqual(len(image_cache_manager.used_images), 4)
self.assertTrue(image_cache_manager.used_images['1'] ==
(1, 0, ['inst-1']))
self.assertTrue(image_cache_manager.used_images['2'] ==
(1, 1, ['inst-2', 'inst-3']))
+ self.assertTrue(image_cache_manager.used_images['21'] ==
+ (0, 1, ['inst-3']))
+ self.assertTrue(image_cache_manager.used_images['22'] ==
+ (0, 1, ['inst-3']))
self.assertTrue('inst-1' in image_cache_manager.instance_names)
self.assertTrue('123' in image_cache_manager.instance_names)
- self.assertEqual(len(image_cache_manager.image_popularity), 2)
+ self.assertEqual(len(image_cache_manager.image_popularity), 4)
self.assertEqual(image_cache_manager.image_popularity['1'], 1)
self.assertEqual(image_cache_manager.image_popularity['2'], 2)
+ self.assertEqual(image_cache_manager.image_popularity['21'], 1)
+ self.assertEqual(image_cache_manager.image_popularity['22'], 1)
def test_list_resizing_instances(self):
all_instances = [{'image_ref': '1',
@@ -703,6 +711,8 @@ class ImageCacheManagerTestCase(test.TestCase):
def test_verify_base_images(self):
hashed_1 = '356a192b7913b04c54574d18c28d46e6395428ab'
+ hashed_21 = '472b07b9fcf2c2451e8781e944bf5f77cd8457c8'
+ hashed_22 = '12c6fc06c99a462375eeb3f43dfd832b08ca9e17'
hashed_42 = '92cfceb39d57d914ed8b14d0e37643de0797ae56'
self.flags(instances_path='/instance_path')
@@ -715,6 +725,8 @@ class ImageCacheManagerTestCase(test.TestCase):
'e09c675c2d1cfac32dae3c2d83689c8c94bc693b_sm',
hashed_42,
hashed_1,
+ hashed_21,
+ hashed_22,
'%s_5368709120' % hashed_1,
'%s_10737418240' % hashed_1,
'00000004']
@@ -744,8 +756,10 @@ class ImageCacheManagerTestCase(test.TestCase):
if path == fq_path(p) + '.info':
return False
- if path in ['/instance_path/_base/%s_sm' % hashed_1,
- '/instance_path/_base/%s_sm' % hashed_42]:
+ if path in ['/instance_path/_base/%s_sm' % i for i in [hashed_1,
+ hashed_21,
+ hashed_22,
+ hashed_42]]:
return False
self.fail('Unexpected path existence check: %s' % path)
@@ -800,6 +814,8 @@ class ImageCacheManagerTestCase(test.TestCase):
'vm_state': '',
'task_state': ''},
{'image_ref': '1',
+ 'kernel_id': '21',
+ 'ramdisk_id': '22',
'host': CONF.host,
'name': 'instance-2',
'uuid': '456',
@@ -850,7 +866,8 @@ class ImageCacheManagerTestCase(test.TestCase):
image_cache_manager.verify_base_images(None, all_instances)
# Verify
- active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1)]
+ active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1),
+ fq_path(hashed_21), fq_path(hashed_22)]
self.assertEquals(image_cache_manager.active_base_files, active)
for rem in [fq_path('e97222e91fc4241f49a7f520d1dcf446751129b3_sm'),
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index dfae88c23..b0649b105 100755
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -44,7 +44,6 @@ import errno
import eventlet
import functools
import glob
-import hashlib
import os
import shutil
import socket
@@ -1718,7 +1717,7 @@ class LibvirtDriver(driver.ComputeDriver):
'ramdisk_id': instance['ramdisk_id']}
if disk_images['kernel_id']:
- fname = disk_images['kernel_id']
+ fname = imagecache.get_cache_fname(disk_images, 'kernel_id')
raw('kernel').cache(fetch_func=libvirt_utils.fetch_image,
context=context,
filename=fname,
@@ -1726,7 +1725,7 @@ class LibvirtDriver(driver.ComputeDriver):
user_id=instance['user_id'],
project_id=instance['project_id'])
if disk_images['ramdisk_id']:
- fname = disk_images['ramdisk_id']
+ fname = imagecache.get_cache_fname(disk_images, 'ramdisk_id')
raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_image,
context=context,
filename=fname,
@@ -1734,7 +1733,7 @@ class LibvirtDriver(driver.ComputeDriver):
user_id=instance['user_id'],
project_id=instance['project_id'])
- root_fname = hashlib.sha1(str(disk_images['image_id'])).hexdigest()
+ root_fname = imagecache.get_cache_fname(disk_images, 'image_id')
size = instance['root_gb'] * 1024 * 1024 * 1024
inst_type = instance['instance_type']
diff --git a/nova/virt/libvirt/imagecache.py b/nova/virt/libvirt/imagecache.py
index d66d61415..74a63b81e 100644
--- a/nova/virt/libvirt/imagecache.py
+++ b/nova/virt/libvirt/imagecache.py
@@ -54,6 +54,12 @@ imagecache_opts = [
cfg.BoolOpt('remove_unused_base_images',
default=True,
help='Should unused base images be removed?'),
+ cfg.BoolOpt('remove_unused_kernels',
+ default=False,
+ help='Should unused kernel images be removed? This is only '
+ 'safe to enable if all compute nodes have been updated '
+ 'to support this option. This will enabled by default '
+ 'in future.'),
cfg.IntOpt('remove_unused_resized_minimum_age_seconds',
default=3600,
help='Unused resized base images younger than this will not be '
@@ -76,6 +82,29 @@ CONF.import_opt('host', 'nova.netconf')
CONF.import_opt('instances_path', 'nova.compute.manager')
+def get_cache_fname(images, key):
+ """Return a filename based on the SHA1 hash of a given image ID.
+
+ Image files stored in the _base directory that match this pattern
+ are considered for cleanup by the image cache manager. The cache
+ manager considers the file to be in use if it matches an instance's
+ image_ref, kernel_id or ramdisk_id property.
+
+ However, in grizzly-3 and before, only the image_ref property was
+ considered. This means that it's unsafe to store kernel and ramdisk
+ images using this pattern until we're sure that all compute nodes
+ are running a cache manager newer than grizzly-3. For now, we
+ require admins to confirm that by setting the remove_unused_kernels
+ boolean but, at some point in the future, we'll be safely able to
+ assume this.
+ """
+ image_id = str(images[key])
+ if not CONF.remove_unused_kernels and key in ['kernel_id', 'ramdisk_id']:
+ return image_id
+ else:
+ return hashlib.sha1(image_id).hexdigest()
+
+
def get_info_filename(base_path):
"""Construct a filename for storing additional information about a base
image.
@@ -240,8 +269,8 @@ class ImageCacheManager(object):
"""Return a list of the images present in _base.
Determine what images we have on disk. There will be other files in
- this directory (for example kernels) so we only grab the ones which
- are the right length to be disk images.
+ this directory so we only grab the ones which are the right length
+ to be disk images.
Note that this does not return a value. It instead populates a class
variable with a list of images that we need to try and explain.
@@ -278,18 +307,22 @@ class ImageCacheManager(object):
self.instance_names.add(instance['name'] + '_resize')
self.instance_names.add(instance['uuid'] + '_resize')
- image_ref_str = str(instance['image_ref'])
- local, remote, insts = self.used_images.get(image_ref_str,
- (0, 0, []))
- if instance['host'] == CONF.host:
- local += 1
- else:
- remote += 1
- insts.append(instance['name'])
- self.used_images[image_ref_str] = (local, remote, insts)
+ for image_key in ['image_ref', 'kernel_id', 'ramdisk_id']:
+ try:
+ image_ref_str = str(instance[image_key])
+ except KeyError:
+ continue
+ local, remote, insts = self.used_images.get(image_ref_str,
+ (0, 0, []))
+ if instance['host'] == CONF.host:
+ local += 1
+ else:
+ remote += 1
+ insts.append(instance['name'])
+ self.used_images[image_ref_str] = (local, remote, insts)
- self.image_popularity.setdefault(image_ref_str, 0)
- self.image_popularity[image_ref_str] += 1
+ self.image_popularity.setdefault(image_ref_str, 0)
+ self.image_popularity[image_ref_str] += 1
def _list_backing_images(self):
"""List the backing images currently in use."""