diff options
author | Mark McLoughlin <markmc@redhat.com> | 2013-02-23 15:24:20 +0000 |
---|---|---|
committer | Mark McLoughlin <markmc@redhat.com> | 2013-02-23 16:18:47 +0000 |
commit | 38997fc916904e47dcb847e1737eb26cbd38f6e1 (patch) | |
tree | 3fe6c78b291d15d1fd4b658acae58a95a12b5524 | |
parent | a42845e455c74f41852babbbd09a3514021ea71d (diff) | |
download | nova-38997fc916904e47dcb847e1737eb26cbd38f6e1.tar.gz nova-38997fc916904e47dcb847e1737eb26cbd38f6e1.tar.xz nova-38997fc916904e47dcb847e1737eb26cbd38f6e1.zip |
Clean unused kernels and ramdisks from image cache
Fixes bug #1132138
Only unused disk images are currently cleaned up by the image cache
manager but it seems logical to clean up unused kernels and ramdisks
too.
Achieve that by writing kernels and ramdisks to disk using the sha1
sum of their ID as the filename. This is the same scheme as used for
disk image filenames and causes the image cache manager to consider
them for cleanup. We also make the cache manager take note of in use
kernels and ramdisks when iterating over the list of instances.
A nasty upgrade concern is that if we immediately switch to writing
kernels to disk using this scheme then, where shared storage is used,
we can have older image cache managers on remote compute nodes cleaning
up kernels because they appear unused. To mitigate that, turn off this
behaviour by default and allow it to be enabled using a new config
option. This option will be removed in future and the behaviour enabled
by default.
DocImpact: new remove_unused_kernels option
Change-Id: I56bba9fa6596601104498e262c2e657f0eae2fa0
-rw-r--r-- | nova/tests/test_imagecache.py | 27 | ||||
-rwxr-xr-x | nova/virt/libvirt/driver.py | 7 | ||||
-rw-r--r-- | nova/virt/libvirt/imagecache.py | 59 |
3 files changed, 71 insertions, 22 deletions
diff --git a/nova/tests/test_imagecache.py b/nova/tests/test_imagecache.py index 0c5c6d02c..b225a2116 100644 --- a/nova/tests/test_imagecache.py +++ b/nova/tests/test_imagecache.py @@ -163,6 +163,8 @@ class ImageCacheManagerTestCase(test.TestCase): 'vm_state': '', 'task_state': ''}, {'image_ref': '2', + 'kernel_id': '21', + 'ramdisk_id': '22', 'host': 'remotehost', 'name': 'inst-3', 'uuid': '789', @@ -174,18 +176,24 @@ class ImageCacheManagerTestCase(test.TestCase): # The argument here should be a context, but it's mocked out image_cache_manager._list_running_instances(None, all_instances) - self.assertEqual(len(image_cache_manager.used_images), 2) + self.assertEqual(len(image_cache_manager.used_images), 4) self.assertTrue(image_cache_manager.used_images['1'] == (1, 0, ['inst-1'])) self.assertTrue(image_cache_manager.used_images['2'] == (1, 1, ['inst-2', 'inst-3'])) + self.assertTrue(image_cache_manager.used_images['21'] == + (0, 1, ['inst-3'])) + self.assertTrue(image_cache_manager.used_images['22'] == + (0, 1, ['inst-3'])) self.assertTrue('inst-1' in image_cache_manager.instance_names) self.assertTrue('123' in image_cache_manager.instance_names) - self.assertEqual(len(image_cache_manager.image_popularity), 2) + self.assertEqual(len(image_cache_manager.image_popularity), 4) self.assertEqual(image_cache_manager.image_popularity['1'], 1) self.assertEqual(image_cache_manager.image_popularity['2'], 2) + self.assertEqual(image_cache_manager.image_popularity['21'], 1) + self.assertEqual(image_cache_manager.image_popularity['22'], 1) def test_list_resizing_instances(self): all_instances = [{'image_ref': '1', @@ -703,6 +711,8 @@ class ImageCacheManagerTestCase(test.TestCase): def test_verify_base_images(self): hashed_1 = '356a192b7913b04c54574d18c28d46e6395428ab' + hashed_21 = '472b07b9fcf2c2451e8781e944bf5f77cd8457c8' + hashed_22 = '12c6fc06c99a462375eeb3f43dfd832b08ca9e17' hashed_42 = '92cfceb39d57d914ed8b14d0e37643de0797ae56' self.flags(instances_path='/instance_path') @@ -715,6 +725,8 @@ class ImageCacheManagerTestCase(test.TestCase): 'e09c675c2d1cfac32dae3c2d83689c8c94bc693b_sm', hashed_42, hashed_1, + hashed_21, + hashed_22, '%s_5368709120' % hashed_1, '%s_10737418240' % hashed_1, '00000004'] @@ -744,8 +756,10 @@ class ImageCacheManagerTestCase(test.TestCase): if path == fq_path(p) + '.info': return False - if path in ['/instance_path/_base/%s_sm' % hashed_1, - '/instance_path/_base/%s_sm' % hashed_42]: + if path in ['/instance_path/_base/%s_sm' % i for i in [hashed_1, + hashed_21, + hashed_22, + hashed_42]]: return False self.fail('Unexpected path existence check: %s' % path) @@ -800,6 +814,8 @@ class ImageCacheManagerTestCase(test.TestCase): 'vm_state': '', 'task_state': ''}, {'image_ref': '1', + 'kernel_id': '21', + 'ramdisk_id': '22', 'host': CONF.host, 'name': 'instance-2', 'uuid': '456', @@ -850,7 +866,8 @@ class ImageCacheManagerTestCase(test.TestCase): image_cache_manager.verify_base_images(None, all_instances) # Verify - active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1)] + active = [fq_path(hashed_1), fq_path('%s_5368709120' % hashed_1), + fq_path(hashed_21), fq_path(hashed_22)] self.assertEquals(image_cache_manager.active_base_files, active) for rem in [fq_path('e97222e91fc4241f49a7f520d1dcf446751129b3_sm'), diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index dfae88c23..b0649b105 100755 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -44,7 +44,6 @@ import errno import eventlet import functools import glob -import hashlib import os import shutil import socket @@ -1718,7 +1717,7 @@ class LibvirtDriver(driver.ComputeDriver): 'ramdisk_id': instance['ramdisk_id']} if disk_images['kernel_id']: - fname = disk_images['kernel_id'] + fname = imagecache.get_cache_fname(disk_images, 'kernel_id') raw('kernel').cache(fetch_func=libvirt_utils.fetch_image, context=context, filename=fname, @@ -1726,7 +1725,7 @@ class LibvirtDriver(driver.ComputeDriver): user_id=instance['user_id'], project_id=instance['project_id']) if disk_images['ramdisk_id']: - fname = disk_images['ramdisk_id'] + fname = imagecache.get_cache_fname(disk_images, 'ramdisk_id') raw('ramdisk').cache(fetch_func=libvirt_utils.fetch_image, context=context, filename=fname, @@ -1734,7 +1733,7 @@ class LibvirtDriver(driver.ComputeDriver): user_id=instance['user_id'], project_id=instance['project_id']) - root_fname = hashlib.sha1(str(disk_images['image_id'])).hexdigest() + root_fname = imagecache.get_cache_fname(disk_images, 'image_id') size = instance['root_gb'] * 1024 * 1024 * 1024 inst_type = instance['instance_type'] diff --git a/nova/virt/libvirt/imagecache.py b/nova/virt/libvirt/imagecache.py index d66d61415..74a63b81e 100644 --- a/nova/virt/libvirt/imagecache.py +++ b/nova/virt/libvirt/imagecache.py @@ -54,6 +54,12 @@ imagecache_opts = [ cfg.BoolOpt('remove_unused_base_images', default=True, help='Should unused base images be removed?'), + cfg.BoolOpt('remove_unused_kernels', + default=False, + help='Should unused kernel images be removed? This is only ' + 'safe to enable if all compute nodes have been updated ' + 'to support this option. This will enabled by default ' + 'in future.'), cfg.IntOpt('remove_unused_resized_minimum_age_seconds', default=3600, help='Unused resized base images younger than this will not be ' @@ -76,6 +82,29 @@ CONF.import_opt('host', 'nova.netconf') CONF.import_opt('instances_path', 'nova.compute.manager') +def get_cache_fname(images, key): + """Return a filename based on the SHA1 hash of a given image ID. + + Image files stored in the _base directory that match this pattern + are considered for cleanup by the image cache manager. The cache + manager considers the file to be in use if it matches an instance's + image_ref, kernel_id or ramdisk_id property. + + However, in grizzly-3 and before, only the image_ref property was + considered. This means that it's unsafe to store kernel and ramdisk + images using this pattern until we're sure that all compute nodes + are running a cache manager newer than grizzly-3. For now, we + require admins to confirm that by setting the remove_unused_kernels + boolean but, at some point in the future, we'll be safely able to + assume this. + """ + image_id = str(images[key]) + if not CONF.remove_unused_kernels and key in ['kernel_id', 'ramdisk_id']: + return image_id + else: + return hashlib.sha1(image_id).hexdigest() + + def get_info_filename(base_path): """Construct a filename for storing additional information about a base image. @@ -240,8 +269,8 @@ class ImageCacheManager(object): """Return a list of the images present in _base. Determine what images we have on disk. There will be other files in - this directory (for example kernels) so we only grab the ones which - are the right length to be disk images. + this directory so we only grab the ones which are the right length + to be disk images. Note that this does not return a value. It instead populates a class variable with a list of images that we need to try and explain. @@ -278,18 +307,22 @@ class ImageCacheManager(object): self.instance_names.add(instance['name'] + '_resize') self.instance_names.add(instance['uuid'] + '_resize') - image_ref_str = str(instance['image_ref']) - local, remote, insts = self.used_images.get(image_ref_str, - (0, 0, [])) - if instance['host'] == CONF.host: - local += 1 - else: - remote += 1 - insts.append(instance['name']) - self.used_images[image_ref_str] = (local, remote, insts) + for image_key in ['image_ref', 'kernel_id', 'ramdisk_id']: + try: + image_ref_str = str(instance[image_key]) + except KeyError: + continue + local, remote, insts = self.used_images.get(image_ref_str, + (0, 0, [])) + if instance['host'] == CONF.host: + local += 1 + else: + remote += 1 + insts.append(instance['name']) + self.used_images[image_ref_str] = (local, remote, insts) - self.image_popularity.setdefault(image_ref_str, 0) - self.image_popularity[image_ref_str] += 1 + self.image_popularity.setdefault(image_ref_str, 0) + self.image_popularity[image_ref_str] += 1 def _list_backing_images(self): """List the backing images currently in use.""" |