Comment 3 for bug 1933265

Revision history for this message
masterpe (michiel-y) wrote :

This bug started in Stein.

I have written a script that deletes the snapshots that are created for the backups, except for the most recent backup snapshot.

#!/usr/bin/python3

import rados
import sys
import rbd
import re
import time

"""
Volumes can have two types of names:
volume-5b2851f8-4722-42c2-a6dd-4dc359afbf6c
volume-81895053-a4d4-4cdb-9e1f-4e8340d56949.deleted

Volumes with the suffix ".deleted" are volumes that have been deleted but still have related Glance images or have children in new Cinder volumes.

A Cinder volume can have snapshots, Glance images and cinder backup.
In Ceph, a Cinder snapshot is a Ceph snapshot with the protection set, whose name is the prefix "snapshot-" followed by the ID of the snapshot:
snapshot-722ad9a3-e1bf-416f-81d4-01a4f32c02ee

When a Glance image gets created from a Cinder volume, a new Cinder volume gets created and in Ceph a new snapshot gets
created with the name volume-<UUID of the new cinder_volume>.clone_snap

When a cinder-backup full backup gets created.
    On the source ceph cluster the image gets a snapshot with syntax "backup.<UUID of the backup>.snap.<timestamp>":
    backup.03fc2dfb-9654-4f00-ac11-a8f471c599d0.snap.1625049437.645906

    On the destination ceph cluster the naming is volume-<UUID of the volume>.backup.<UUID of the backup> and the name of
    the snapshot backup.<UUID of the backup>.snap.<timestamp>

When an incremental cinder backup is created
    on the source the naming of the snapshot is the same as with a full backup: backup.<UUID of the backup>.snap.<timestamp>
    on the destination the volume of the full backup is used and a snapshot is created with syntax "backup.<UUID of the backup>.snap.<timestamp>"

When a full backup also has incremental backups, you are unable to delete the full backup in Cinder.

So on the source Ceph cluster, a volume can have three types of snapshots:
    snapshot-722ad9a3-e1bf-416f-81d4-01a4f32c02ee
    volume-7048a7bf-530b-42ee-bfe9-6221c3b9f384.clone_snap
    backup.e224e8d2-5bda-4a8a-8841-0c578737f865.snap.1625049686.5710375
"""

class Cleanup:
    """Delete old cinder-backup snapshots from Cinder volumes in a Ceph pool.

    For every RBD image named ``volume-<UUID>`` the snapshots named
    ``backup.<UUID>.snap.<timestamp>`` are collected; all of them except the
    one with the highest timestamp can then be removed.

    The connection is opened in ``__init__``. Call ``close()`` when done, or
    use the instance as a context manager.
    """

    # While True (the default) deletions are only printed, never executed.
    dryRun = True

    # Name of a plain Cinder volume image: "volume-" plus a 36 character ID.
    # Used with fullmatch(), so names with a suffix such as ".deleted" or
    # ".backup.<UUID>" are rejected.
    _VOLUME_NAME_RE = re.compile(r'volume-[a-z0-9-]{36}')

    # Name of a cinder-backup snapshot: backup.<UUID>.snap.<secs>.<fraction>.
    # Every literal dot is escaped so that only real backup snapshots match.
    _BACKUP_SNAPSHOT_RE = re.compile(
        r'backup\.[a-z0-9-]{36}\.snap\.[0-9]+\.[0-9]+')

    def __init__(self, conffile='/etc/ceph/ceph.conf', pool='volumes'):
        """Connect to the cluster, open the pool and list its RBD images.

        Args:
            conffile: Path of the Ceph configuration file to use.
            pool: Name of the pool that holds the Cinder volumes.
        """
        self.__cluster__ = rados.Rados(conffile=conffile)
        print("\nlibrados version: {}".format(str(self.__cluster__.version())))
        print("Will attempt to connect to: {}".format(str(self.__cluster__.conf_get('mon host'))))

        self.__cluster__.connect()
        try:
            print("\nCluster ID: {}".format(self.__cluster__.get_fsid()))

            print("\n\nCluster Statistics")
            print("==================")
            cluster_stats = self.__cluster__.get_cluster_stats()
            # The header above announces the statistics, so actually show them.
            for key, value in sorted(cluster_stats.items()):
                print("{}: {}".format(key, value))

            ## open pool
            self.__ioctx__ = self.__cluster__.open_ioctx(pool)
            self.__rbd_inst__ = rbd.RBD()
            self.__allImages__ = self.__rbd_inst__.list(self.__ioctx__)
        except Exception:
            # Do not leave a half-initialised connection behind.
            self.__cluster__.shutdown()
            raise

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the pool context and the cluster connection."""
        self.close()

    def close(self):
        """Close the pool I/O context and shut down the cluster connection."""
        self.__ioctx__.close()
        self.__cluster__.shutdown()

    @classmethod
    def _isBackupSnapshot(cls, snapshotName):
        """Return True when snapshotName is a cinder-backup snapshot name."""
        return cls._BACKUP_SNAPSHOT_RE.fullmatch(snapshotName) is not None

    def getAllCinderVolumes(self):
        """Return the names of all images that are plain Cinder volumes.

        Only names of the exact form ``volume-<36 character ID>`` are
        returned, in the order in which the pool listing provided them.
        """
        return [imageName for imageName in self.__allImages__
                if self._VOLUME_NAME_RE.fullmatch(imageName) is not None]

    def getSnapshotsOfImage(self, imageName):
        """Return the names of the backup snapshots of the given image.

        Args:
            imageName: Name of the RBD image to inspect.

        Returns:
            A list with the names of all snapshots of the image that look
            like ``backup.<UUID>.snap.<timestamp>``.
        """
        # The context manager closes the image handle again, also on errors.
        with rbd.Image(self.__ioctx__, name=imageName) as image:
            return [snapshot['name'] for snapshot in image.list_snaps()
                    if self._isBackupSnapshot(snapshot['name'])]

    def __findLatestSnapshot__(self, snapshots):
        """Return the snapshot name with the highest timestamp.

        The timestamp is everything after the third dot of the name. When
        several names share the highest timestamp the last one wins. An empty
        input yields an empty string.
        """
        latestName = ''
        latestTimestamp = 0
        for snapshot in snapshots:
            snapshotTimestamp = float(snapshot.split('.', 3)[3])
            if latestTimestamp <= snapshotTimestamp:
                latestTimestamp = snapshotTimestamp
                latestName = snapshot
        return latestName

    def getSnapshotsOfImageThatCanBeDeleted(self, imageName):
        """Return all backup snapshots of the image except the latest one.

        Args:
            imageName: Name of the RBD image to inspect.

        Returns:
            A list of snapshot names; empty when the image has no backup
            snapshot or only a single one.
        """
        snapshots = self.getSnapshotsOfImage(imageName)
        if not snapshots:
            return []
        snapshots.remove(self.__findLatestSnapshot__(snapshots))
        return snapshots

    def deleteSnapshots(self, imageName, snapshots, sleepSeconds=60):
        """Remove the given snapshots from the image, pausing after each one.

        While ``dryRun`` is True the snapshots are only printed and nothing
        is removed.

        Args:
            imageName: Name of the RBD image that owns the snapshots.
            snapshots: Names of the snapshots to remove.
            sleepSeconds: Pause after every removal to reduce the load.
        """
        print('the following snapshots will be deleted from %s: %s' % (imageName, snapshots))
        if not snapshots:
            # Nothing to do, so there is no need to open the image at all.
            return

        with rbd.Image(self.__ioctx__, name=imageName) as image:
            for snapshot in snapshots:
                print('deleting %s of %s' % (snapshot, imageName))
                if not self.dryRun:
                    image.remove_snap(snapshot)

                    # sleep for X seconds to reduce the load
                    self.sleepTime(sleepSeconds)

    def sleepTime(self, sleepTime):
        """Sleep for sleepTime seconds, printing the remaining time.

        The wait is split into steps of at most 10 seconds; the remaining
        time is printed before every step.
        """
        interval = 10
        print('sleep for %s seconds' % (sleepTime))
        remaining = sleepTime
        while remaining > 0:
            print('waiting for %s sec' % (remaining))
            step = min(interval, remaining)
            time.sleep(step)
            remaining = remaining - step

# Only run the cleanup when executed as a script, so that importing this
# module does not connect to the Ceph cluster.
if __name__ == "__main__":
    # Cleanup.dryRun is True by default, so nothing is removed unless it is
    # switched to False first.
    cleanup = Cleanup()

    # To handle a single volume instead of the whole pool, call the two
    # methods below with one image name only, for example
    # 'volume-03ffce25-2137-40f8-be7f-d31c19c5ed55'.
    for imageName in cleanup.getAllCinderVolumes():
        snapshots = cleanup.getSnapshotsOfImageThatCanBeDeleted(imageName)

        # Skip images that have nothing but their latest backup snapshot.
        if snapshots:
            cleanup.deleteSnapshots(imageName, snapshots)