Skip to content

Commit

Permalink
Merge pull request #1066 from Backblaze/persistent-bucket-auto-clean
Browse files Browse the repository at this point in the history
Persistent bucket auto clean
  • Loading branch information
mpnowacki-reef authored Jan 14, 2025
2 parents cb77345 + 8e4473d commit cf9efa2
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Delete files used by integration tests right away.
24 changes: 20 additions & 4 deletions test/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import uuid
from os import environ, path
from tempfile import TemporaryDirectory
from typing import Generator

import pytest
from b2sdk.v2 import B2_ACCOUNT_INFO_ENV_VAR, XDG_CONFIG_HOME_ENV_VAR, Bucket
Expand All @@ -35,6 +36,7 @@
from .persistent_bucket import (
PersistentBucketAggregate,
get_or_create_persistent_bucket,
prune_used_files,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -421,18 +423,32 @@ def b2_uri_args(apiver_int):
return b2_uri_args_v3


# -- Persistent bucket fixtures --
# -- Persistent bucket code ---

# Subfolder names handed out by the `unique_subfolder` fixture; the list is
# passed to `prune_used_files` when `base_persistent_bucket` is torn down.
subfolder_list: list[str] = []

@pytest.fixture(scope="session")
def base_persistent_bucket(b2_api):
    """
    Provide the persistent bucket once per test session.

    On teardown, the files under every subfolder recorded in `subfolder_list`
    are pruned from the bucket.
    """
    persistent = get_or_create_persistent_bucket(b2_api)
    yield persistent
    prune_used_files(
        b2_api=b2_api,
        bucket=persistent,
        folders=subfolder_list,
    )


@pytest.fixture
def unique_subfolder():
    """
    Yield a fresh `test-<8 hex chars>` subfolder name.

    The name is also appended to `subfolder_list` before it is yielded.
    """
    folder_name = f'test-{uuid.uuid4().hex[:8]}'
    subfolder_list.append(folder_name)
    yield folder_name


@pytest.fixture
def persistent_bucket(
    unique_subfolder, base_persistent_bucket
) -> Generator[PersistentBucketAggregate, None, None]:
    """
    Since all consumers of the `bucket_name` fixture expect a new bucket to be created,
    we need to mirror this behavior by pairing the shared persistent bucket
    with a unique subfolder.

    Yields:
        PersistentBucketAggregate: built from the name of `base_persistent_bucket`
        and the `unique_subfolder` value.
    """
    # NOTE: the three-argument `Generator[...]` form is used on purpose; the
    # single-argument shorthand is only accepted at runtime from Python 3.13.
    yield PersistentBucketAggregate(base_persistent_bucket.name, unique_subfolder)

    logger.info("Persistent bucket aggregate finished completion.")
49 changes: 36 additions & 13 deletions test/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from os import environ, linesep
from pathlib import Path
from tempfile import mkdtemp, mktemp
from typing import Any, Iterable, TypeVar

import backoff
from b2sdk.v2 import (
Expand Down Expand Up @@ -131,6 +132,10 @@ def bucket_name_part(length: int) -> str:
logger.info('name_part: %s', name_part)
return name_part

T = TypeVar('T')


def wrap_iterables(generators: Iterable[Iterable[T]]) -> Iterable[T]:
    """
    Lazily chain several iterables into one stream of items.

    Args:
        generators (Iterable[Iterable[T]]): Iterables to consume one after
            another, in the order given. Any iterable of iterables is accepted,
            not only a list.

    Returns:
        Iterable[T]: A generator yielding every item of every given iterable.
    """
    for g in generators:
        yield from g

@dataclass
class Api:
Expand Down Expand Up @@ -219,23 +224,41 @@ def clean_buckets(self, quick=False):
TooManyRequests,
max_tries=8,
)
def clean_bucket(self, bucket: Bucket | str):
if isinstance(bucket, str):
bucket = self.api.get_bucket_by_name(bucket)
def clean_bucket(self, bucket_object: Bucket | str, only_files: bool = False, only_folders: list[str] | None = None, ignore_retentions: bool = False):
"""
Clean contents of bucket, by default also deleting the bucket.
# try optimistic bucket removal first, since it is completely free (as opposed to `ls` call)
try:
return self.api.delete_bucket(bucket)
except (BucketIdNotFound, v3BucketIdNotFound):
return # bucket was already removed
except BadRequest as exc:
assert exc.code == 'cannot_delete_non_empty_bucket'
Args:
bucket_object (Bucket | str): Bucket object or name
only_files (bool): If True, only delete files and keep the bucket
only_folders (list[str] | None): If not None, filter to only files in given folders.
ignore_retentions (bool): If deletion should happen regardless of files' retention mode.
"""
bucket: Bucket
if isinstance(bucket_object, str):
bucket = self.api.get_bucket_by_name(bucket_object)
else:
bucket = bucket_object

if not only_files:
# try optimistic bucket removal first, since it is completely free (as opposed to `ls` call)
try:
return self.api.delete_bucket(bucket)
except (BucketIdNotFound, v3BucketIdNotFound):
return # bucket was already removed
except BadRequest as exc:
assert exc.code == 'cannot_delete_non_empty_bucket'

files_leftover = False
file_versions = bucket.ls(latest_only=False, recursive=True)

file_versions: Iterable[Any]
if only_folders:
file_versions = wrap_iterables([bucket.ls(latest_only=False, recursive=True, folder_to_list=folder,) for folder in only_folders])
else:
file_versions = bucket.ls(latest_only=False, recursive=True)

for file_version_info, _ in file_versions:
if file_version_info.file_retention:
if file_version_info.file_retention and not ignore_retentions:
if file_version_info.file_retention.mode == RetentionMode.GOVERNANCE:
print('Removing retention from file version:', file_version_info.id_)
self.api.update_file_retention(
Expand Down Expand Up @@ -272,7 +295,7 @@ def clean_bucket(self, bucket: Bucket | str):

if files_leftover:
print('Unable to remove bucket because some retained files remain')
else:
elif not only_files:
print('Removing bucket:', bucket.name)
try:
self.api.delete_bucket(bucket)
Expand Down
4 changes: 4 additions & 0 deletions test/integration/persistent_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import os
from dataclasses import dataclass
from functools import cached_property
from typing import List

import backoff
from b2sdk.v2 import Bucket
Expand Down Expand Up @@ -62,3 +63,6 @@ def get_or_create_persistent_bucket(b2_api: Api) -> Bucket:
# add the new bucket name to the list of bucket names
b2_api.bucket_name_log.append(bucket_name)
return bucket

def prune_used_files(b2_api: Api, bucket: Bucket, folders: List[str]) -> None:
    """
    Delete the files stored under the given folders, keeping the bucket itself.

    Args:
        b2_api (Api): API helper whose `clean_bucket` performs the deletion.
        bucket (Bucket): Bucket to prune.
        folders (List[str]): Folders whose files should be removed. When empty,
            nothing is deleted.
    """
    if not folders:
        # `Api.clean_bucket` falls back to listing the whole bucket when
        # `only_folders` is empty, which would wipe files this run never created.
        return
    b2_api.clean_bucket(
        bucket_object=bucket,
        only_files=True,
        only_folders=folders,
        ignore_retentions=True,
    )

0 comments on commit cf9efa2

Please sign in to comment.