Skip to content

Commit

Permalink
Merge pull request #47 from a-luna:refactoring_20240511
Browse files Browse the repository at this point in the history
Refactor backup_db_and_json_files procedure
  • Loading branch information
a-luna authored May 11, 2024
2 parents ffdb0bb + 45ad9e8 commit 74e6e4c
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ RUN echo "RATE_LIMIT_BURST=$RATE_LIMIT_BURST" >> /code/.env

RUN pip install -U pip setuptools wheel
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
RUN pip install --no-cache-dir -r /code/requirements.txt
EXPOSE 80
COPY ./app /code/app
RUN PYTHONPATH=/code/. python /code/./app/data/scripts/get_prod_data.py
Expand Down
11 changes: 8 additions & 3 deletions app/core/redis_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
class IRedisClient(Protocol):
@property
def client(self) -> Redis:
"""
Returns an active Redis client using config values generated from environment variables.
"""
...

def lock(self, name: str, blocking_timeout: float | int) -> Any:
Expand Down Expand Up @@ -52,12 +55,14 @@ def get(self, name: RedisKey) -> RedisResponse:

def time(self) -> float:
"""
Returns the server time as a 2-item tuple of ints:
(seconds since epoch, microseconds into this second).
Return POSIX timestamp as a float value representing seconds since the epoch
"""
...

def now(self) -> datetime:
"""
Return the current time as a time-zone aware datetime object
"""
...


Expand Down Expand Up @@ -97,7 +102,7 @@ def client(self) -> Redis:
self.logger.info("Successfully connected to Redis server.")
else:
self._handle_connect_attempt_failed()
except ConnectionError: # noqa: PERF203
except ConnectionError:
self._handle_connect_attempt_failed()
return self._client

Expand Down
1 change: 1 addition & 0 deletions app/data/scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# flake8: noqa
from app.data.scripts.backup_db_and_json_files import backup_db_and_json_files
from app.data.scripts.bootstrap_unicode_data import bootstrap_unicode_data
from app.data.scripts.get_xml_unicode_db import download_xml_unicode_database
from app.data.scripts.parse_xml_unicode_db import parse_xml_unicode_database
Expand Down
55 changes: 55 additions & 0 deletions app/data/scripts/backup_db_and_json_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

from app.config.api_settings import UnicodeApiSettings
from app.core.result import Result
from app.data.util.command import run_command
from app.data.util.spinners import Spinner


def backup_db_and_json_files(config: UnicodeApiSettings) -> Result[None]:
    """Create zipped backups of the SQLite DB and the parsed-JSON files, then upload both to S3.

    Each archive is deleted locally only after its upload succeeds. If either upload
    fails, the failed Result is returned unchanged; otherwise returns Result.Ok().
    """
    archive_spinner = Spinner()
    archive_spinner.start("Creating compressed backup files of SQLite DB and JSON files...")
    backup_sqlite_db(config)
    backup_json_files(config)
    archive_spinner.successful("Successfully created compressed backup files of SQLite DB and JSON files!")

    upload_spinner = Spinner()
    for zip_file in (config.DB_ZIP_FILE, config.JSON_ZIP_FILE):
        result = upload_zip_file_to_s3(config, zip_file)
        if result.failure:
            # NOTE(review): start("") before failed()/successful() looks like a Spinner
            # API requirement to render the final status -- confirm against Spinner.
            upload_spinner.start("")
            upload_spinner.failed(result.error)
            return result
        zip_file.unlink()
    upload_spinner.start("")
    upload_spinner.successful("Successfully uploaded backup files to S3 bucket!")
    return Result.Ok()


def backup_sqlite_db(config: UnicodeApiSettings):
    """Write the SQLite database file into a deflate-compressed zip archive at config.DB_ZIP_FILE."""
    with ZipFile(config.DB_ZIP_FILE, "w", ZIP_DEFLATED) as archive:
        # arcname is just the file's basename so the archive has no directory structure
        archive.write(config.DB_FILE, config.DB_FILE.name)


def backup_json_files(config: UnicodeApiSettings):
    """Write all parsed-Unicode JSON files into one deflate-compressed zip archive.

    The archive is created at config.JSON_ZIP_FILE -- the path the caller
    (backup_db_and_json_files) subsequently uploads to S3 and unlinks. The previous
    version hand-built the path as config.JSON_FOLDER / "unicode_json.zip"; since the
    caller operates on JSON_ZIP_FILE, the two must already point at the same file
    (TODO confirm against UnicodeApiSettings), and using the setting directly keeps
    this function consistent with backup_sqlite_db.
    """
    json_files = (
        config.PLANES_JSON,
        config.BLOCKS_JSON,
        config.CHAR_NAME_MAP,
        config.UNIHAN_CHARS_JSON,
        config.TANGUT_CHARS_JSON,
    )
    with ZipFile(config.JSON_ZIP_FILE, "w", ZIP_DEFLATED) as zip:
        for json_file in json_files:
            # store each file under its basename only (no directory structure)
            zip.write(json_file, json_file.name)


def upload_zip_file_to_s3(config: UnicodeApiSettings, local_file: Path) -> Result[None]:
    """Upload local_file to the S3 bucket under the current Unicode version, publicly readable (-P)."""
    # NOTE(review): local_file and S3_BUCKET_URL are interpolated unquoted; a path
    # containing whitespace would break the s3cmd invocation -- confirm inputs are safe.
    s3_uri = f"{config.S3_BUCKET_URL}/{config.UNICODE_VERSION}/{local_file.name}"
    upload_result = run_command(f"s3cmd put {local_file} {s3_uri} -P")
    if upload_result.failure:
        return upload_result
    return Result.Ok()
14 changes: 6 additions & 8 deletions app/data/scripts/bootstrap_unicode_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def bootstrap_unicode_data() -> Result[UnicodeApiSettings]:
if version := os.environ.get("UNICODE_VERSION"):
result = check_min_version(version)
if result.failure:
return Result.Fail(result.error if result.error else "")
return Result.Fail(result.error)
else:
os.environ["UNICODE_VERSION"] = SUPPORTED_UNICODE_VERSIONS[-1]

Expand All @@ -25,8 +25,8 @@ def check_min_version(check_version: str) -> Result[None]:
result = parse_semver_string(check_version)
if result.failure:
return Result.Fail(result.error)
(major, minor, patch) = result.value
if f"{major}.{minor}.{patch}" in SUPPORTED_UNICODE_VERSIONS:
parsed_version = result.value
if parsed_version in SUPPORTED_UNICODE_VERSIONS:
return Result.Ok()
error = (
"This script parses the XML representation of the Unicode Character Database, which has been distributed "
Expand All @@ -38,12 +38,10 @@ def check_min_version(check_version: str) -> Result[None]:
return Result.Fail(error)


def parse_semver_string(input: str) -> Result[str]:
    """Parse a semantic version string and return it normalized as "major.minor.patch".

    Returns Result.Fail when the string does not match SEMVER_REGEX; missing
    minor/patch components default to 0.
    """
    match = SEMVER_REGEX.match(input)
    if not match:
        return Result.Fail(f"'{input}' is not a valid semantic version")
    groups = match.groupdict()
    # groupdict() maps optional groups that did not participate in the match to None,
    # so dict.get's default string is never used; `or "0"` supplies the intended
    # fallback and prevents int(None) raising TypeError for inputs like "15" or "15.1".
    major = int(groups.get("major") or "0")
    minor = int(groups.get("minor") or "0")
    patch = int(groups.get("patch") or "0")
    return Result.Ok(f"{major}.{minor}.{patch}")
6 changes: 0 additions & 6 deletions app/data/scripts/get_prod_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
from pathlib import Path
from zipfile import ZipFile

Expand All @@ -9,11 +8,6 @@

def get_prod_data() -> Result[None]:
settings = get_api_settings()
logger = logging.getLogger("app.api")
logger.info(
"Begin Process: Bootstrap Unicode Data (ENV: ${settings.ENV}, UNICODE_VERSION: ${settings.UNICODE_VERSION})"
)

result = get_unicode_db(settings)
if result.failure:
return result
Expand Down
4 changes: 2 additions & 2 deletions app/data/scripts/get_xml_unicode_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ def download_xml_unicode_database(config: UnicodeApiSettings) -> Result[Path]:
if os.environ.get("ENV") != "PROD" and config.XML_FILE.exists():
return Result.Ok(config.XML_FILE)
result = download_unicode_xml_zip(config)
if result.failure or not result.value:
if result.failure:
return result
xml_zip = result.value
result = extract_unicode_xml_from_zip(config)
if result.failure or not result.value:
if result.failure:
return result
xml_file = result.value
xml_zip.unlink()
Expand Down
6 changes: 3 additions & 3 deletions app/data/scripts/parse_xml_unicode_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

def parse_xml_unicode_database(config: UnicodeApiSettings) -> Result[AllParsedUnicodeData]:
result = parse_etree_from_xml_file(config.XML_FILE)
if result.failure or not result.value:
return Result.Fail(result.error or "")
if result.failure:
return Result.Fail(result.error)
unicode_xml = result.value

(all_planes, all_blocks) = parse_unicode_plane_and_block_data_from_xml(unicode_xml, config)
all_chars: list[CharDetailsDict] = parse_unicode_character_data_from_xml(unicode_xml, all_blocks, all_planes)
all_chars = parse_unicode_character_data_from_xml(unicode_xml, all_blocks, all_planes)
spinner = Spinner()
spinner.start("Counting number of defined characters in each block and plane...")
count_defined_characters_per_block(all_chars, all_blocks)
Expand Down
5 changes: 4 additions & 1 deletion app/data/scripts/populate_sqlite_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,10 @@ def import_data_from_csv_file(
return result
spinner.increment(amount=BATCH_SIZE)
if batch:
perform_batch_insert(session, batch)
result = perform_batch_insert(session, batch)
if result.failure:
spinner.failed(result.error)
return result
spinner.increment(amount=len(batch))
spinner.successful(f"Successfully added parsed {table.__tablename__} data to database")
return Result.Ok()
Expand Down
14 changes: 7 additions & 7 deletions app/data/scripts/save_parsed_data_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ def update_char_dict_enum_values(char_dict: CharDetailsDict) -> CharDetailsDict:
return char_dict


def get_combining_class(cc_value: int) -> CombiningClassCategory:
    """Map a numeric canonical combining class value to its CombiningClassCategory member.

    Values with no corresponding member fall back to CombiningClassCategory(0).
    """
    try:
        category = CombiningClassCategory(cc_value)
    except ValueError:
        # unknown combining class value -> default category 0
        category = CombiningClassCategory(0)
    return category


def get_column_names(db_model: UnicodeModel, parsed: ParsedUnicodeData) -> list[str]:
    """Return the names of db_model fields that also appear in parsed, in field-declaration order."""
    column_names = []
    for field_name in db_model.__fields__:
        if field_name in parsed:
            column_names.append(field_name)
    return column_names

Expand Down Expand Up @@ -113,10 +120,3 @@ def sanitize_value_for_csv(val: bool | int | str | float) -> str:
def append_to_csv(csv_file: Path, text: str) -> None:
    """Append text to csv_file as a single newline-terminated line, creating the file if absent."""
    with csv_file.open("a") as out_file:
        out_file.write(f"{text}\n")


def get_combining_class(cc_value: int) -> CombiningClassCategory:
try:
return CombiningClassCategory(cc_value)
except ValueError:
return CombiningClassCategory(0)
61 changes: 6 additions & 55 deletions app/data/scripts/update_all_data.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
import json
from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile

from app.config.api_settings import UnicodeApiSettings
from app.core.result import Result
from app.data.scripts import (
backup_db_and_json_files,
bootstrap_unicode_data,
download_xml_unicode_database,
parse_xml_unicode_database,
populate_sqlite_database,
save_parsed_data_to_csv,
)
from app.data.scripts.script_types import BlockOrPlaneDetailsDict, CharDetailsDict
from app.data.util import run_command
from app.data.util.spinners import Spinner


def update_all_data() -> Result[None]:
result = bootstrap_unicode_data()
if result.failure or not result.value:
return Result.Fail(result.error or "")
return Result.Fail(result.error)
config = result.value

result = get_xml_unicode_database(config)
if result.failure:
return Result.Fail(result.error or "")
return Result.Fail(result.error)

result = parse_xml_unicode_database(config)
if result.failure:
return Result.Fail(result.error or "")
(all_planes, all_blocks, all_chars) = result.value or ([], [], [])
return Result.Fail(result.error)
(all_planes, all_blocks, all_chars) = result.value
update_json_files(config, all_planes, all_blocks, all_chars)

result = save_parsed_data_to_csv(config, all_planes, all_blocks, all_chars)
Expand All @@ -52,7 +51,7 @@ def update_all_data() -> Result[None]:
def get_xml_unicode_database(config: UnicodeApiSettings) -> Result[Path]:
spinner = Spinner()
result = download_xml_unicode_database(config)
if result.failure or not result.value:
if result.failure:
spinner.start("")
spinner.failed("Download failed! Please check the internet connection.")
return result
Expand Down Expand Up @@ -81,51 +80,3 @@ def update_json_files(
tangut_char_block_map = {int(char["codepoint_dec"]): int(char["block_id"]) for char in all_chars if char["_tangut"]}
config.TANGUT_CHARS_JSON.write_text(json.dumps(tangut_char_block_map, indent=4))
spinner.successful("Successfully created JSON files for parsed Unicode data")


def backup_db_and_json_files(config: UnicodeApiSettings) -> Result[None]:
spinner = Spinner()
spinner.start("Creating compressed backup files of SQLite DB and JSON files...")
backup_sqlite_db(config)
backup_json_files(config)
spinner.successful("Successfully created compressed backup files of SQLite DB and JSON files!")

spinner = Spinner()
result = upload_zip_file_to_s3(config, config.DB_ZIP_FILE)
if result.failure:
spinner.start("")
spinner.failed(result.error)
return result
config.DB_ZIP_FILE.unlink()

result = upload_zip_file_to_s3(config, config.JSON_ZIP_FILE)
if result.failure:
spinner.start("")
spinner.failed(result.error)
return result
config.JSON_ZIP_FILE.unlink()
spinner.start("")
spinner.successful("Successfully uploaded backup files to S3 bucket!")
return Result.Ok()


def backup_sqlite_db(config: UnicodeApiSettings):
with ZipFile(config.DB_ZIP_FILE, "w", ZIP_DEFLATED) as zip:
zip.write(config.DB_FILE, f"{config.DB_FILE.name}")


def backup_json_files(config: UnicodeApiSettings):
zip_file = config.JSON_FOLDER.joinpath("unicode_json.zip")
with ZipFile(zip_file, "w", ZIP_DEFLATED) as zip:
zip.write(config.PLANES_JSON, f"{config.PLANES_JSON.name}")
zip.write(config.BLOCKS_JSON, f"{config.BLOCKS_JSON.name}")
zip.write(config.CHAR_NAME_MAP, f"{config.CHAR_NAME_MAP.name}")
zip.write(config.UNIHAN_CHARS_JSON, f"{config.UNIHAN_CHARS_JSON.name}")
zip.write(config.TANGUT_CHARS_JSON, f"{config.TANGUT_CHARS_JSON.name}")


def upload_zip_file_to_s3(config: UnicodeApiSettings, local_file: Path) -> Result[None]:
result = run_command(f"s3cmd put {local_file} {config.S3_BUCKET_URL}/{config.UNICODE_VERSION}/{local_file.name} -P")
if result.failure:
return result
return Result.Ok()
2 changes: 1 addition & 1 deletion app/data/util/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: # type: ignore # noqa: PG
for remaining in reversed(range(max_attempts)):
try:
return func(*args, **kwargs)
except exceptions as ex: # noqa: PERF203
except exceptions as ex:
if remaining <= 0:
raise RetryLimitExceededError(func, max_attempts) from ex
if on_failure:
Expand Down
6 changes: 3 additions & 3 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
black==24.4.2
coverage==7.5.0
ipython==8.23.0
coverage==7.5.1
ipython==8.24.0
isort==5.13.2
mypy==1.10.0
pip-upgrader==1.4.15
Expand All @@ -13,7 +13,7 @@ pytest-mock==3.14.0
pytest-random-order==1.1.1
pytest-sugar==1.0.0
pyupgrade==3.15.2
ruff==0.4.3
ruff==0.4.4
snoop==0.4.3
tox==4.15.0
trogon==0.5.0
Loading

0 comments on commit 74e6e4c

Please sign in to comment.