Merge pull request #49 from a-luna:refactor-app.data.scripts_20240527
Refactor app.data.scripts.update_all_data module to package
a-luna authored May 28, 2024
2 parents d1ebaa2 + 1a7ce11 commit 7d76a2b
Showing 23 changed files with 265 additions and 264 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
@@ -3,6 +3,7 @@ SHELL ["/bin/bash", "-c"]

ARG ENV
ARG UNICODE_VERSION
ARG API_ROOT
ARG REDIS_HOST
ARG REDIS_PORT
ARG REDIS_DB
@@ -14,6 +15,7 @@ ARG TEST_HEADER

ENV ENV=${ENV}
ENV UNICODE_VERSION=${UNICODE_VERSION}
ENV API_ROOT=${API_ROOT}
ENV REDIS_HOST=${REDIS_HOST}
ENV REDIS_PORT=${REDIS_PORT}
ENV REDIS_DB=${REDIS_DB}
@@ -28,6 +30,7 @@ RUN touch /code/.env
RUN echo "ENV=$ENV" >> /code/.env
RUN echo "PYTHONPATH=." >> /code/.env
RUN echo "UNICODE_VERSION=$UNICODE_VERSION" >> /code/.env
RUN echo "API_ROOT=$API_ROOT" >> /code/.env
RUN echo "REDIS_HOST=$REDIS_HOST" >> /code/.env
RUN echo "REDIS_PORT=$REDIS_PORT" >> /code/.env
RUN echo "REDIS_DB=$REDIS_DB" >> /code/.env
6 changes: 3 additions & 3 deletions app/cli.py
@@ -3,9 +3,9 @@
import click
from trogon import tui

from app.data.scripts.sync_req_files import sync_requirements_files as _sync_requirements_files
from app.data.scripts.update_all_data import update_all_data as _update_all_data
from app.data.scripts.update_test_data import update_test_data as _update_test_data
from app.data.scripts import sync_requirements_files as _sync_requirements_files
from app.data.scripts import update_all_data as _update_all_data
from app.data.scripts import update_test_data as _update_test_data
from app.docs.api_docs.readme import update_readme as _update_readme


47 changes: 14 additions & 33 deletions app/config/api_settings.py
@@ -1,5 +1,4 @@
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import timedelta
@@ -14,26 +13,22 @@

class ApiSettingsDict(TypedDict):
ENV: str
UNICODE_VERSION: str
PROJECT_NAME: str
API_ROOT: str
API_VERSION: str
UNICODE_VERSION: str
REDIS_PW: str
REDIS_HOST: str
REDIS_PORT: int
REDIS_DB: int
RATE_LIMIT_PER_PERIOD: int
RATE_LIMIT_PERIOD_SECONDS: timedelta
RATE_LIMIT_BURST: int
SERVER_NAME: str
SERVER_HOST: str


UNICODE_ORG_ROOT = "https://www.unicode.org/Public"
UNICODE_XML_FOLDER = "ucdxml"
HTTP_BUCKET_URL = "https://unicode-api.us-southeast-1.linodeobjects.com"
S3_BUCKET_URL = "s3://unicode-api"
DEV_API_ROOT = "http://localhost:3507"
PROD_API_ROOT = "https://unicode-api.aaronluna.dev"

XML_FILE_NAME = "ucd.all.flat.xml"
XML_ZIP_FILE_NAME = "ucd.all.flat.zip"
@@ -55,19 +50,17 @@ def get_latest_unicode_version() -> str: # pragma: no cover
@dataclass
class UnicodeApiSettings:
ENV: str
UNICODE_VERSION: str
PROJECT_NAME: str
API_ROOT: str
API_VERSION: str
UNICODE_VERSION: str
REDIS_PW: str
REDIS_HOST: str
REDIS_PORT: int
REDIS_DB: int
RATE_LIMIT_PER_PERIOD: int
RATE_LIMIT_PERIOD_SECONDS: timedelta
RATE_LIMIT_BURST: int
SERVER_NAME: str
SERVER_HOST: str
API_ROOT: str = field(init=False, default="")
PROJECT_NAME: str = field(init=False, default="")
ROOT_FOLDER: Path = field(init=False)
APP_FOLDER: Path = field(init=False)
DATA_FOLDER: Path = field(init=False)
@@ -105,7 +98,7 @@ def __post_init__(self) -> None:
json_folder = version_folder.joinpath("json")
csv_folder = version_folder.joinpath("csv")

self.API_ROOT = DEV_API_ROOT if self.is_dev else PROD_API_ROOT
self.PROJECT_NAME = f"Unicode API{"" if "PROD" in self.ENV else f' ({self.ENV})'}"
self.ROOT_FOLDER = ROOT_FOLDER
self.APP_FOLDER = ROOT_FOLDER.joinpath("app")
self.DATA_FOLDER = data_folder
@@ -147,18 +140,14 @@ def is_prod(self): # pragma: no cover
def is_test(self): # pragma: no cover
return "TEST" in self.ENV

@property
def api_settings_report(self) -> str:
return f"API Settings: (ENV: {self.ENV}) (UNICODE_VERSION: {self.UNICODE_VERSION})"

@property
def rate_limit_settings_report(self) -> str:
rate = f"{self.RATE_LIMIT_PER_PERIOD} request{s(self.RATE_LIMIT_PER_PERIOD)}"
interval = self.RATE_LIMIT_PERIOD_SECONDS.total_seconds()
period = f"{interval}second{s(interval)}"
interval = f"{self.RATE_LIMIT_PERIOD_SECONDS.total_seconds()}"
period = f"{'' if interval == '1.0' else f'{interval} '}second{s(interval)}"
rate_limit_settings = f"Rate Limit Settings: {rate} per {period}"
burst_enabled = self.RATE_LIMIT_BURST > 1
if burst_enabled: # pragma: no cover
if burst_enabled:
rate_limit_settings += f" (+{self.RATE_LIMIT_BURST} request burst allowance)"
return rate_limit_settings

@@ -222,44 +211,36 @@ def get_api_settings() -> UnicodeApiSettings: # pragma: no cover
env_vars = read_dotenv_file(DOTENV_FILE)
settings: ApiSettingsDict = {
"ENV": env_vars.get("ENV", "DEV"),
"UNICODE_VERSION": env_vars.get("UNICODE_VERSION", get_latest_unicode_version()),
"PROJECT_NAME": "Unicode API",
"API_ROOT": env_vars.get("API_ROOT", ""),
"API_VERSION": "/v1",
"UNICODE_VERSION": env_vars.get("UNICODE_VERSION", get_latest_unicode_version()),
"REDIS_PW": env_vars.get("REDIS_PW", ""),
"REDIS_HOST": env_vars.get("REDIS_HOST", ""),
"REDIS_PORT": int(env_vars.get("REDIS_PORT", "6379")),
"REDIS_DB": int(env_vars.get("REDIS_DB", "0")),
"RATE_LIMIT_PER_PERIOD": int(env_vars.get("RATE_LIMIT_PER_PERIOD", "1")),
"RATE_LIMIT_PERIOD_SECONDS": timedelta(seconds=int(env_vars.get("RATE_LIMIT_PERIOD_SECONDS", "100"))),
"RATE_LIMIT_BURST": int(env_vars.get("RATE_LIMIT_BURST", "10")),
"SERVER_NAME": "unicode-api.aaronluna.dev",
"SERVER_HOST": PROD_API_ROOT,
}
return UnicodeApiSettings(**settings)


def get_test_settings() -> UnicodeApiSettings:
settings: ApiSettingsDict = {
"ENV": "TEST",
"UNICODE_VERSION": "15.0.0",
"PROJECT_NAME": "Test Unicode API",
"API_ROOT": "",
"API_VERSION": "/v1",
"UNICODE_VERSION": "15.0.0",
"REDIS_PW": "",
"REDIS_HOST": "",
"REDIS_PORT": 0,
"REDIS_DB": 0,
"RATE_LIMIT_PER_PERIOD": 2,
"RATE_LIMIT_PERIOD_SECONDS": timedelta(seconds=1),
"RATE_LIMIT_BURST": 1,
"SERVER_NAME": "",
"SERVER_HOST": "",
}
return UnicodeApiSettings(**settings)


def get_settings() -> UnicodeApiSettings:
settings = get_test_settings() if "TEST" in os.environ.get("ENV", "DEV") else get_api_settings()
logger = logging.getLogger("app.api")
logger.debug(settings.api_settings_report)
logger.debug(settings.rate_limit_settings_report)
return settings
return get_test_settings() if "TEST" in os.environ.get("ENV", "DEV") else get_api_settings()
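
With this change API_ROOT is no longer derived from the hard-coded DEV_API_ROOT/PROD_API_ROOT constants in __post_init__; it is read from the environment (the Dockerfile above now forwards an API_ROOT build arg into /code/.env) and falls back to an empty string, and PROJECT_NAME is likewise set during __post_init__ rather than passed in. A minimal sketch of the new lookup, using a plain dict in place of the project's read_dotenv_file() helper (all values below are illustrative):

from datetime import timedelta

# Stand-in for the dict returned by read_dotenv_file(DOTENV_FILE); values are made up.
env_vars = {"ENV": "DEV", "UNICODE_VERSION": "15.0.0", "API_ROOT": "http://localhost:3507"}

api_root = env_vars.get("API_ROOT", "")  # previously chosen from DEV_API_ROOT / PROD_API_ROOT
rate_limit_period = timedelta(seconds=int(env_vars.get("RATE_LIMIT_PERIOD_SECONDS", "100")))

print(api_root)           # http://localhost:3507
print(rate_limit_period)  # 0:01:40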
21 changes: 12 additions & 9 deletions app/core/rate_limit.py
@@ -19,6 +19,7 @@
)

RATE_LIMIT_ROUTE_REGEX = re.compile(r"^\/v1\/blocks|characters|codepoints|planes")
DOCKER_IP_REGEX = re.compile(r"172\.17\.0\.\d{1,3}")


@dataclass
@@ -113,18 +114,20 @@ def rate_limit_applies_to_route(self, request: Request) -> bool: # pragma: no cover
return bool(RATE_LIMIT_ROUTE_REGEX.search(request.url.path))

def client_ip_is_external(self, request: Request, client_ip: str) -> bool: # pragma: no cover
if client_ip in ["localhost", "127.0.0.1", "testserver"] or client_ip.startswith("172.17.0."):
if any(host in client_ip for host in ["localhost", "127.0.0.1", "testserver"]):
return False
if DOCKER_IP_REGEX.search(client_ip):
return False
if "sec-fetch-site" in request.headers and request.headers["sec-fetch-site"] == "same-site":
self.log_request_from_internal_ip(client_ip, request)
return False
if "sec-fetch-site" in request.headers:
if request.headers["sec-fetch-site"] == "same-site":
self.logger.info(f"##### BYPASS RATE LIMITING (SAME SITE, IP: {client_ip}) #####")
for log in get_dict_report(request.headers):
self.logger.info(log)
return False
else:
return True
return True

def log_request_from_internal_ip(self, client_ip: str, request: Request) -> None:
self.logger.info(f"##### BYPASS RATE LIMITING (SAME SITE, IP: {client_ip}) #####")
for log in get_dict_report(request.headers):
self.logger.info(log)

def get_allowed_at(self, tat: float) -> float:
return (dtaware_fromtimestamp(tat) - self.delay_tolerance_ms).timestamp()

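The refactored client_ip_is_external check now matches Docker bridge addresses with a compiled pattern instead of str.startswith, and the same-site logging moved into the new log_request_from_internal_ip helper. A standalone check of the new pattern against a few sample addresses (addresses are illustrative):

import re

DOCKER_IP_REGEX = re.compile(r"172\.17\.0\.\d{1,3}")

for ip in ["172.17.0.2", "172.17.0.255", "192.168.1.10", "127.0.0.1"]:
    internal = bool(DOCKER_IP_REGEX.search(ip))
    print(f"{ip}: {'internal, rate limit bypassed' if internal else 'external'}")

# 172.17.0.2 and 172.17.0.255 match the Docker bridge pattern; 127.0.0.1 does not,
# but it is caught by the separate localhost/testserver check above.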
7 changes: 6 additions & 1 deletion app/core/util.py
@@ -5,9 +5,14 @@
from app.constants import DATE_MONTH_NAME, UNICODE_VERSION_RELEASE_DATES


def s(x: list | int | float) -> str:
def s(x: list | int | float | str) -> str:
if isinstance(x, list):
return "s" if len(x) > 1 else ""
if isinstance(x, str):
try:
return "s" if float(x) > 1 else ""
except ValueError:
return ""
return "s" if x > 1 else ""


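s() now also accepts strings, so the rate-limit report can pass its formatted seconds value straight through; non-numeric strings fall back to the singular form. The function as it appears above, with a few illustrative calls:

def s(x: list | int | float | str) -> str:
    if isinstance(x, list):
        return "s" if len(x) > 1 else ""
    if isinstance(x, str):
        try:
            return "s" if float(x) > 1 else ""
        except ValueError:
            return ""
    return "s" if x > 1 else ""

print(s(1))      # ""
print(s(2))      # "s"
print(s("1.0"))  # ""  -> "second"
print(s("2.5"))  # "s" -> "2.5 seconds"
print(s("abc"))  # ""  (non-numeric string, singular by default)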
10 changes: 4 additions & 6 deletions app/data/scripts/__init__.py
@@ -1,7 +1,5 @@
# flake8: noqa
from app.data.scripts.backup_db_and_json_files import backup_db_and_json_files
from app.data.scripts.bootstrap_unicode_data import bootstrap_unicode_data
from app.data.scripts.get_xml_unicode_db import download_xml_unicode_database
from app.data.scripts.parse_xml_unicode_db import parse_xml_unicode_database
from app.data.scripts.populate_sqlite_db import populate_sqlite_database
from app.data.scripts.save_parsed_data_to_csv import save_parsed_data_to_csv
from app.data.scripts.update_all_data.update_all_data import update_all_data
from app.data.scripts.update_test_data.update_test_data import update_test_data
from app.data.scripts.get_prod_data import get_prod_data
from app.data.scripts.sync_req_files import sync_requirements_files
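
Because the update_all_data function is re-exported here under the same name as the new subpackage, the name update_all_data in the app.data.scripts namespace ends up bound to the function rather than the package, which is what keeps the simplified imports in app/cli.py working. A small sketch of that resolution (import path taken from this diff):

# After the re-export above, the package-level name refers to the function itself,
# so `from app.data.scripts import update_all_data` in app/cli.py yields a callable:
from app.data.scripts import update_all_data

print(callable(update_all_data))  # True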
55 changes: 0 additions & 55 deletions app/data/scripts/backup_db_and_json_files.py

This file was deleted.

82 changes: 0 additions & 82 deletions app/data/scripts/update_all_data.py

This file was deleted.

7 changes: 7 additions & 0 deletions app/data/scripts/update_all_data/__init__.py
@@ -0,0 +1,7 @@
# flake8: noqa
from app.data.scripts.update_all_data.backup_db_and_json_files import backup_db_and_json_files
from app.data.scripts.update_all_data.get_api_settings import get_api_settings
from app.data.scripts.update_all_data.get_xml_unicode_db import download_xml_unicode_database
from app.data.scripts.update_all_data.parse_xml_unicode_db import parse_xml_unicode_database
from app.data.scripts.update_all_data.populate_sqlite_db import populate_sqlite_database
from app.data.scripts.update_all_data.save_parsed_data import save_parsed_data
