feat: Download dem file from JAXA if not provided by the user (#380)
* feat: auto add dem file if not provided by the user (for more information, see https://pre-commit.ci)
* fix: scrapy signals issue (for more information, see https://pre-commit.ci)
* feat: add constants for dem radio data
* feat: add reducer and action to set demType
* feat: add dem type
* refactor: remove title from dem type switch
* fix: pre-commit fix

---------

Co-authored-by: Bijay Rauniyar <[email protected]>
1 parent c507709 · commit 7b89824 · 13 changed files with 725 additions and 4 deletions (three of the new files are reproduced below).
app/jaxa/jaxa_coordinates.py (new file, +107):

```python
import math
from shapely.geometry import Polygon, box


def parse_polygon(coordinates):
    """
    Parses the input coordinates into a Shapely Polygon object.
    """
    return Polygon(coordinates)


def get_bounding_box(polygon):
    """
    Returns the bounding box of the polygon as (min_lon, min_lat, max_lon, max_lat).
    """
    min_lon, min_lat, max_lon, max_lat = polygon.bounds
    return min_lon, min_lat, max_lon, max_lat


def get_5x5_tiles(min_lon, min_lat, max_lon, max_lat):
    """
    Calculates all 5°×5° tiles that intersect with the bounding box.
    Returns a list of tuples representing the lower-left corner of each tile.
    """
    tiles = []

    # Define the origin for 5x5 tiles
    origin_lon = -180
    origin_lat = -90

    # Calculate the starting and ending indices for longitude and latitude
    start_lon_idx = math.floor((min_lon - origin_lon) / 5)
    end_lon_idx = math.floor((max_lon - origin_lon) / 5)
    start_lat_idx = math.floor((min_lat - origin_lat) / 5)
    end_lat_idx = math.floor((max_lat - origin_lat) / 5)

    for lon_idx in range(start_lon_idx, end_lon_idx + 1):
        for lat_idx in range(start_lat_idx, end_lat_idx + 1):
            tile_lon = origin_lon + lon_idx * 5
            tile_lat = origin_lat + lat_idx * 5
            tiles.append((tile_lon, tile_lat))

    return tiles


def get_1x1_tiles_within_5x5(tile_lon, tile_lat, polygon):
    """
    For a given 5°×5° tile, calculates all 1°×1° tiles that intersect with the polygon.
    Returns a list of tuples representing the lower-left corner of each 1x1 tile.
    """
    tiles = []

    for lon in range(int(tile_lon), int(tile_lon + 5)):
        for lat in range(int(tile_lat), int(tile_lat + 5)):
            tile_polygon = box(lon, lat, lon + 1, lat + 1)
            if polygon.intersects(tile_polygon):
                tiles.append((lon, lat))

    return tiles


def format_tile_name(tile5_lon, tile5_lat, tile1_lon, tile1_lat):
    """
    Formats the tile name based on the naming convention:
    N015W075_N019W071
    """
    # Format 5x5 tile part
    lat_5 = f"{abs(tile5_lat):03d}"
    lat_dir_5 = "N" if tile5_lat >= 0 else "S"
    lon_5 = f"{abs(tile5_lon):03d}"
    lon_dir_5 = "E" if tile5_lon >= 0 else "W"
    tile5 = f"{lat_dir_5}{lat_5}{lon_dir_5}{lon_5}"

    # Format 1x1 tile part
    lat_1 = f"{abs(tile1_lat):03d}"
    lat_dir_1 = "N" if tile1_lat >= 0 else "S"
    lon_1 = f"{abs(tile1_lon):03d}"
    lon_dir_1 = "E" if tile1_lon >= 0 else "W"
    tile1 = f"{lat_dir_1}{lat_1}{lon_dir_1}{lon_1}"

    return f"{tile5}_{tile1}"


def get_covering_tiles(polygon_geojson):
    """
    Main function to get the list of tile names covering the polygon.
    """
    # Parse the GeoJSON polygon (exterior ring only)
    polygon = parse_polygon(polygon_geojson["coordinates"][0])

    # Get bounding box
    min_lon, min_lat, max_lon, max_lat = get_bounding_box(polygon)

    # Get all relevant 5x5 tiles
    tiles_5x5 = get_5x5_tiles(min_lon, min_lat, max_lon, max_lat)

    # Initialize a set to avoid duplicates
    tile_names = set()

    # Iterate through each 5x5 tile and find intersecting 1x1 tiles
    for tile5_lon, tile5_lat in tiles_5x5:
        tiles_1x1 = get_1x1_tiles_within_5x5(tile5_lon, tile5_lat, polygon)
        for tile1_lon, tile1_lat in tiles_1x1:
            name = format_tile_name(tile5_lon, tile5_lat, tile1_lon, tile1_lat)
            tile_names.add(name)

    return sorted(tile_names)
```
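
Taken together, these helpers translate a GeoJSON polygon into AW3D30-style tile names. A minimal usage sketch follows; the polygon is an invented example, and the expected name falls out of the grid arithmetic above (5° tile with lower-left (85, 25), 1° tile with lower-left (85, 27)):

```python
# Hypothetical AOI: a small rectangle near Kathmandu, as GeoJSON (lon, lat).
sample_polygon = {
    "type": "Polygon",
    "coordinates": [
        [[85.3, 27.7], [85.4, 27.7], [85.4, 27.8], [85.3, 27.8], [85.3, 27.7]]
    ],
}

print(get_covering_tiles(sample_polygon))
# ['N025E085_N027E085']
```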
app/jaxa/tif_spider.py (new file, +84):

```python
import os
import zipfile
import io
import scrapy

from osgeo import gdal
from pathlib import Path
from app.config import settings

base_dir = Path(__file__).resolve().parent


class TifSpider(scrapy.Spider):
    name = "tif_spider"
    allowed_domains = ["eorc.jaxa.jp"]
    merged_file_path = None

    def __init__(self, coordinates, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.tif_files = []
        self.coordinates = coordinates.split(",")

    headers = {
        "authority": "www.eorc.jaxa.jp",
        "path": "/ALOS/en/aw3d30/data/html_v2404/xml/{caption}_5_5.xml",
        "method": "GET",
        "accept": "application/xml, text/xml, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "en-US,en;q=0.9",
        "authorization": f"Basic {settings.JAXA_AUTH_TOKEN}",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    def start_requests(self):
        urls = []
        for coordinate in self.coordinates:
            coords = coordinate.split("_")
            five_by_five, one_by_one = coords[0], coords[1]
            urls.append(
                f"https://www.eorc.jaxa.jp/ALOS/aw3d30/data/release_v2404/{five_by_five}/{one_by_one}.zip",
            )

        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        temp_dir = os.path.join(os.getcwd(), "temp")
        os.makedirs(temp_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(io.BytesIO(response.body)) as zip_file:
                for file_name in zip_file.namelist():
                    if file_name.endswith("DSM.tif"):
                        # Save .tif file into the temp directory
                        temp_path = os.path.join(temp_dir, os.path.basename(file_name))
                        with zip_file.open(file_name) as tif_file:
                            with open(temp_path, "wb") as out_file:
                                out_file.write(tif_file.read())
                        self.tif_files.append(temp_path)
        except Exception:
            pass

    def closed(self, reason):
        if self.tif_files:
            self.merged_file_path = self.merge_tiles()

    def merge_tiles(self):
        vrt_file = "merged.vrt"
        gdal.BuildVRT(vrt_file, self.tif_files)
        output_file = str(base_dir / "merged.tif")

        gdal.Translate(output_file, vrt_file)
        for file in self.tif_files:
            os.remove(file)
        os.remove(vrt_file)
        return output_file
```
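
The spider can also be driven on its own for debugging, outside the FastAPI flow. This is a sketch, assuming the Scrapy project settings and settings.JAXA_AUTH_TOKEN are configured; the tile name is the invented example from above:

```python
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from app.jaxa.tif_spider import TifSpider

process = CrawlerProcess(get_project_settings())
# Comma-separated tile names in the format produced by get_covering_tiles().
process.crawl(TifSpider, coordinates="N025E085_N027E085")
process.start()  # blocks until closed() has merged the downloaded DSM tiles
```

On success the merged GeoTIFF is written as merged.tif next to the spider module, and the intermediate per-tile files are removed.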
New file (+74) wiring the tile helpers and spider into the upload flow (its path is not shown in this capture, but it imports the two modules above):

```python
import os
import io
import asyncio
import multiprocessing
from pathlib import Path

from fastapi import UploadFile
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from app.projects import project_logic
from app.jaxa.jaxa_coordinates import get_covering_tiles
from app.jaxa.tif_spider import TifSpider
from app.db import database

base_dir = Path(__file__).resolve().parent


def run_crawler_and_upload(coordinates_str: str, tif_file_path: str, project_id):
    """
    Runs the Scrapy crawler and uploads the DEM file to S3.
    """
    try:
        # Initialize and start the Scrapy crawler
        process = CrawlerProcess(get_project_settings())
        process.crawl(TifSpider, coordinates=coordinates_str)
        process.start()

        asyncio.run(upload_dem_file_s3_sync(tif_file_path, project_id))
    except Exception:
        # Errors are swallowed: this runs in a detached worker process
        # with no caller to report back to.
        pass


async def upload_dem_file_s3_sync(tif_file_path: str, project_id):
    """
    Uploads the DEM file to S3 and updates the database. Although async,
    it is driven to completion with asyncio.run() inside the worker process.
    """
    try:
        with open(tif_file_path, "rb") as dem_file:
            file_bytes = dem_file.read()
            file_obj = io.BytesIO(file_bytes)
            dem = UploadFile(file=file_obj, filename="dem.tif")

        dem_url = await project_logic.upload_file_to_s3(project_id, dem, "dem.tif")

        pool = await database.get_db_connection_pool()
        async with pool as pool_instance:
            async with pool_instance.connection() as conn:
                await project_logic.update_url(conn, project_id, dem_url)

        os.remove(tif_file_path)
    except Exception:
        pass


async def upload_dem_file(geometry, project_id):
    """
    Initiates the DEM file fetching and uploading process.
    """
    tiles = get_covering_tiles(geometry)
    tif_file_path = str(base_dir / "merged.tif")
    coordinates_str = ",".join(tiles)

    try:
        p = multiprocessing.Process(
            target=run_crawler_and_upload,
            args=(coordinates_str, tif_file_path, project_id),
        )
        p.start()
    except Exception:
        pass
```
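
A caller only needs a polygon geometry and a project ID; everything else happens in the spawned worker. Below is a hypothetical call site; the geometry and project ID are placeholders, not values from this commit:

```python
import asyncio

geometry = {
    "type": "Polygon",
    "coordinates": [
        [[85.3, 27.7], [85.4, 27.7], [85.4, 27.8], [85.3, 27.8], [85.3, 27.7]]
    ],
}

# Returns almost immediately: the download, merge, S3 upload, and DB update
# all run in the detached worker process.
asyncio.run(upload_dem_file(geometry, "<project-id-placeholder>"))
```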