Skip to content

Commit

Permalink
Merge pull request #13 from eciraci/dev
Browse files Browse the repository at this point in the history
Dev - distribute_ps_grid.py Added.
  • Loading branch information
eciraci authored Feb 5, 2024
2 parents 5883e1a + 6962d1e commit e4558b6
Show file tree
Hide file tree
Showing 19 changed files with 231 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/preliminary_test_conda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,6 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Run Unit test with pytest
shell: bash -l {0}
working-directory: ./
run: |
python -m pytest --import-mode=append test/
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
27 changes: 27 additions & 0 deletions data/shapefiles/csk_ps_sample_Nocera_Terinese_A_epsg4326.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE qgis PUBLIC 'http://mrcc.com/qgis.dtd' 'SYSTEM'>
<qgis version="3.32.2-Lima">
<identifier></identifier>
<parentidentifier></parentidentifier>
<language></language>
<type></type>
<title></title>
<abstract></abstract>
<links/>
<dates/>
<fees></fees>
<encoding></encoding>
<crs>
<spatialrefsys nativeFormat="Wkt">
<wkt></wkt>
<proj4></proj4>
<srsid>0</srsid>
<srid>0</srid>
<authid></authid>
<description></description>
<projectionacronym></projectionacronym>
<ellipsoidacronym></ellipsoidacronym>
<geographicflag>false</geographicflag>
</spatialrefsys>
</crs>
<extent/>
</qgis>
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions data/shapefiles/grid_CSG2_151_STR-007_ASC.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ISO-8859-1
Binary file added data/shapefiles/grid_CSG2_151_STR-007_ASC.dbf
Binary file not shown.
Binary file added data/shapefiles/grid_CSG2_151_STR-007_ASC.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions data/shapefiles/grid_CSG2_151_STR-007_ASC.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file added data/shapefiles/grid_CSG2_151_STR-007_ASC.shp
Binary file not shown.
Binary file added data/shapefiles/grid_CSG2_151_STR-007_ASC.shx
Binary file not shown.
140 changes: 140 additions & 0 deletions distribute_ps_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python
"""
Written by Enrico Ciraci'
Use a Spatial Join to distribute the PS points available within
the boundaries of a CSK frame over the relative along-track grid.
usage: distribute_ps_grid.py [-h] [--out_dir OUT_DIR]
[--out_format {parquet,shp}] [--plot] input_file grid_file
Distribute PS points over the CSK grid
positional arguments:
input_file Input file.
grid_file CSK Along Track Grid file.
options:
-h, --help show this help message and exit
--out_dir OUT_DIR, -O OUT_DIR
Output directory.
--out_format {parquet,shp}, -F {parquet,shp}
Output file format.
--plot, -P Plot the results showing the PS partition.
Python Dependencies
geopandas: Open source project to make working with geospatial data
in python easier: https://geopandas.org
dask-geopandas: Distributed geospatial operations using Dask:
https://dask-geopandas.readthedocs.io
matplotlib: Comprehensive library for creating static, animated, and
interactive visualizations in Python: https://matplotlib.org
"""
import os
import argparse
from datetime import datetime
import geopandas as gpd
import dask_geopandas as dgpd
import matplotlib.pyplot as plt


def distribute_ps_grid(input_file: str, grid_file: str) -> dgpd.GeoDataFrame:
    """
    Use a Spatial Join to distribute the PS points available within
    the boundaries of a CSK frame over the relative along-track grid.

    Args:
        input_file: Path to the PS sample file.
        grid_file: Path to the CSK along-track grid file.

    Returns:
        Dask-GeoDataFrame resulting from the inner "within" spatial join
        between the PS points and the grid. The caller must invoke
        ``.compute()`` on it to obtain a geopandas GeoDataFrame.

    Raises:
        FileNotFoundError: If input_file or grid_file is not an
            existing file.
    """
    # - Validate both paths before opening anything, so that a wrong
    # - grid path is reported without first reading the PS sample.
    for file_path in (input_file, grid_file):
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

    # - Import PS Sample Data (split in 4 Dask partitions)
    gdf_smp = dgpd.read_file(input_file, npartitions=4)

    # - Import CSK AlongTrack Grid
    gdf_csk = gpd.read_file(grid_file)

    # - Print input/output file names
    print(f"# - Input PS Sample: {input_file}")
    print(f"# - Input CSK Grid: {grid_file}")
    print("# - Compute Spatial Join between PS Sample and CSK Grid.")

    # - Compute spatial join between set of points and grid:
    # - how="inner" keeps only the points matched to a grid cell.
    return gdf_smp.sjoin(gdf_csk, how="inner", predicate="within")


def main() -> None:
    """
    Use a Spatial Join to distribute the PS points available within
    the boundaries of a CSK frame over the relative along-track grid.

    Parse the command line arguments, run the spatial join, save the
    result inside the output directory as "<input name>_rc.<format>" and,
    if requested, plot the points colored by their grid row.
    """
    # - Parse command line arguments
    parser = argparse.ArgumentParser(
        description="Distribute PS points over the CSK grid"
    )
    # - Input file
    parser.add_argument('input_file', type=str,
                        help='Input file.')
    # - Input CSK AT Grid file
    parser.add_argument('grid_file', type=str,
                        help='CSK Along Track Grid file.')
    # - Output directory - default is current working directory
    parser.add_argument('--out_dir', '-O', type=str,
                        help='Output directory.', default=os.getcwd())
    # - Output file format
    parser.add_argument('--out_format', '-F', type=str,
                        help='Output file format.', default='parquet',
                        choices=['parquet', 'shp'])
    # - Plot Intermediate Results
    parser.add_argument('--plot', '-P', action='store_true',
                        help='Plot the results showing the PS partition.')
    args = parser.parse_args()

    # - import sample data
    smp_input = args.input_file

    # - Import CSK Along Track Grid
    csk_at_grid = args.grid_file

    # - Distribute PS points over the CSK grid
    gdf_smp = distribute_ps_grid(smp_input, csk_at_grid)

    # - Drop unnecessary columns
    print("# - Drop unnecessary columns & Convert Dask-GeoDataFrame "
          "to GeoDataFrame.")
    unused_columns = ['index_right', 'type', 'rand_point',
                      'index', 'name', 'csm_path']
    # - Drop only the columns actually present, so that inputs missing
    # - one of them do not fail with a KeyError.
    gdf_smp = gdf_smp.drop(
        columns=[col for col in unused_columns if col in gdf_smp.columns]
    )
    # - Reset the index after compute(): on a Dask DataFrame the reset
    # - restarts from 0 in every partition, producing duplicate labels.
    gdf_smp = gdf_smp.compute().reset_index(drop=True)

    # - Save the results
    print("# - Save the results.")
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)
    # - Build the output name from the input stem: a plain
    # - str.replace('.shp', ...) leaves non-shapefile names unchanged and
    # - could overwrite the input when out_dir is its own directory.
    base_name, _ = os.path.splitext(os.path.basename(smp_input))
    out_file = os.path.join(out_dir, f"{base_name}_rc.{args.out_format}")

    if args.out_format == 'shp':
        gdf_smp.to_file(out_file)
    else:
        if os.path.isfile(out_file):
            os.remove(out_file)
        gdf_smp.to_parquet(out_file)

    if args.plot:
        # - Plot the results. The 'row' column is presumably inherited
        # - from the CSK grid through the join - TODO confirm.
        # - Passing it as `column` (instead of `c`) lets legend=True
        # - take effect.
        _, ax = plt.subplots()
        gdf_smp.plot(ax=ax, column='row', cmap='viridis', legend=True)
        plt.show()


# - Script entry point: run main() and report the elapsed wall-clock time
if __name__ == '__main__':
    t_begin = datetime.now()
    main()
    elapsed = datetime.now() - t_begin
    print(f"# - Computation Time: {elapsed}")
6 changes: 5 additions & 1 deletion environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ dependencies:
- pytest
- pytest-cov
- codecov
- coverage
- coverage
- pyogrio
- pyarrow
- dask
- dask-geopandas
Loading

0 comments on commit e4558b6

Please sign in to comment.