-
Notifications
You must be signed in to change notification settings - Fork 24
/
create-cities-csv.py
99 lines (82 loc) · 2.97 KB
/
create-cities-csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import csv
import json
import time
import httpx
# This script requires the third-party module httpx, which you can install with:
#   python3 -m pip install httpx
#
# It also requires an API key for https://geocodeapi.io, the service I used for
# geocoding. They have a free plan.
#
# Replace the placeholder below with your own key before running the script; it
# is sent as the "apikey" parameter of every geocoding request.
geocode_api_key = "PUT_GEOCODE_API_KEY_HERE"
# Export a CSV that for each city lists its GPS coordinates and the number of patients there
def main():
    """Build ``cities.csv`` from ``aflds-patients.csv``.

    Counts the patients in each "City, State" pair, looks up GPS coordinates
    for every city through the geocodeapi.io search API, and writes one row
    per city with its patient count, coordinates, and a display label.
    Cities that cannot be geocoded are still written, with empty coordinates.
    """
    # Maps names of cities (in format "City, State", like "New York, NY") to a
    # dictionary with info about that city: "count", then "lon" and "lat"
    cities = _count_patients_by_city("aflds-patients.csv")
    print(f"Found patients in {len(cities):,} cities")

    # Look up GPS coordinates for each city
    for city, info in cities.items():
        info["lon"], info["lat"] = _geocode_city(city, info["count"])

    _write_cities_csv("cities.csv", cities)


def _count_patients_by_city(patients_filename):
    """Return ``{"City, State": {"count": n}}`` tallied from the patients CSV."""
    cities = {}
    # newline="" lets the csv module handle line endings itself
    with open(patients_filename, newline="") as f:
        for row in csv.DictReader(f):
            city = f"{row['city']}, {row['state']}"
            cities.setdefault(city, {"count": 0})["count"] += 1
    return cities


def _geocode_city(city, patient_count, max_tries=3, retry_delay=2):
    """Return ``(lon, lat)`` for a city, or ``(None, None)`` if the lookup fails.

    The request is attempted up to ``max_tries`` times, sleeping
    ``retry_delay`` seconds between attempts, in case a connection fails.
    """
    response = None
    for attempt in range(1, max_tries + 1):
        print(f"Loading GPS coordinates for: {city} ({patient_count} patients)")
        try:
            response = httpx.get(
                "https://app.geocodeapi.io/api/v1/search",
                params={
                    "apikey": geocode_api_key,
                    "text": city,
                    "size": 1,
                    "boundary.country": "US",
                },
            )
            break
        # The connection failed
        except httpx.HTTPError:
            if attempt == max_tries:
                # Give up on this city instead of retrying forever
                print("Failed, skipping")
                return None, None
            print(f"Sleeping {retry_delay}s and trying again")
            time.sleep(retry_delay)

    if response is None:
        # Only reachable when max_tries < 1: no request was ever made
        return None, None

    try:
        # The first feature's coordinates are read as [longitude, latitude]
        coordinates = json.loads(response.text)["features"][0]["geometry"]["coordinates"]
        lon, lat = coordinates[0], coordinates[1]
    except (ValueError, KeyError, IndexError, TypeError):
        # Not JSON, no matching feature, or an unexpected response shape
        return None, None
    return lon, lat


def _write_cities_csv(csv_filename, cities):
    """Write one row per city (count, city, lon, lat, label) to ``csv_filename``."""
    headers = [
        "count",
        "city",
        "lon",
        "lat",
        "label",
    ]
    with open(csv_filename, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writeheader()
        for city, info in cities.items():
            writer.writerow(
                {
                    "count": info["count"],
                    "city": city,
                    # Each coordinate goes under its own column name
                    "lon": info["lon"],
                    "lat": info["lat"],
                    "label": f"{city} ({info['count']})",
                }
            )
# Run the export only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()