web_parser_criticalmass.de.py
"""Scrape criticalmass.de: list cities, parse saved city pages, and extract ride details.

Workflow: get_cities() reads the city dropdown from the start page, the per-city pages
are expected as files under html/, generate_json() dumps their details as JSON, and
main() works from a previously generated aktueller_stand.json.
"""
import os
import json
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from dateutil.relativedelta import relativedelta, FR

session = requests.Session()
base = "http://criticalmass.de/"

def get_cities():
    """Fetch the start page and return the cities listed in its <select> dropdown."""
    resp = session.get(base)
    bs = BeautifulSoup(resp.content.decode(), 'html.parser')
    select = bs.find('select')
    cities = []
    for option in select.findAll('option'):
        try:
            city = {}
            city['name'] = option.contents[0].strip()
            city['url'] = option.attrs['value']
            cities.append(city)
        except AttributeError as e:
            print("Error parsing cities", e)
    return cities
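
# get_details() below reads city pages from a local html/ directory, but nothing in
# this script populates it. A minimal sketch of such a download step, assuming each
# option value from get_cities() can be used directly as a filename under html/
# (download_city_pages is not part of the original script):
def download_city_pages():
    os.makedirs("html", exist_ok=True)
    for city in get_cities():
        resp = session.get(base + city['url'])
        with open("html/%s" % city['url'], "w") as f:
            f.write(resp.content.decode())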

def get_details(name, url):
    """Parse a previously downloaded city page from html/ and return its details."""
    with open("html/%s" % url, "r") as f:
        html = f.read()
    # Skip cities the site does not have enough data for.
    if "Leider haben wir zur Zeit nicht genug Daten über Critical Mass " in html:
        return None
    bs = BeautifulSoup(html, 'html.parser')
    details = {}
    details['name'] = name
    details['url'] = url
    desc = "\n".join([t.text.strip() for t in bs.findAll(class_="panel-body")])
    details['description'] = desc
    try:
        url_text = bs.find(class_='list-unstyled').text.strip()
        urls = "\n".join([u.strip() for u in url_text.splitlines() if 'Likes' not in u])
        details['urls'] = urls
    except AttributeError:
        pass  # the page lists no URLs for this city
    return details

def generate_json():
    """Parse every saved page in html/ and print the collected details as JSON."""
    collected = []
    for filename in os.listdir("html/"):
        # The saved files are named after their URL path, so the filename serves as both.
        detail = get_details(filename, filename)
        if detail:
            collected.append(detail)
    print(json.dumps(collected))
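
# main() below expects an aktueller_stand.json file, while generate_json() only prints
# to stdout. Redirecting that output into the file would do; as an alternative, a small
# sketch that writes it directly (write_current_state is not part of the original script):
def write_current_state(path="aktueller_stand.json"):
    details = [get_details(f, f) for f in os.listdir("html/")]
    with open(path, "w") as out:
        json.dump([d for d in details if d], out, ensure_ascii=False)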

def find_day(desc):
    """Work in progress: prints the next few months and the Friday on or before each
    of those dates. Does not yet parse `desc` and implicitly returns None."""
    for i in range(5):
        print(datetime.today() + relativedelta(months=i))
    print("")
    for i in range(5):
        print(datetime.today() + relativedelta(months=i) + relativedelta(weekday=FR(-1)))
    #return datetime.now() + relativedelta(weekday=FR(-1))
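
# Critical Mass rides commonly take place on the last Friday of the month. A small
# helper for that date, sketched here with dateutil (not part of the original script):
def last_friday(months_ahead=0):
    # day=31 clamps to the end of the month; FR(-1) then steps back to its last Friday.
    return datetime.today() + relativedelta(months=months_ahead, day=31, weekday=FR(-1))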

def main():
    with open("aktueller_stand.json") as f:
        critical_masses = json.load(f)
    for mass in critical_masses[1:]:
        desc = mass['description']
        print(desc)
        date = find_day(desc)
        print(date)
        exit()  # stop after the first entry while find_day is still being developed


if __name__ == '__main__':
    main()