-
Notifications
You must be signed in to change notification settings - Fork 1
/
daily-rss-hu-fa.py
executable file
·67 lines (56 loc) · 2.3 KB
/
daily-rss-hu-fa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/python
# -*- coding: iso-8859-2 -*-
# This program generates RSS feeds of the Hungarian Wikipedia's front page articles.
import re, urllib, datetime
from rsslib import WPFeed, CacheItem, date_vars, encode_title
# Feed configuration. The whole dict is handed to WPFeed below; main() also
# reads 'time_range' and 'days_of_week' directly.
# NOTE(review): the 'õ' in the Hungarian strings is presumably 'ő' seen
# through the wrong code page (the file declares iso-8859-2) - confirm
# the encoding before re-saving this file.
settings = {
    # Size of the window in days: main() looks back this many days, and
    # (per the original note) the cache is purged after the same period.
    'time_range': 14,
    # ISO weekdays on which an item is generated (Mo: 1, Su: 7).
    'days_of_week': [1, 4],
    'output_filename': '/home/t/tgergo/public_html/fa.xml',
    'cache_filename': '/home/t/tgergo/temp/fa_cache.pickle',
    'base_url': 'http://hu.wikipedia.org/',
    'rss_title': u'Wikipédia: Kiemelt szócikk',
    'rss_link': 'http://hu.wikipedia.org/wiki/Kezd%C5%91lap',
    'rss_description': u'Kiemelt szócikkek a magyar Wikipédia kezdõlapjáról'
}

# Shared feed object used by get_content(), get_title() and main().
feed = WPFeed(settings)

# Name of the per-slot template page; filled in with the values that
# date_vars() supplies for year, week and n.
template_title = u'Kezdõlap kiemelt cikkei/%(year)d-%(week)d-%(n)d'
def get_content(date):
    """Fetch the front-page template text for *date*.

    The template name is built from ``template_title`` and
    ``date_vars(date)``; the page is requested with ``action=render``
    through ``feed.get_html`` and the response is decoded as UTF-8.
    """
    page_name = template_title % date_vars(date)
    render_url = (
        'http://hu.wikipedia.org/w/index.php?title=Sablon:%s&action=render'
        % encode_title(page_name)
    )
    raw_html = feed.get_html(render_url, date)
    return raw_html.decode('utf-8')
def unicode_decode(match):
    """Turn a matched hexadecimal escape into the character it names.

    Meant to be used as a ``re.sub`` replacement callback.

    match -- a regex match object whose group 1 holds the code point
             written in hexadecimal (for example ``0151``).

    Returns the single-character unicode string for that code point.
    """
    try:
        # Python 2: chr() only covers 0-255 there, unichr() is required.
        to_char = unichr
    except NameError:
        # Python 3: unichr() no longer exists, chr() handles all code points.
        to_char = chr
    return to_char(int(match.group(1), 16))
def get_title(date):
    """Return the title of the article featured for *date*.

    Asks the hu.wikipedia.org ``expandtemplates`` API to expand the
    front-page article list template with the year/week/n values taken
    from ``date_vars(date, ...)``, then pulls the ``"*"`` field out of the
    response. Backslash-u escapes (four lowercase hex digits) in that
    field are replaced by the characters they stand for.

    Raises ValueError when the response contains no ``"*":"..."`` field.
    """
    api_call = 'http://hu.wikipedia.org/w/api.php?action=expandtemplates&text={{Sablon:%(page)s|%(year)s|%(week)s|%(n)s}}&format=yaml'
    url = api_call % date_vars(date, {
        'page': encode_title(u'Kezdõlapra került szócikkek listája'),
    })
    response = feed.get_html(url, date).decode('utf-8')
    m = re.search(r'"\*":"(.+?)"', response, re.MULTILINE)
    if m is None:
        # Report which request failed instead of dying with an opaque
        # AttributeError on ``None.group``.
        raise ValueError(
            'no expanded template text in API response for %r' % (url,))
    title_json = m.group(1)
    # The captured text still carries JSON-style escapes; decode them.
    return re.sub(r'\\u([\da-f]{4})', unicode_decode, title_json)
def main():
    """Generate the feed for the configured look-back window and save it.

    Walks back ``settings['time_range']`` days from today, keeps the days
    whose ISO weekday is listed in ``settings['days_of_week']``, builds one
    item (title, url, content) per remaining day, then passes the items to
    ``feed.rss`` and calls ``feed.save``.
    """
    today = datetime.date.today()
    wanted_weekdays = settings['days_of_week']

    # Newest first: offset 0 is today, the last offset is the oldest day.
    candidates = (
        today - datetime.timedelta(days=offset)
        for offset in range(settings['time_range'])
    )
    # isocalendar()[2] is the ISO weekday (Mo: 1, Su: 7).
    dates = [day for day in candidates
             if day.isocalendar()[2] in wanted_weekdays]

    items = []
    for day in dates:
        title = get_title(day)
        headline = u'%(week)d. hét / %(n)d.: %(title)s' % date_vars(day, {'title': title})
        link = 'http://hu.wikipedia.org/wiki/%s' % encode_title(title)
        items.append({
            'title': headline,
            'url': link,
            'content': get_content(day),
        })

    feed.rss(items)
    feed.save()
# Build the feed only when executed as a script; importing this module
# must not trigger a run.
if __name__ == '__main__':
    main()