-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathfilter_rss.py
executable file
·33 lines (26 loc) · 1.07 KB
/
filter_rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
from datetime import datetime
import pytz
def filter_rss_entries(input_file, output_file):
    """Copy an RSS document, dropping every <item> dated in the future.

    Each <item> whose <pubDate> lies after the current UTC time is removed
    from its parent element; everything else is written out unchanged.
    Items with no <pubDate> (or an empty one) are kept, because there is no
    date to show they are in the future.

    Args:
        input_file: Source RSS document, passed to ``ET.parse``.
        output_file: Destination, passed to ``ElementTree.write``.

    Raises:
        ValueError: If a non-empty <pubDate> cannot be parsed as a date.
    """
    # Local imports keep the function self-contained: the module header only
    # brings in ``datetime`` itself.
    from datetime import timezone
    from email.utils import parsedate_to_datetime

    tree = ET.parse(input_file)
    root = tree.getroot()
    now = datetime.now(timezone.utc)

    # Walk parents rather than items: ElementTree elements carry no parent
    # pointer, and an item must be removed from the element that actually
    # holds it (a feed may have several channels, or none).  The element
    # list is snapshotted so removals cannot disturb the traversal.
    for parent in list(root.iter()):
        for item in parent.findall('item'):
            pub_text = item.findtext('pubDate')
            if pub_text is None or not pub_text.strip():
                continue  # undated item: keep it

            try:
                # Accepts RFC 822/2822 dates with named zones ("GMT") as
                # well as numeric offsets ("+0100"), independent of locale.
                published = parsedate_to_datetime(pub_text.strip())
            except (TypeError, ValueError) as err:
                # Older Python versions signal a bad date with TypeError;
                # normalise to ValueError.
                raise ValueError(
                    f"unparseable pubDate: {pub_text!r}"
                ) from err

            if published.tzinfo is None:
                # A "-0000" zone yields a naive datetime; treat it as UTC so
                # it can be compared with the aware ``now``.
                published = published.replace(tzinfo=timezone.utc)

            if published > now:
                parent.remove(item)

    # Write the filtered tree to the output file.
    tree.write(output_file)
# Example usage: read '64er_all.rss' and write the filtered feed to '64er.rss'.
input_file, output_file = '64er_all.rss', '64er.rss'
filter_rss_entries(input_file=input_file, output_file=output_file)