-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
260 lines (211 loc) · 7.79 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
import streamlit as st
import pandas as pd
import os, re
from interface.landing_page import get_landing_page
from interface.retweet_graph_analysis import get_retweet_graph_analysis_page
from interface.top_image_analysis import get_top_image_analysis_page
from interface.candidate_analysis import get_candidate_analysis_page
from interface.explore_data import get_explore_data_page
import interface.SessionState as SessionState
# from interface.df_utils import load_pickled_df
from interface.image_urls import bucket_image_urls
import pickle
# Configure the Streamlit page; must run before any other st.* call.
_PAGE_CONFIG = {
    "page_title": "VoterFraud2020 - a Twitter Dataset of Election Fraud Claims",
    "page_icon": "./interface/img/favicon.ico",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
@st.cache
def insert_html_header(name, snippet):
    """Inject an HTML snippet into Streamlit's served static index.html.

    Idempotent: if the snippet text is already present in the file, nothing
    is rewritten. Wrapped in @st.cache so the check runs at most once per
    cached session rather than on every script rerun.

    Args:
        name: Human-readable label, used only in log output.
        snippet: Raw HTML inserted immediately after the opening <head> tag.
    """
    index_path = os.path.dirname(st.__file__) + "/static/index.html"
    with open(index_path, "r", encoding="utf-8") as f:
        data = f.read()
    if snippet not in data:
        print("Inserting {}".format(name))
        # count=1 ensures the snippet is inserted exactly once even if the
        # literal "<head>" string appears more than once in the file.
        new_data = re.sub("<head>", "<head>" + snippet, data, count=1)
        with open(index_path, "w", encoding="utf-8") as out:
            out.write(new_data)
    else:
        print("{} already inserted".format(name))
# Disabled headers for streamlit serving compatibility
## Google analytics
# insert_html_header(
# "Google Analytics Tag",
# """
# <!-- Global site tag (gtag.js) - Google Analytics -->
# <script async src="https://www.googletagmanager.com/gtag/js?id=G-8VB4WZRD7C"></script>
# <script>
# window.dataLayer = window.dataLayer || [];
# function gtag(){dataLayer.push(arguments);}
# gtag('js', new Date());
# gtag('config', 'G-8VB4WZRD7C');
# </script>
# """,
# )
# insert_html_header(
# "Meta tags",
# """
# <!-- Primary Meta Tags -->
# <title>VoterFraud2020 - a Twitter Dataset of Election Fraud Claims</title>
# <meta name="title" content="VoterFraud2020 - a Twitter Dataset of Election Fraud Claims">
# <meta name="description" content="Voterfraud2020 is a multi-modal Twitter dataset with 7.6M tweets and 25.6M retweets from 2.6M users related to voter fraud claims.">
# <!-- Open Graph / Facebook -->
# <meta property="og:type" content="website">
# <meta property="og:url" content="https://metatags.io/">
# <meta property="og:title" content="VoterFraud2020 - a Twitter Dataset of Election Fraud Claims">
# <meta property="og:description" content="Voterfraud2020 is a multi-modal Twitter dataset with 7.6M tweets and 25.6M retweets from 2.6M users related to voter fraud claims.">
# <meta property="og:image" content="{}">
# <!-- Twitter -->
# <meta property="twitter:card" content="summary_large_image">
# <meta property="twitter:url" content="https://metatags.io/">
# <meta property="twitter:title" content="VoterFraud2020 - a Twitter Dataset of Election Fraud Claims">
# <meta property="twitter:description" content="Voterfraud2020 is a multi-modal Twitter dataset with 7.6M tweets and 25.6M retweets from 2.6M users related to voter fraud claims.">
# <meta property="twitter:image" content="{}">
# """.format(
# bucket_image_urls["retweet_graph_suspended"],
# bucket_image_urls["retweet_graph_suspended"],
# ),
# )
# Read the URL query parameters once. app_state starts as an independent
# copy (previously fetched via a redundant second API call) so that mutating
# it later (app_state["page"] = ...) cannot alias the dict captured in the
# session state below.
query_params = st.experimental_get_query_params()
app_state = dict(query_params)
# SessionState pins the query params seen on the very first page load, so a
# ?page=... deep link is honoured exactly once per browser session.
session_state = SessionState.get(first_query_params=query_params)
first_query_params = session_state.first_query_params
# Maps each sidebar label to the function that renders that page.
# Dict insertion order defines the sidebar order; the first entry is the
# default landing page (see get_selected_page_index / get_query_params).
PAGES = {
    "VoterFraud2020": get_landing_page,
    "Retweet Graph Analysis": get_retweet_graph_analysis_page,
    "Explore The Dataset": get_explore_data_page,
    "Top Images": get_top_image_analysis_page,
    "Midterm Candidates": get_candidate_analysis_page,
    # "Overview": get_tweet_analysis_page,
    # "Top Tweets": get_weekly_tweet_analysis_page,
    # "Top Users": get_weekly_user_analysis_page,
    # "URL Analysis": get_url_analysis_page,
    # "Filter by crawled term": get_crawled_term_analysis_page,
}
# Ordered page names, used for the radio widget and index<->name mapping.
PAGE_OPTIONS = list(PAGES.keys())
# NOTE(review): LIMIT is never read in this file — presumably a leftover row
# cap for data loading; confirm before removing.
LIMIT = None
class SharedState:
    """Bare attribute container; populated with the dataframes and stats
    shared across all pages at startup."""
# Directory holding the pre-computed pickles/CSVs bundled with the app.
DATAFRAME_DIR = "./interface/data/"
def _load_top_images(filename, cluster_column):
    """Load one of the top-10 retweeted-image CSV exports.

    Args:
        filename: CSV file name inside DATAFRAME_DIR (semicolon-delimited).
        cluster_column: Name of the per-cluster retweet-count column, which
            differs between the promoter/detractor/suspended exports.

    Returns:
        DataFrame restricted to the display columns, in display order.
    """
    df = pd.read_csv(DATAFRAME_DIR + filename, delimiter=";")
    # The exports carry a stray index column; dropping it explicitly (rather
    # than just not selecting it) keeps the original fail-loudly behavior if
    # the file format ever changes.
    df = df.drop("Unnamed: 0", axis=1)
    return df[
        [
            "num_of_unique_tweet_id",
            cluster_column,
            "sum_of_retweet_count",
            "image_url",
        ]
    ]


@st.cache(allow_output_mutation=True)
def prepare_shared_state():
    """Load every dataframe/stat shared across pages into one SharedState.

    Cached by Streamlit so the slow disk reads happen once per process;
    allow_output_mutation skips hashing the large dataframes on each rerun.

    Returns:
        SharedState with df_images_promoters/detractors/suspended,
        df_candidates, and coverage_stats attributes set.
    """
    print("Preparing Shared State")
    state = SharedState()
    state.df_images_promoters = _load_top_images(
        "top_10_retweeted_promoters.csv", "sum_of_retweets_by_cluster_1_to_4"
    )
    state.df_candidates = pd.read_pickle(DATAFRAME_DIR + "candidate_users.pickle")
    # Cast to object so the community label is treated as categorical rather
    # than numeric by downstream display code.
    state.df_candidates["user_community"] = state.df_candidates[
        "user_community"
    ].astype(object)
    state.df_images_detractors = _load_top_images(
        "top_10_retweeted_detractors.csv", "retweets_by_cluster_0"
    )
    state.df_images_suspended = _load_top_images(
        "top_10_retweeted_suspended.csv", "retweets_by_suspended"
    )
    with open(DATAFRAME_DIR + "coverage_stats.pickle", "rb") as f:
        state.coverage_stats = pickle.load(f)
    print("Shared State Loaded")
    return state
def get_selected_page_index():
    """Return the PAGE_OPTIONS index requested via the ?page= query param.

    Falls back to 0 (the landing page) when no page was requested on first
    load or the requested name is not a known page.
    """
    if "page" not in app_state or "page" not in first_query_params:
        return 0
    requested = first_query_params["page"][0]
    try:
        return PAGE_OPTIONS.index(requested)
    except ValueError:
        return 0
# Load (or fetch from cache) the shared dataframes before rendering anything.
shared_state = prepare_shared_state()
## CSS
# Global CSS tweaks: center and size the sidebar logos, and keep all images
# within their container width.
st.markdown(
    """
<style>
img.logo {
margin: 0px auto;
margin-top: -45px;
margin-bottom: 25px;
width: 200px;
}
img.logo-2 {
margin: 0px auto;
margin-top: 25px;
width: 200px;
}
img {
max-width: 100%;
}
</style>
""",
    unsafe_allow_html=True,
)
# Sidebar: lab logo image, then a radio button group acting as navigation.
st.sidebar.markdown(
    """
<img src="{}" class="logo" alt="CT logo" />
""".format(
        bucket_image_urls["jacobs-logo-transparent"]
    ), unsafe_allow_html=True
)
st.sidebar.title("Navigation")
# index preselects the page requested via the ?page= query parameter, if any.
selection = st.sidebar.radio("Go to", PAGE_OPTIONS, index=get_selected_page_index())
# Sidebar footer: paper link plus dataset download links.
# Fixed typo in the user-facing link text: "Fighshare" -> "Figshare"
# (the URL points to doi.org/figshare).
st.sidebar.markdown(
    """
- [The Paper (arXiv)](https://arxiv.org/abs/2101.08210)
### Download The Full Dataset
- [Github Repository](https://github.com/sTechLab/VoterFraud2020)
- [Figshare](https://doi.org/10.6084/m9.figshare.13571084)
"""
)
# st.sidebar.markdown(
# """
# <img src="{}" class="logo-2" alt="Jacobs logo" />
# """.format(
# bucket_image_urls["jacobs-logo-transparent"]
# ), unsafe_allow_html=True
# )
# Record the chosen page so it can be reflected back into the URL below.
app_state["page"] = selection
def get_query_params():
    """Return the query params to publish in the browser URL.

    The landing page (first option) keeps a clean URL with no parameters;
    every other page exposes itself as ?page=<name> so views are
    deep-linkable.
    """
    if selection == PAGE_OPTIONS[0]:
        return {}
    return {"page": app_state["page"]}
# Sync the URL with the current selection, then render the chosen page,
# passing in the shared (cached) data.
st.experimental_set_query_params(**get_query_params())
page = PAGES[selection]
page(shared_state)