-
Notifications
You must be signed in to change notification settings - Fork 1
/
stratified_round_size_script.py
143 lines (122 loc) · 6.09 KB
/
stratified_round_size_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
This script computes and stores in a json file the polling stratum
round sizes that achieve a desired probability of stopping under the alternative
hypothesis that the election is truly as announced. It produces both
Minerva and R2 Bravo round sizes for various polling stratum sizes (as a
percentage of relevant ballots).
Oliver Broadrick 2020
"""
import json
import math
import os

import matplotlib.pyplot as plt

from round_sizes import find_sample_size_for_stopping_prob_efficiently, find_sample_size_for_stopping_prob_efficiently_r2bravo
# Sweep over contest margins and polling stratum sizes, computing for each
# combination the Minerva and R2 Bravo polling round sizes that reach the
# desired stopping probability. The searches are slow (the sweep was meant to
# be run overnight), so the results file is rewritten after every audit.

# Settings shared by every audit in the sweep.
# risk limit (same as suite example 1)
alpha = 0.1
# overall number of relevant ballots (same as suite example 1)
N_relevant = 104000
# comparison stratum round size (fixed)
n1 = 750
# desired probability of stopping under the alternative hypothesis that
# the contest truly is as announced
stopping_probability = .9
# No right (upper) bound is passed to either round size search.
# NOTE: an earlier, disabled heuristic bounded each search by a multiple of
# the round size found for the previous polling percentage (4x while
# percent_polling < .25, 2x afterwards, unbounded for the first one).
minerva_right = None
r2bravo_right = None

# loop through the various margins for which I would like results
for fractional_margin in [.01, .02, .03, .04, .05, .06, .07, .08]:
    # track all data in a struct for each margin, output to a json file
    data = {'audits': []}

    # file for data named based on margin
    data_file_name = 'data/data_stratified_'+str(round(fractional_margin * 100))+'_percent_margin.txt'
    # make sure the output directory exists before any expensive search runs,
    # otherwise the first write would fail and throw the result away
    os.makedirs(os.path.dirname(data_file_name), exist_ok=True)

    # overall winner/loser tallies implied by this margin; they do not depend
    # on how the ballots are later split into strata
    N_w = round(N_relevant * (1 + fractional_margin) / 2)
    N_l = N_relevant - N_w
    N_w_fraction = N_w / N_relevant

    # loop through the various percent sizes of the polling stratum
    for percent_polling in [.05,.1,.15,.2,.25,.3,.35,.4,.45,.5,.55,.6,.65,.7,.75,.8,.85,.9,.95]:
        # divide the ballots into strata (1 = comparison, 2 = polling)
        # all strata will have the same margin as the overall contest
        N_2 = round(percent_polling * N_relevant)
        N_1 = N_relevant - N_2
        N_w1 = round(N_w_fraction * N_1)
        N_l1 = N_1 - N_w1
        # the polling stratum takes whatever is left so the totals stay exact
        N_w2 = N_w - N_w1
        N_l2 = N_2 - N_w2

        # sanity check
        assert (N_l2 + N_l1 == N_l)
        assert (N_w2 + N_w1 == N_w)
        assert (N_1 + N_2 == N_relevant)

        # print for viewing pleasure
        print("\nfractional_margin: "+str(fractional_margin))
        print("percent_polling: "+str(percent_polling))
        for label, value in (
            ("N_relevant", N_relevant),
            ("N_w", N_w),
            ("N_l", N_l),
            ("N_1", N_1),
            ("N_w1", N_w1),
            ("N_l1", N_l1),
            ("N_2", N_2),
            ("N_w2", N_w2),
            ("N_l2", N_l2),
        ):
            print(label+": "+str(value))

        # obtain and print the minerva round size along with pvalues and lambda
        minerva_results = find_sample_size_for_stopping_prob_efficiently(stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha, underlying=None, right=minerva_right)
        print("minerva_round_size: "+str(minerva_results['round_size']))
        print("combined_pvalue: "+str(minerva_results['combined_pvalue']))
        print("comparison pvalue: "+str(minerva_results['comparison_pvalue']))
        print("polling pvalue: "+str(minerva_results['polling_pvalue']))
        print("alloc_lambda: "+str(minerva_results['alloc_lambda']))

        # obtain and print the r2bravo round size along with pvalues and lambda
        r2bravo_results = find_sample_size_for_stopping_prob_efficiently_r2bravo(stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha, underlying=None, right=r2bravo_right)
        print("r2bravo_round_size: "+str(r2bravo_results['round_size']))
        print("combined_pvalue: "+str(r2bravo_results['combined_pvalue']))
        print("comparison pvalue: "+str(r2bravo_results['comparison_pvalue']))
        print("polling pvalue: "+str(r2bravo_results['polling_pvalue']))
        print("alloc_lambda: "+str(r2bravo_results['alloc_lambda']))

        # add this data to the data structure
        data['audits'].append({
            'percent_polling': percent_polling,
            'N_relevant': N_relevant,
            'N_w': N_w,
            'N_l': N_l,
            'N_2': N_2,
            'N_1': N_1,
            'N_w1': N_w1,
            'N_l1': N_l1,
            'N_w2': N_w2,
            'N_l2': N_l2,
            'minerva_round_size': minerva_results['round_size'],
            'minerva_combined_pvalue': minerva_results['combined_pvalue'],
            'minerva_comparison_pvalue': minerva_results['comparison_pvalue'],
            'minerva_polling_pvalue': minerva_results['polling_pvalue'],
            'minerva_alloc_lambda': minerva_results['alloc_lambda'],
            'r2bravo_round_size': r2bravo_results['round_size'],
            'r2bravo_combined_pvalue': r2bravo_results['combined_pvalue'],
            'r2bravo_comparison_pvalue': r2bravo_results['comparison_pvalue'],
            'r2bravo_polling_pvalue': r2bravo_results['polling_pvalue'],
            'r2bravo_alloc_lambda': r2bravo_results['alloc_lambda'],
        })

        # update the file each loop (for convenience of checking progress)
        with open(data_file_name, 'w') as outfile:
            json.dump(data, outfile, indent=2)