# run-data-curation-pipeline.R
#
# Driver script: sources each data curation script in sequence and clears the
# workspace with rm(list = ls()) after every step.
# -----------------------------------------------------------------------------
# Stage: raw data extraction
# -----------------------------------------------------------------------------

# Unzipping is a very time-intensive operation. Per the original note, the
# condition below should be TRUE the first time this script is run.
if (TRUE) {
  source(file = "setup-curated-datasets/unzip.R")
  rm(list = ls())
}

# The extraction scripts are sourced in the order listed; after each one,
# rm(list = ls()) removes every object that ls() reports.
#   get-cstress-featurevec.R           timestamps from the cStress feature
#                                      vector that pertain to when Heart Rate
#                                      was calculated
#   get-start-day-button-press-data.R  start-of-day button press data
#   get-activity-data.R                predictions of the activity detection
#                                      algorithm
#   get-randomization-data.R           raw data on micro-randomization
#   get-stress-episode-data.R          raw data on stress episode
#                                      classification
for (extraction_script in c(
  "setup-curated-datasets/get-cstress-featurevec.R",
  "setup-curated-datasets/get-start-day-button-press-data.R",
  "setup-curated-datasets/get-activity-data.R",
  "setup-curated-datasets/get-randomization-data.R",
  "setup-curated-datasets/get-stress-episode-data.R"
)) {
  source(file = extraction_script)
  # This also drops the loop variable; `for` assigns it again on the next pass,
  # and nothing is left behind once the loop finishes.
  rm(list = ls())
}
# -----------------------------------------------------------------------------
# Stage: participant masterlist
#
# Build a data frame listing the IDs of all participants enrolled into the
# Sense2Stop study, then determine whether each of them meets any criterion
# for not being included in any analyses
# -----------------------------------------------------------------------------
source(file = "setup-curated-datasets/create-masterlist.R")
rm(list = ls())

# The continuation script depends on the output of get-randomization-data.R
# and get-stress-episode-data.R, so those must have been run beforehand.
source(file = "setup-curated-datasets/create-masterlist-continued.R")
rm(list = ls())
# -----------------------------------------------------------------------------
# Check: summary statistics on the unzipped stress data, restricted to
# participants who were not excluded due to C1, C2, C3, C4
# -----------------------------------------------------------------------------

# Per the original note, the condition below should be TRUE the first time
# this script is run.
if (TRUE) {
  source(file = "check-intermediate-datasets/check-unzipped-stress-data.R")
  rm(list = ls())
}
# -----------------------------------------------------------------------------
# Stage: long-format "skeleton" data frames
#   (i)  one row per participant-day
#   (ii) one row per participant-day-minute
# -----------------------------------------------------------------------------

# Participants excluded from all data analysis do not appear in the skeleton;
# only the remaining 49 participants are included.
source(file = "setup-curated-datasets/create-skeleton.R")
rm(list = ls())
# -----------------------------------------------------------------------------
# Stage: long-format data frame with the Yit's
# -----------------------------------------------------------------------------
# Each check-* step below sits behind its own `if (TRUE)` switch; per the
# original notes, those conditions should be TRUE the first time this script
# is run.

# Episodes (all types) whose peak is not within the first day of mrt and the
# last day of mrt are removed in this script, prior to all subsequent data
# processing steps. Its output leaves out participants excluded from all data
# analysis, i.e., it only covers the remaining 49 participants.
source(file = "setup-curated-datasets/construct-episodes-physical-activity.R")
rm(list = ls())

if (TRUE) {
  source(file = "check-intermediate-datasets/check-episode-length.R")
  rm(list = ls())
}

source(file = "setup-curated-datasets/construct-heart-rate-indicators.R")
rm(list = ls())

if (TRUE) {
  source(file = "check-intermediate-datasets/check-existence-heart-rate-data.R")
  rm(list = ls())
}

source(file = "setup-curated-datasets/censor-episodes.R")
rm(list = ls())

if (TRUE) {
  source(file = "check-intermediate-datasets/check-after-censoring.R")
  rm(list = ls())
}

source(file = "setup-curated-datasets/censor-more-episodes.R")
rm(list = ls())

if (TRUE) {
  source(file = "check-intermediate-datasets/check-after-more-censoring.R")
  rm(list = ls())
}

source(file = "setup-curated-datasets/construct-minute-by-minute-classification.R")
rm(list = ls())
# -----------------------------------------------------------------------------
# Stage: merge the randomization assignment data with the minute-by-minute
# episode classification data, and begin creating the stratification variable
# -----------------------------------------------------------------------------
source(file = "setup-curated-datasets/construct-stratification-variable.R")
rm(list = ls())

# The two checks below are toggled independently; per the original notes,
# both conditions should be TRUE the first time this script is run.
if (TRUE) {
  source(file = "check-intermediate-datasets/check-availability.R")
  rm(list = ls())
}

if (TRUE) {
  source(file = "check-intermediate-datasets/check-stratification-variable.R")
  rm(list = ls())
}
# -----------------------------------------------------------------------------
# Stage: merge the randomization assignment data with the minutes between the
# first day and the last day of the MRT
# -----------------------------------------------------------------------------
source(file = "setup-curated-datasets/link.R")
rm(list = ls())

# -----------------------------------------------------------------------------
# Check: trimming of probabilities
# -----------------------------------------------------------------------------

# Per the original note, the condition below should be TRUE the first time
# this script is run.
if (TRUE) {
  source(file = "check-intermediate-datasets/check-trim.R")
  rm(list = ls())
}
# -----------------------------------------------------------------------------
# Stage: finish constructing the variables used for basic data analysis, then
# construct more variables
# -----------------------------------------------------------------------------
# Sourced in the order listed; after each script, rm(list = ls()) removes
# every object that ls() reports.
#   construct-data-for-analysis.R      completes the variables used for basic
#                                      data analysis
#   construct-engagement-indicators.R  more variables
#   construct-predictors.R             more variables
for (construction_script in c(
  "setup-curated-datasets/construct-data-for-analysis.R",
  "setup-curated-datasets/construct-engagement-indicators.R",
  "setup-curated-datasets/construct-predictors.R"
)) {
  source(file = construction_script)
  # This also drops the loop variable; `for` assigns it again on the next pass,
  # and nothing is left behind once the loop finishes.
  rm(list = ls())
}