-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_basetable.sql
440 lines (440 loc) · 15.4 KB
/
train_basetable.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
WITH
student AS (
SELECT
id
, CASE
WHEN students_home__latitude__s IS NULL THEN 40.776676
ELSE students_home__latitude__s
END
, CASE
WHEN students_home__longitude__s IS NULL THEN -73.971321
ELSE students_home__longitude__s
END
, best_ever_cy_assignment__c
, best_ever_cy_assignment_rank__c AS accepted_first_rank
, free_uniform_order_date__c AS uniform_ordered_date
, is_english_language_learner__c AS ell_status
, is_homeless__c AS homeless_status
, survey_representative_first_name__c AS recruiter_first_name
, survey_representative_last_name__c AS recruiter_last_name
FROM
sacs_salesforce.school_application_mirror.prod_student__c psc
WHERE
best_ever_cy_assignment_status__c = 'Accepted'
AND archived__c = FALSE
)
, student_contact AS (
SELECT
DISTINCT
org_specific_id__c AS esd_id
, student__c AS student
FROM
sacs_salesforce.school_application_mirror.prod_contact
)
, parent_contact AS (
SELECT
DISTINCT
id AS parent_id
, lastname
, utm_source__c AS utm_source
, utm_medium__c AS utm_medium
, utm_campaign__c AS utm_campaign
, utm_content__c AS utm_content
, uuid__c AS parent_uuid
FROM
sacs_salesforce.school_application_mirror.prod_contact
)
, lottery AS (
SELECT
id
, round_application__c
, account_school__c
, offer_date__c
, min(offer_date__c) OVER(
PARTITION BY round_application__c
) AS first_offer_date
FROM
sacs_salesforce.school_application_mirror.prod_waitlist_school_ranking__c
)
, application AS (
SELECT
id
, name
, student__c
, parent_contact__c
, current_grade__c AS grade
FROM
sacs_salesforce.school_application_mirror.prod_application__c
WHERE
status__c = 'Submitted'
AND (
ineligibility_reason__c IS NULL
OR ineligibility_reason__c NOT IN (
'Age - Too Old', 'Test Application', 'Ineligible', 'Duplicate', 'Age - Too Young', 'No Proof', 'Test Application'
)
)
AND recordtypeid != '0125f000000AoNiAAK'
AND archived__c = FALSE
)
, account AS (
SELECT
id
, name AS accepted_school
FROM
sacs_salesforce.school_application_mirror.prod_account
)
, hed AS (
SELECT
DISTINCT
app_round_application__c
, program_rank__c
, priority_group_verbiage__c AS had_enrolled_sib
FROM
sacs_salesforce.school_application_mirror.prod_hed__application__c
)
, referral AS (
SELECT
application_id
, verified_parent
FROM
raw_data_science.raw_sy_23_24_parent_referal_apps
)
, present_days AS (
SELECT
sa_scholar_id
, count(*) AS days_active
FROM
sacs.fact_daily_scholar_status fdss
WHERE
fdss.date_key >= '2023-08-14'
AND (
attendance_status IN (
'T', 'P'
)
OR excused = TRUE
)
AND in_session = TRUE
GROUP BY
1
)
, attended_five AS (
SELECT
DISTINCT
dsoi.sa_scholar_id
FROM
present_days
INNER JOIN sacs.dim_scholar_other_info dsoi
ON
present_days.sa_scholar_id = dsoi.sa_scholar_id
WHERE
ispreregistered = 'No'
AND days_active >= 5
AND (
exit_reason_id IS NULL
OR exit_reason_id != 78
)
)
, splash AS (
SELECT
SUBSTRING(
prod_splash_tours.salesforce_id::VARCHAR
, "POSITION"(
prod_splash_tours.salesforce_id::VARCHAR
, ':'::VARCHAR
) + 1
, 50
) AS splash_parent_id
,
CASE
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE '%uniform%'::VARCHAR THEN 'Uniform Fitting'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE '%orientation%'::VARCHAR THEN 'Orientation'::VARCHAR
WHEN lower(prod_splash_tours.event_type::VARCHAR) ILIKE '%webinars'::VARCHAR THEN 'Virtual Event'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE 'welcome to success academy with eva moskowitz'::VARCHAR THEN 'Virtual Event'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE 'summer intensive'::VARCHAR THEN 'Summer Intensive'::VARCHAR
ELSE 'In-Person Event'::VARCHAR
END AS event_type_bucket
, MAX(
CASE
WHEN prod_splash_tours.status::VARCHAR = 'rsvp_yes'::VARCHAR OR prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR OR prod_splash_tours.status::VARCHAR = 'checkin_no'::VARCHAR
THEN 1
ELSE 0
END) AS rsvp
, MAX(CASE
WHEN prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR AND event_name = 'Success Academy Orientation' AND event_type = 'Prospective Family In-Person Events'
THEN 1
ELSE 0
END) AS orientation_checkin
, MIN (
CASE
WHEN prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR
AND event_name = 'Success Academy Orientation'
AND event_type = 'Prospective Family In-Person Events'
THEN prod_splash_tours.checked_in
ELSE NULL
END
) AS orientation_check_in_time
, MAX(
CASE
WHEN prod_splash_tours.status::VARCHAR = 'rsvp_yes'::VARCHAR AND event_name = 'Success Academy Orientation' AND event_type = 'Prospective Family In-Person Events'
THEN 1
ELSE 0
END) AS orientation_rsvp
, MIN (
CASE
WHEN prod_splash_tours.status::VARCHAR = 'rsvp_yes'::VARCHAR
AND event_name = 'Success Academy Orientation'
AND event_type = 'Prospective Family In-Person Events'
THEN substring(
date_rsvped
, 1
, 10
)
ELSE NULL
END
) AS orientation_rsvp_date
, MAX(CASE
WHEN prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR AND prod_splash_tours.event_name::VARCHAR LIKE 'Community Support Day%'::VARCHAR
THEN 1
ELSE 0
END) AS community_support_day
, MAX(CASE
WHEN prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR AND prod_splash_tours.event_name::VARCHAR NOT LIKE '%Orientation%' AND (prod_splash_tours.event_name::VARCHAR LIKE '%New Family%'::VARCHAR OR prod_splash_tours.event_name::VARCHAR LIKE 'Success Academy%'::VARCHAR)
THEN 1
ELSE 0
END
) AS new_family_day
, MAX(
CASE
WHEN prod_splash_tours.status::VARCHAR = 'checkin_yes'::VARCHAR OR prod_splash_tours.status::VARCHAR = 'checkin_no'::VARCHAR THEN 1
ELSE 0
END) AS attended
FROM
prod_data_science.prod_splash_tours AS prod_splash_tours
GROUP BY
SUBSTRING(
prod_splash_tours.salesforce_id::VARCHAR
, "POSITION"(
prod_splash_tours.salesforce_id::VARCHAR
, ':'::VARCHAR
) + 1
, 50
)
, 2
,
CASE
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE '%uniform%'::VARCHAR THEN 'Uniform Fitting'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE '%orientation%'::VARCHAR THEN 'Orientation'::VARCHAR
WHEN lower(prod_splash_tours.event_type::VARCHAR) ILIKE '%webinars'::VARCHAR THEN 'Virtual Event'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE 'welcome to success academy with eva moskowitz'::VARCHAR THEN 'Virtual Event'::VARCHAR
WHEN lower(prod_splash_tours.event_name::VARCHAR) ILIKE 'summer intensive'::VARCHAR THEN 'Summer Intensive'::VARCHAR
ELSE 'In-Person Event'::VARCHAR
END
)
,
yield_probability_tours_status AS
(
SELECT
application.name AS application_id
, splash_parent_id
, MAX(COALESCE(orientation_rsvp, 0)) AS orientation_rsvp
, MIN(orientation_rsvp_date) AS orientation_rsvp_date
, MAX(COALESCE(orientation_checkin, 0)) AS orientation_checkin
, MIN(orientation_check_in_time) AS orientation_check_in_time
, MAX(COALESCE(community_support_day, 0)) AS community_support_day
, MAX(COALESCE(new_family_day, 0)) AS new_family_day
, MAX(COALESCE(
CASE
WHEN tours_agg.event_type_bucket = 'Virtual Event'::VARCHAR THEN tours_agg.attended
ELSE 0
END, 0)) AS virtual_event_attended
, MAX(COALESCE(
CASE
WHEN tours_agg.event_type_bucket = 'In-Person Event'::VARCHAR THEN tours_agg.rsvp
ELSE 0
END, 0)) AS in_person_event_rsvp
, MAX(COALESCE(
CASE
WHEN tours_agg.event_type_bucket = 'In-Person Event'::VARCHAR THEN tours_agg.attended
ELSE 0
END, 0)) AS in_person_event_attended
FROM
application
LEFT JOIN splash AS tours_agg
ON
application.parent_contact__c::VARCHAR = tours_agg.splash_parent_id
GROUP BY
1
, 2
)
, unmatched_parent_checkins AS (
SELECT
parent_id AS unmatched_parent_id
FROM
raw_data_science.raw_unmatched_parent_checkins
)
, best_offer AS (
SELECT
first_offer_date
, uniform_ordered_date
, student.id
, students_home__latitude__s
, students_home__longitude__s
, grade
, accepted_school
, CASE WHEN grade SIMILAR TO ('5|6') THEN 1 ELSE 0 END AS summer_intensive
, CASE
WHEN attended_five.sa_scholar_id IS NOT NULL THEN 1
ELSE 0
END AS yield
, datediff(DAY, first_offer_date, '{date}'::DATE) AS days_since_offer
, CASE
WHEN uniform_ordered_date <= '{date}'::DATE THEN 1
ELSE 0
END
AS uniform_ordered
, CASE
WHEN accepted_first_rank = 1 THEN 1
ELSE 0
END AS accepted_first_rank
, CASE
WHEN had_enrolled_sib SIMILAR TO ('Sibling Attending Same School|Sibling Attending Co-Located School|Child of Staff')
THEN 1
ELSE 0
END AS had_enrolled_sib
, CASE
WHEN ell_status = 'Yes' THEN 1
ELSE 0
END AS ell_status
, CASE
WHEN homeless_status IS TRUE THEN 1
ELSE 0
END AS homeless_status
, orientation_rsvp
, CASE
WHEN orientation_rsvp = 1
OR orientation_checkin = 1 THEN 1
ELSE 0
END AS total_orientation_rsvps
, CASE
WHEN unmatched_parent_checkins.unmatched_parent_id IS NOT NULL THEN 1
ELSE orientation_checkin
END AS orientation_checkin
, virtual_event_attended
, in_person_event_attended
, new_family_day
, community_support_day
, recruiter_first_name || ' ' || recruiter_last_name AS recruiter_full_name
, CASE
WHEN verified_parent IN ('Verified') THEN 'Parent Referral'
WHEN lower(recruiter_first_name) LIKE '%quick%' THEN 'Field Team Quick App'
WHEN recruiter_full_name IN (
'Claire Leka', 'Claire Leika', 'Claire Lake', 'Claire Laka', 'Alexandra Kinderman', 'Alex Kinderman', 'Angela Johnson', 'Angela Jhonson', 'Santy Barrera Claire Leka', 'Wilmer Cabral', 'Santy Barrera', 'Laura Anderson', 'Marielis Perez', 'Fatima Akindele', 'Johanna Garcia', 'Huda Ali', 'Fatimat Akindele', 'Latoya Dakins', 'Dorrant Linton', 'Lefran Pierre', 'Naklah Saleh', 'Ralph Poinvil'
) THEN 'Field Team Full App'
WHEN utm_medium IN ('recruiter') THEN 'Field Team Full App'
WHEN utm_source IN ('scholar_recruitment')
AND utm_medium IN (
'print', 'adpartnereblasts', 'MommyPoppins'
) THEN 'Field Team Full App'
WHEN utm_source IN ('scholar_recruitment')
AND utm_medium IN (
'in_person_tours', 'in_person_tour', 'open_house', 'virtual_info_sessions'
) THEN 'Field Team Event Link'
WHEN utm_source IN ('schoar_recruitment') THEN 'Field Team Event Link'
WHEN utm_source IN ('scholar-recruitment')
AND utm_medium IN ('eztext') THEN 'Field Team Event Link'
WHEN utm_source IN ('school_tours')
AND utm_medium IN ('tour_brochure') THEN 'Field Team Event Link'
WHEN utm_source IN (
'braze', 'emma'
)
OR utm_medium IN ('email')
OR utm_campaign LIKE '%sms%'
OR utm_campaign IN ('email') THEN 'Email/SMS'
WHEN utm_source IN ('bing')
AND utm_medium IN ('cpc') THEN 'Bing Branded Search'
WHEN utm_source IN ('FM_FB_IG') THEN 'Paid Meta'
WHEN utm_source IN ('niche') THEN 'Niche'
WHEN utm_source IN ('GreatSchools') THEN 'Great Schools'
WHEN utm_source IN ('offline') THEN 'Offline'
WHEN utm_medium IN (
'social', 'organic-social'
) THEN 'Organic Social'
WHEN utm_source IN ('parent_referral_program') THEN 'Parent Referral'
WHEN utm_source IN ('Bitly')
AND utm_medium IN ('Referral') THEN 'Parent Referral'
WHEN utm_source IN ('tatari_streaming') THEN 'Tatari'
WHEN utm_source IN ('thesis') THEN 'Thesis'
WHEN utm_source IN ('FM_TIKTOK') THEN 'TikTok'
WHEN utm_source IN (
'faq-page', 'app-dashboard'
) THEN 'Website'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc')
AND (
utm_campaign IN ('Brand')
OR utm_campaign LIKE '%|Brand|%'
) THEN 'Google Branded Search'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc')
AND (
utm_campaign IN ('Scholar')
OR utm_campaign LIKE '%|NB|%'
OR utm_campaign LIKE '05052023'
) THEN 'Google Non Brand Search'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc')
AND utm_campaign LIKE '%|PerformanceMax|%' THEN 'Google Performance Max'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc')
AND utm_campaign LIKE '%|Discovery|%' THEN 'Google Discovery'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc')
AND utm_campaign LIKE '%|Video|%' THEN 'Google Video - YouTube'
WHEN utm_source IN ('google')
AND utm_medium IN ('cpc') THEN 'Google Non Brand Search'
ELSE 'Organic / No Tracking'
END AS utm_source_bucketing
FROM
student
INNER JOIN student_contact
ON
student.id = student_contact.student
INNER JOIN application
ON
student.id = application.student__c
INNER JOIN lottery
ON
student.best_ever_cy_assignment__c = lottery.id
AND application.id = lottery.round_application__c
LEFT JOIN account
ON
account.id = lottery.account_school__c
LEFT JOIN hed
ON
application.id = hed.app_round_application__c
AND student.accepted_first_rank = hed.program_rank__c
LEFT JOIN referral
ON
application.name = referral.application_id
LEFT JOIN attended_five
ON
student_contact.esd_id = attended_five.sa_scholar_id
LEFT JOIN parent_contact
ON
parent_contact.parent_id = application.parent_contact__c
LEFT JOIN yield_probability_tours_status
ON
application.name = yield_probability_tours_status.application_id
LEFT JOIN unmatched_parent_checkins
ON
parent_contact.parent_id = unmatched_parent_checkins.unmatched_parent_id
WHERE
utm_source_bucketing != 'Field Team Quick App'
AND first_offer_date <= '{date}'::DATE
)
SELECT
*
--count(*)
FROM
best_offer
WHERE NOT (first_offer_date < '2023-05-01' AND accepted_school IN ('SA Queens Village', 'SA Norwood', 'SA Sheepshead Bay')) ;