3) ProPerlyAsked visualization.Rmd

# ProPer visualization (III): *Periograms*

Adjust the periodic energy and F0 curves and create their visual interaction, a.k.a. *Periogram*.

```{r clean_start}
## Start from an empty workspace so objects from earlier runs cannot leak in
rm(list = ls())

## Load required libraries
## (library() stops right away when a package is missing, whereas require()
##  only warns and lets the chunk fail later with a less helpful error)
library(tidyverse)
library(Cairo)
library(zoo)
library(seewave)
# library(signal)

## Read the previous table: raw_df
## (keep a single row for every file/time-point combination)
raw_df <- read.csv("data_tables/raw_df.csv") %>% distinct(file, t, .keep_all = TRUE)

## Switch to current table: main_df
main_df <- droplevels(raw_df)

## Calculate total-power and periodic-power
## (periodic power = total power weighted by the periodic strength)
main_df <- mutate(group_by(main_df,file),
                  totalPower = round(4e-10 * 10^(intensity / 10), 9),
                  periodicPower = round(totalPower * periodicStrength, 9)
                  )

## get the filenames list (one entry per file, in order of first appearance)
files <- unique(main_df$file)

```

## Prepare periodic energy & F0 curves:

### Presets

1. Adjust 'perFloor' and 'relTo'.[^1]
2. Run the entire chunk.
3. Run the following plotting chunk and use the resulting plots to refine an initial optimal 'perFloor' value for the entire dataset -- the ensuing chunk is designed to map unique 'perFloor' values for selected speakers (or tokens) if needed.

[^1]: Set 'perFloor' to determine the floor (zero) when log-transforming "periodic power" ('postPP') into "periodic energy" ('logPP', and then 'smogPP' for the smoothed variants). The 'perFloor' value (between 0--1) is a proportion taken from the bottom of the 'postPP' scale. For example, "perFloor <- 0.01" means that values in the bottom 1% of the 'postPP' scale will end up below the threshold (they will be negative after the log transform and will appear as zero in the resulting 'logPP' and 'smogPP' vectors). Note that the 'perFloor' setting can make a relatively big difference!

```{r presets_main_df, warning=FALSE}
#
##    'perFloor': the single best-fitting floor value for the whole data set
# (different values for selected speakers and/or tokens can be set later).
# Fitting values are likely to be found between 0.1% (0.001) and 5% (0.05).
perFloor <- 0.05 # {.001 -- .05}
#
##    'relTo': anchor for relative measurements
# "data" = the entire data, "speaker" = same speaker, "token" = each token by itself.
# Preferably choose "speaker" ([2]) if applicable.
relToOptions <- c("data", "speaker", "token")
relTo <- relToOptions[3] # {[1] -- [3]}; change the number in [square brackets].
#
##    'sourceF0': which F0 source is used for the F0 statistics ("Praat" or "CC")
sourceF0Options <- c("Praat", "CC")
sourceF0 <- sourceF0Options[1]

```

### Compute new variables

```{r new_variables_main_df, warning=FALSE}
#
## Speaker-level variables can only be computed when a 'speaker' column exists.
## Stop early with a clear message if speaker-relative anchoring was requested
## without one (otherwise the failure would surface much later and obscurely).
has_speaker <- "speaker" %in% names(main_df)
if (relTo == "speaker" && !has_speaker) {
  stop("relTo is set to \"speaker\" but main_df has no 'speaker' column", call. = FALSE)
}
#
## F0 interpolating, stretching and smoothing: mutate by token
main_df <- mutate(
  group_by(main_df, file),
  ##  f0_smooth_stretch ('f0_smootch')
  # f0_smootch = ifelse(
  #   (is.na(f0_smooth) & t<min(t[which(!is.na(f0_smooth))])),
  #   f0_smooth[min(which(!is.na(f0_smooth)))], ifelse(
  #     (is.na(f0_smooth) & t>=max(t[which(!is.na(f0_smooth))])),
  #     f0_smooth[max(which(!is.na(f0_smooth)))], f0_smooth)),
  ##  f0_smooth_stretch_interp ('f0_smootchInterp')
  # f0_smootchInterp = na.approx(f0_smootch, na.rm=FALSE),

  ##  f0 post smooth_stretch_interp_smooth ('f0Post')
    # f0_CCPost = round(bwfilter(wave = f0_CCsmootchInterp, f = 1000, to = 12, n = 1, output = "Sample")),
    # f0_CCPost = f0_CCsmootchInterpLin,
    # f0_CCPost = round(f0_CCsmootchInterpLin,2),
    # f0Post = round(bwfilter(wave = f0_smootchInterp, f = 1000, to = 12, n = 1, output = "Sample")),
    # f0Post = f0_smootchInterpLin,
    f0Post = f0_smootchInterpSplin,
    # f0Post = round(f0_smootchInterpLin,2),

  ##  f0 post without outer stretches ('f0Post_dropStretch'):
  ##  NA wherever interpolating 'f0_smooth' still leaves an NA
  f0Post_dropStretch = ifelse(
    is.na(na.approx(f0_smooth, na.rm = FALSE)), NA, f0Post),
  ##  f0 post without interpolation ('f0Post_dropInterp'):
  ##  NA wherever 'f0_smooth' itself is NA
  f0Post_dropInterp = ifelse(
    is.na(f0_smooth), NA, f0Post)
  )
## mutate the entire data set (no grouping)
main_df <- mutate(
  ungroup(main_df),
  ## keep records of adjusted variables
  perFloorStatus = perFloor,
  relToStatus = relTo,
  ## find values for entire data set
  max_data_per_power = max(periodicPower, na.rm = TRUE),
  max_data_strength = max(periodicStrength, na.rm = TRUE),

  ## the scalar ifelse() only evaluates the branch that is selected, so the
  ## 'f0_CC' column is needed only when sourceF0 is not "Praat"
  f0_data_min = ifelse(sourceF0=="Praat", round(min(f0Post_dropInterp, na.rm = TRUE)), round(min(f0_CC, na.rm = TRUE))),
  f0_data_max = ifelse(sourceF0=="Praat", round(max(f0Post_dropInterp, na.rm = TRUE)), round(max(f0_CC, na.rm = TRUE))),

  f0_data_range = round(f0_data_max - f0_data_min)
  )
## find values for each token
main_df <- mutate(
  group_by(main_df, file),
  max_token_per_power = max(periodicPower, na.rm = TRUE),
  max_token_strength = max(periodicStrength, na.rm = TRUE),

  f0_token_min = ifelse(sourceF0=="Praat", round(min(f0Post_dropInterp, na.rm = TRUE)), round(min(f0_CC, na.rm = TRUE))),
  f0_token_max = ifelse(sourceF0=="Praat", round(max(f0Post_dropInterp, na.rm = TRUE)), round(max(f0_CC, na.rm = TRUE))),
  f0_token_median = ifelse(sourceF0=="Praat", round(median(f0Post_dropInterp, na.rm = TRUE)), round(median(f0_CC, na.rm = TRUE))),
  f0_token_mean = ifelse(sourceF0=="Praat", round(mean(f0Post_dropInterp, na.rm = TRUE)), round(mean(f0_CC, na.rm = TRUE))),

  f0_token_range = round(f0_token_max - f0_token_min)
  )
## find values for speaker-defined sets (if exist)
if (has_speaker) {
  main_df <- mutate(
    group_by(main_df, speaker),
    max_speaker_per_power = max(periodicPower, na.rm = TRUE),
    max_speaker_strength = max(periodicStrength, na.rm = TRUE),

    f0_speaker_min = ifelse(sourceF0=="Praat", round(min(f0Post_dropInterp, na.rm = TRUE)), round(min(f0_CC, na.rm = TRUE))),
    f0_speaker_max = ifelse(sourceF0=="Praat", round(max(f0Post_dropInterp, na.rm = TRUE)), round(max(f0_CC, na.rm = TRUE))),

    f0_speaker_min_median = round(median(f0_token_min, na.rm = TRUE)),
    f0_speaker_max_median = round(median(f0_token_max, na.rm = TRUE)),

    f0_speaker_median = ifelse(sourceF0=="Praat", round(median(f0Post_dropInterp, na.rm = TRUE)), round(median(f0_CC, na.rm = TRUE))),
    f0_speaker_mean = ifelse(sourceF0=="Praat", round(mean(f0Post_dropInterp, na.rm = TRUE)), round(mean(f0_CC, na.rm = TRUE))),

    f0_speaker_range = round(f0_speaker_max - f0_speaker_min),
    f0_speaker_range_median = round(f0_speaker_max_median - f0_speaker_min_median)
  )
}
## mutate by token
main_df <- mutate(
  group_by(main_df, file),
  ## variables for plot normalization
  plotFloorToken = round(f0_token_min - f0_token_range),
  ## the speaker-level floor is NA when there is no 'speaker' column
  plotFloorSpeaker = if (has_speaker) round(f0_speaker_min - f0_speaker_range) else NA_real_,
  plotFloorData = round(f0_data_min - f0_data_range),
  ## conclude relative anchors and thresholds
  ## (scalar ifelse(): only the branch chosen by 'relTo' is evaluated)
  perFloor_indeed = ifelse(
    relTo=="token", round(max_token_per_power * perFloor, 10), ifelse(
      relTo=="data", round(max_data_per_power * perFloor, 10),
      round(max_speaker_per_power * perFloor, 10))),
  strengThresh_indeed = ifelse(
    relTo=="token", round(max_token_strength * 0.25, 8), ifelse(
      relTo=="data", round(max_data_strength * 0.25, 8),
      round(max_speaker_strength * 0.25, 8))),
  ## create new periodic power vector
  ## (strength values below the threshold are treated as zero)
  periodicFraction = ifelse(periodicStrength < strengThresh_indeed, 0, periodicStrength),
  # periodicFraction = ifelse(periodicStrength < 0.25, 0, periodicStrength),
  postPP = round(totalPower * periodicFraction, 9),
  ## log periodic power
  logPP = 10*log10(postPP/perFloor_indeed)
  )
## convenient variants
main_df <- mutate(
  group_by(main_df, file),
  ## clean up negatives, infinites and NAs after log transform
  logPP = ifelse(logPP<0 | is.na(logPP) | is.infinite(logPP), 0, logPP),
  ## create relative scales (0--1)
  intensityRel = ifelse(
    intensity<0, 0, round(intensity / max(intensity, na.rm = TRUE), 5)),
  totalPowerRel = ifelse(
    totalPower<0, 0, round(totalPower / max(totalPower, na.rm = TRUE), 5)),
  postPP_rel = ifelse(
    postPP<0, 0, round(postPP / max(postPP, na.rm = TRUE), 5)),
  logPP_rel = round(logPP / max(logPP, na.rm = TRUE), 5)
  )
#
########## periodic energy smoothing (log+smooth = smog)
main_df <- mutate(
  group_by(main_df, file),
  ### 20Hz low-pass filter (50ms intervals): "segmental smooth"
  smogPP_20Hz = bwfilter(wave = logPP, f = 1000, to = 20, n = 2, output = "Sample"),
  ### 12Hz low-pass filter (~83.3ms  intervals): "seg-syll smooth"
  smogPP_12Hz = bwfilter(wave = logPP, f = 1000, to = 12, n = 2, output = "Sample"),
  ### 8Hz low-pass filter (125ms  intervals): "syll-seg smooth"
  smogPP_8Hz = bwfilter(wave = logPP, f = 1000, to = 8, n = 2, output = "Sample"),
  ### 5Hz low-pass filter (200ms  intervals): "syllabic smooth"
  smogPP_5Hz = bwfilter(wave = logPP, f = 1000, to = 5, n = 2, output = "Sample")
  )
### tidy up: zero negatives and transform positives to a 0--1 scale
main_df <- mutate(
  group_by(main_df, file),
  smogPP_20Hz = ifelse(
    smogPP_20Hz < 0, 0, round(smogPP_20Hz / max(smogPP_20Hz, na.rm = TRUE), 5)),
  smogPP_12Hz = ifelse(
    smogPP_12Hz < 0, 0, round(smogPP_12Hz / max(smogPP_12Hz, na.rm = TRUE), 5)),
  smogPP_8Hz = ifelse(
    smogPP_8Hz < 0, 0, round(smogPP_8Hz / max(smogPP_8Hz, na.rm = TRUE), 5)),
  smogPP_5Hz = ifelse(
    smogPP_5Hz < 0, 0, round(smogPP_5Hz / max(smogPP_5Hz, na.rm = TRUE), 5))
  )

```

## plot selected tokens (detailed review of the data)

Use the comment in/out (#) to toggle visualization of different data components.

```{r plot_singles, warning=FALSE, echo=FALSE}

### choose the f0 scale for the y-axis in the plots
yScale1 <- c('tokenScale', 'speakerScale', 'dataScale')[1]

## make sure the output folder exists before any plot is saved into it
dir.create("plots", showWarnings = FALSE)

##################################
########### loop start ###########
## one plot per file; a plain loop is used because the body is run only for
## its side effects (printing and saving the plot)
for (sel_file1 in as.character(files)) {
##################################

#####################################
###### manual singles, no-loop ######
# sel_file1 <- files[1] # or: "filename"
#####################################

single_token1 <- dplyr::filter(main_df, file==sel_file1)

## y-axis anchors for the chosen scale; the last (unnamed) value of each
## switch() is the fallback for an unrecognised 'yScale1'
plotFloor1 <- switch(yScale1,
                     tokenScale = single_token1$plotFloorToken[1],
                     speakerScale = single_token1$plotFloorSpeaker[1],
                     dataScale = single_token1$plotFloorData[1],
                     -275)
plotUnits1 <- switch(yScale1,
                     tokenScale = round(single_token1$f0_token_range[1]/30),
                     speakerScale = round(single_token1$f0_speaker_range[1]/30),
                     dataScale = round(single_token1$f0_data_range[1]/30),
                     12)
f0range1 <- switch(yScale1,
                   tokenScale = single_token1$f0_token_range[1],
                   speakerScale = single_token1$f0_speaker_range[1],
                   dataScale = single_token1$f0_data_range[1],
                   350)
f0max1 <- switch(yScale1,
                 tokenScale = single_token1$f0_token_max[1],
                 speakerScale = single_token1$f0_speaker_max[1],
                 dataScale = single_token1$f0_data_max[1],
                 425)

periogram_single1 <-
  ggplot(single_token1, aes(x=t)) +
########## F0 curves
## pre-smoothed F0 (from pitch object)
  # geom_point(aes(y=f0_raw),color="green", alpha=.5, size=.5) +
## smoothed F0 (from pitch tier)
  # geom_point(aes(y=f0_smooth),color="blue3", alpha=0.3, size=0.3) +
  # geom_point(aes(y=f0_smootchInterp),color="yellow", alpha=0.3, size=0.3) +
  # geom_point(aes(y=f0Post),color="red", alpha=0.3, size=0.3) +
## periogram (smogPP): F0 curve whose opacity and thickness follow smogPP_20Hz
  # if(sourceF0=="CC") 
    # geom_line(aes(y=f0_CCPost),color="magenta2", alpha=single_token1$smogPP_20Hz, size=single_token1$smogPP_20Hz*5) +
  # if(sourceF0=="Praat") 
    geom_line(aes(y=f0Post),color="magenta2", alpha=single_token1$smogPP_20Hz, size=single_token1$smogPP_20Hz*5) +
#
########## Power/intensity
## intensity
  # geom_line(aes(y=intensityRel*f0range1+plotFloor1),color="gold", alpha=.6, size=.5) +
## power
  # geom_line(aes(y=totalPowerRel*f0range1+plotFloor1),color="cornflowerblue", alpha=.5, size=.5, linetype="dashed") +
#  
########## Periodic fraction /similarity index (strength/HNR)
## raw strength (before "strengThresh")
  # geom_line(aes(y=periodicStrength*f0range1+plotFloor1), color="green", alpha=.2, size=.75, linetype="twodash") +
## processed strength  (after "strengThresh")
  # geom_line(aes(y=periodicFraction*f0range1+plotFloor1), color="tomato", alpha=.7, size=.5, linetype="dotted") +
#
########## Periodic power 'pp' (total power * periodic fraction)
  geom_line(aes(y=postPP_rel*f0range1+plotFloor1),color="purple3", alpha=.5, size=.5, linetype="solid") +
#
########## Log periodic power 'logPP' (10*log10(PER/per_thresh))
  # geom_line(aes(y=logPP_rel*f0range1+plotFloor1),color="seashell", alpha=.3, size=2, linetype="longdash") +
#
########## Smoothed logPP 'smogPP' (4 smoothing flavors: 5/ 8/ 12/ 20 Hz low-pass filter)
  geom_line(aes(y=smogPP_20Hz*f0range1+plotFloor1),color="lightsteelblue", alpha=.5, size=.75) +
  # geom_line(aes(y=smogPP_12Hz*f0range1+plotFloor1),color="lightyellow", alpha=.6, size=1) +
  # geom_line(aes(y=smogPP_8Hz*f0range1+plotFloor1),color="moccasin", alpha=.4, size=1.5) +
  # geom_line(aes(y=smogPP_5Hz*f0range1+plotFloor1),color="rosybrown1", alpha=.3, size=2) +
  # geom_line(aes(y=smogPP_5Hz_2*f0range1+plotFloor1+5),color="yellow", alpha=.6, size=1) +
  # geom_line(aes(y=smogPP_5Hz_4*f0range1+plotFloor1+10),color="orange", alpha=.6, size=1) +
  # geom_line(aes(y=smogPP_5Hz_8*f0range1+plotFloor1+15),color="red", alpha=.6, size=1) +
#  
########## TextGrids boundaries and annotations (skipped automatically if the columns are absent)
## boundaries
  {if(length(single_token1$syll_bounds)>0) geom_vline(aes(xintercept=syll_bounds), linetype="dotted", color="white", size=.5, alpha=.5)} +
## annotations
## ('check_overlap' is a geom_text() parameter; inside aes() it would be
##  ignored as an unknown aesthetic and overlapping labels would all be drawn)
  {if(length(single_token1$syll_mid)>0) geom_text(aes(x=syll_mid, y=f0max1+plotUnits1*2, label=as.character(syll_label)), check_overlap=TRUE, size=3, color="white", family= "Helvetica")} + 
## plot stuff
  ggtitle(paste0(sel_file1)) +  
  xlab("Time (ms)") + ylab("F0 (Hz)") +
  ylim(plotFloor1,f0max1+plotUnits1*2) +
  theme(plot.title = element_text(colour = "gray"), panel.background = element_blank(), plot.background = element_rect(fill = "black"), panel.grid = element_blank(), axis.title = element_text(colour = "gray"), axis.ticks = element_blank())
print(periogram_single1)
##--save?
## (arguments are spelled out in full rather than relying on 'file' being
##  partially matched to 'filename')
ggsave(filename=paste0("plots/",sel_file1,"_perTest(",perFloor,")_",yScale1,".pdf"), plot=periogram_single1, device=cairo_pdf)

##################################
############ loop end ############
}
##################################

```

## re-adjust the perFloor value for selected speakers 
(this can be used also for specific tokens but in typical scenarios it should make more sense to have a single value for a given recording condition, i.e. for speakers, not for tokens) 

```{r readjust_singles, warning=FALSE, echo=FALSE}

#### change the perFloor of specific tokens
## files that get perFloor = 0.001
perFloor_0001_files <- c(
  "cut-SFGTV_20201206_130000_Entertainment_Commission",
  "cut-CNBC_20181105_220000_Fast_Money",
  "cut-CNNW_20180401_060000_Amanpour_Sex_and_Love_Around_the_World",
  "cut-CNNW_20190926_010000_Cuomo_Prime_Time",
  "cut-CSPAN_20210613_110000_Washington_Journal_06132021_a",
  "cut-CSPAN_20210613_110000_Washington_Journal_06132021_b",
  "cut-CSPAN2_20180204_100700_Authors_Dave_Barry_and_Scott_Turow",
  "cut-CSPAN3_20190416_080900_President-Elect_Lincoln",
  "cut-FOXNEWSW_20120808_060000_The_Five",
  "cut-GBN_20221216_190000_Laurence_Fox",
  "cut-KNTV_20210908_073700_Late_Night_With_Seth_Meyers",
  "cut-SFGTV_20101023_013000",
  "cut-SFGTV_20171219_120000_Government_Access_Programming",
  "cut-SFGTV_20191109_000000_Government_Access_Programming"
)
## files that get perFloor = 0.01
perFloor_001_files <- c(
  "cut-ALJAZAM_20160105_010000_News",
  "cut-BBCNEWS_20210930_050000_Breakfast",
  "cut-CNBC_20090917_000000_CNBC_Reports",
  "cut-CNBC_20100120_160000_The_Call",
  "cut-CNBC_20120202_190000_Street_Signs",
  "cut-CNBC_20180222_100000_Worldwide_Exchange",
  "cut-CNN_20100420_010000_Larry_King_Live",
  "cut-CNNW_20130423_010000_Piers_Morgan_Live",
  "cut-CNNW_20131221_230000_CNN_Newsroom",
  "cut-CNNW_20160226_180000_Wolf",
  "cut-CNNW_20160313_200000_CNN_Newsroom_With_Fredricka_Whitfield",
  "cut-CNNW_20160707_230000_Erin_Burnett_OutFront",
  "cut-CNNW_20161110_020000_Anderson_Cooper_360",
  "cut-CNNW_20221107_120000_CNN_This_Morning",
  "cut-CNNW_20230225_030000_CNN_Tonight",
  "cut-COM_20111208_020000_The_Daily_Show_With_Jon_Stewart",
  "cut-COM_20210916_060000_The_Daily_Show_With_Trevor_Noah",
  "cut-COM_20220218_070000_The_Daily_Show_With_Trevor_Noah",
  "cut-CSPAN_20100411_100000_C-SPAN_Weekend",
  "cut-CSPAN_20111105_010000_The_Contenders",
  "cut-CSPAN_20140615_230000_Washington_This_Week",
  "cut-CSPAN_20140829_110000_Washington_Journal",
  "cut-CSPAN_20170221_140000_Washington_Journal_News_Headlines_and_Viewer_Calls",
  "cut-CSPAN_20181102_015800_Campaign_2018_Oprah_Winfrey_at_Rally_for_Stacey_Abrams",
  "cut-CSPAN_20191203_071100_Attorneys_in_Gun_Rights_Supreme_Court_Case",
  "cut-CSPAN_20220613_165600_Washington_Journal_Open_Phones",
  "cut-CSPAN_20220819_064300_Campaign_2022_PA_Senate_Candidate_Dr_Mehmet_Oz_Speaks_in_Philadelphia",
  "cut-CSPAN_20230730_110000_Washington_Journal_07302023_a",
  "cut-CSPAN_20230730_110000_Washington_Journal_07302023_b",
  "cut-CSPAN2_20090823_230000_Book_TV",
  "cut-CSPAN2_20101219_051500_Book_TV",
  "cut-CSPAN2_20110130_130000_Book_TV",
  "cut-CSPAN2_20170528_120100_Authors_Discuss_the_Republican_Party",
  "cut-CSPAN2_20201113_212500_Juliet_SchorAfter_the_Gig",
  "cut-CSPAN2_20211222_130200_Author_Discussion_on_the_Environment_and_Sustainability",
  "cut-CSPAN3_20160824_000000_Writing_Presidential_Biographies",
  "cut-CSPAN3_20181227_174400_A_Conversation_on_Presidents__the_Press",
  "cut-CSPAN3_20210213_005900_American_Artifacts_Jim_Crow_Museum_of_Racist_Memorabilia",
  "cut-FBC_20140922_200000_After_the_Bell",
  "cut-FOXNEWS_20110817_070000_Red_Eye",
  "cut-FOXNEWSW_20120626_040000_Hannity",
  "cut-FOXNEWSW_20130116_210000_Your_World_With_Neil_Cavuto",
  "cut-FOXNEWSW_20131029_210000_The_Five",
  "cut-FOXNEWSW_20150501_050000_Hannity",
  "cut-FOXNEWSW_20180205_170000_Outnumbered",
  "cut-FOXNEWSW_20181220_170000_Outnumbered",
  "cut-FOXNEWSW_20210916_030000_Gutfeld",
  "cut-GBN_20230124_000000_Farage_Replay",
  "cut-KGO_20131123_073500_Jimmy_Kimmel_Live",
  "cut-KNTV_20130409_073500_Late_Night_With_Jimmy_Fallon",
  "cut-KNTV_20191026_073700_Late_Night_With_Seth_Meyers",
  "cut-KNTV_20230812_073700_Late_Night_With_Seth_Meyers",
  "cut-KPIX_20200324_063500_The_Late_Show_With_Stephen_Colbert",
  "cut-KTVU_20160518_000000_KTVU_Fox_2_News_at_5pm",
  "cut-KYW_20170520_033500_The_Late_Show_With_Stephen_Colbert",
  "cut-MSNBCW_20120524_200000_The_Dylan_Ratigan_Show",
  "cut-MSNBCW_20160729_170000_The_Place_for_Politics_2016",
  "cut-MSNBCW_20180105_140000_MSNBC_Live_With_Stephanie_Ruhle",
  "cut-MSNBCW_20190314_200000_Deadline_White_House",
  "cut-SFGTV_20121120_133000",
  "cut-SFGTV_20130125_073000",
  "cut-SFGTV_20130219_170000",
  "cut-SFGTV_20180210_000000_Government_Access_Programming",
  "cut-SFGTV_20181021_220000_Government_Access_Programming",
  "cut-SFGTV_20181024_180000_Government_Access_Programming",
  "cut-SFGTV_20191024_050000_Government_Access_Programming",
  "cut-SFGTV_20191123_120000_Government_Access_Programming",
  "cut-SFGTV_20220930_230000_Board_of_Appeals",
  "cut-WPVI_20170802_220000_Action_News_6PM",
  "cut-WRC_20120927_200000_News_4_at_4",
  "cut-WUSA_20100820_210000_9News_Now_at_5pm"
)
## override the recorded perFloor for the listed files; all other files keep
## their current value (the 0.01 list is applied last, so it wins if a file
## were ever listed twice)
main_df2 <- mutate(
  group_by(main_df, file),
  perFloorStatus = ifelse(file %in% perFloor_0001_files, 0.001, perFloorStatus),
  perFloorStatus = ifelse(file %in% perFloor_001_files, 0.01, perFloorStatus)
  )

## recompute everything that depends on the floor, now using the per-file value
main_df2 <- mutate(
  group_by(main_df2, file),
  perFloor_indeed = ifelse(relTo=="token", round(max_token_per_power * perFloorStatus, 10), ifelse(relTo=="data", round(max_data_per_power * perFloorStatus, 10), round(max_speaker_per_power * perFloorStatus, 10))),
  ## log periodic power
  logPP = 10*log10(postPP/perFloor_indeed),
  ## clean up negatives, infinites and NAs after log transform
  logPP = ifelse(logPP<0 | is.na(logPP) | is.infinite(logPP), 0, logPP),
  ## refresh the relative (0--1) scale so that it matches the new logPP
  logPP_rel = round(logPP / max(logPP, na.rm = TRUE), 5),
  ########## periodic energy smoothing (log+smooth = smog)
  ### 20Hz low-pass filter (50ms intervals): "segmental smooth"
  smogPP_20Hz = bwfilter(wave = logPP, f = 1000, to = 20, n = 2, output = "Sample"),
  ### 12Hz low-pass filter (~83.3ms  intervals): "seg-syll smooth"
  smogPP_12Hz = bwfilter(wave = logPP, f = 1000, to = 12, n = 2, output = "Sample"),
  ### 8Hz low-pass filter (125ms  intervals): "syll-seg smooth"
  smogPP_8Hz = bwfilter(wave = logPP, f = 1000, to = 8, n = 2, output = "Sample"),
  ### 5Hz low-pass filter (200ms  intervals): "syllabic smooth"
  smogPP_5Hz = bwfilter(wave = logPP, f = 1000, to = 5, n = 2, output = "Sample")
  )
### tidy up: zero negatives and transform positives to a 0--1 scale
main_df2 <- mutate(
  group_by(main_df2, file),
  smogPP_20Hz = ifelse(
    smogPP_20Hz < 0, 0, round(smogPP_20Hz / max(smogPP_20Hz, na.rm = TRUE), 5)),
  smogPP_12Hz = ifelse(
    smogPP_12Hz < 0, 0, round(smogPP_12Hz / max(smogPP_12Hz, na.rm = TRUE), 5)),
  smogPP_8Hz = ifelse(
    smogPP_8Hz < 0, 0, round(smogPP_8Hz / max(smogPP_8Hz, na.rm = TRUE), 5)),
  smogPP_5Hz = ifelse(
    smogPP_5Hz < 0, 0, round(smogPP_5Hz / max(smogPP_5Hz, na.rm = TRUE), 5))
  )

```

Exclusions due to wacky F0 contours (1 item)

```{r exclude}

## remove the excluded token from the table
main_df2 <- main_df2 %>%
  dplyr::filter(file != "cut-SFGTV_20220930_230000_Board_of_Appeals")

## get the filenames list (one entry per remaining file)
files2 <- unique(main_df2$file)

```


## re-plot after changes (if relevant)

```{r re-plot, warning=FALSE, echo=FALSE}

# ## get the filenames list
# files2 <- unique(main_df2$file)

### choose the f0 scale for the y-axis in the plots
yScale2 <- c('tokenScale', 'speakerScale', 'dataScale')[1]

## make sure the output folder exists before any plot is saved into it
dir.create("plots", showWarnings = FALSE)

##################################
########### loop start ###########
## one plot per file; a plain loop is used because the body is run only for
## its side effects (printing and saving the plot)
for (sel_file2 in as.character(files2)) {
##################################

#####################################
###### manual singles, no-loop ######
# sel_file2 <- files2[3] # or: "filename"
#####################################

single_token2 <- dplyr::filter(main_df2, file==sel_file2)

## y-axis anchors for the chosen scale; the last (unnamed) value of each
## switch() is the fallback for an unrecognised 'yScale2'
plotFloor2 <- switch(yScale2,
                     tokenScale = single_token2$plotFloorToken[1],
                     speakerScale = single_token2$plotFloorSpeaker[1],
                     dataScale = single_token2$plotFloorData[1],
                     -275)
plotUnits2 <- switch(yScale2,
                     tokenScale = round(single_token2$f0_token_range[1]/30),
                     speakerScale = round(single_token2$f0_speaker_range[1]/30),
                     dataScale = round(single_token2$f0_data_range[1]/30),
                     12)
f0range2 <- switch(yScale2,
                   tokenScale = single_token2$f0_token_range[1],
                   speakerScale = single_token2$f0_speaker_range[1],
                   dataScale = single_token2$f0_data_range[1],
                   350)
f0max2 <- switch(yScale2,
                 tokenScale = single_token2$f0_token_max[1],
                 speakerScale = single_token2$f0_speaker_max[1],
                 dataScale = single_token2$f0_data_max[1],
                 425)

periogram_single2 <-
  ggplot(single_token2, aes(x=t)) +
########## F0 curves
  # geom_point(aes(y=f0_smooth),color="blue3", alpha=0.3, size=0.3) +
## periogram (smogPP): F0 curve whose opacity and thickness follow smogPP_20Hz
  # geom_line(aes(y=f0_CCPost),color="magenta2", alpha=single_token2$smogPP_20Hz, size=single_token2$smogPP_20Hz*5) +
  geom_line(aes(y=f0Post),color="magenta2", alpha=single_token2$smogPP_20Hz, size=single_token2$smogPP_20Hz*5) +
########## Periodic power 'pp' (total power * periodic fraction)
  # geom_line(aes(y=postPP_rel*f0range2+plotFloor2),color="purple3", alpha=.5, size=.5, linetype="solid") +
########## Log periodic power 'logPP' (10*log10(PER/per_thresh))
  # geom_line(aes(y=logPP_rel*f0range2+plotFloor2),color="seashell", alpha=.3, size=2, linetype="longdash") +
########## Smoothed logPP 'smogPP' (4 smoothing flavors: 5/ 8/ 12/ 20 Hz low-pass filter)
  geom_line(aes(y=smogPP_20Hz*f0range2+plotFloor2),color="lightsteelblue", alpha=.5, size=.75) +
  geom_line(aes(y=smogPP_12Hz*f0range2+plotFloor2),color="lightyellow", alpha=.6, size=1) +
  # geom_line(aes(y=smogPP_8Hz*f0range2+plotFloor2),color="moccasin", alpha=.4, size=1.5) +
  geom_line(aes(y=smogPP_5Hz*f0range2+plotFloor2),color="rosybrown1", alpha=.3, size=2) +
########## TextGrids boundaries and annotations (skipped automatically if the columns are absent)
## boundaries
  {if(length(single_token2$syll_bounds)>0) geom_vline(aes(xintercept=syll_bounds), linetype="dotted", color="white", size=.5, alpha=.5)} +
## annotations
## ('check_overlap' is a geom_text() parameter; inside aes() it would be
##  ignored as an unknown aesthetic and overlapping labels would all be drawn)
  {if(length(single_token2$syll_mid)>0) geom_text(aes(x=syll_mid, y=f0max2+plotUnits2*2, label=as.character(syll_label)), check_overlap=TRUE, size=3, color="white", family= "Helvetica")} + 
## plot stuff
  ggtitle(paste0(sel_file2)) +  
  xlab("Time (ms)") + ylab("F0 (Hz)") +
  ylim(plotFloor2,f0max2+plotUnits2*2) +
  theme(plot.title = element_text(colour = "gray"), panel.background = element_blank(), plot.background = element_rect(fill = "black"), panel.grid = element_blank(), axis.title = element_text(colour = "gray"), axis.ticks = element_blank())
print(periogram_single2)
##--save?
## (the file name records the perFloor value actually used for this token;
##  arguments are spelled out in full rather than relying on 'file' being
##  partially matched to 'filename')
ggsave(filename=paste0("plots/",sel_file2,"_PERIOGRAM(",single_token2$perFloorStatus[1],")_",yScale2,".pdf"), plot=periogram_single2, device=cairo_pdf)

##################################
############ loop end ############
}
##################################

```

## Minimize main_df2 table

```{r minimize_main_df}

## get rid of some variables
## (columns are dropped by name, so a listed column that does not exist in
##  main_df2 -- e.g. the speaker-level maxima when there is no 'speaker'
##  column -- is simply skipped instead of stopping with an error)
drop_cols <- c("intensity", "periodicStrength", "totalPower", "periodicPower",
               "max_data_per_power", "max_data_strength",
               "max_speaker_per_power", "max_speaker_strength",
               "max_token_per_power", "max_token_strength",
               "perFloor_indeed", "periodicFraction", "postPP", "logPP")
keep_cols <- setdiff(names(main_df2), drop_cols)
mini_main_df <- droplevels(main_df2[, keep_cols, drop = FALSE])

```

## Write main_df table

```{r write_main_df}
## Save the minimized table as the main data file (row names are not written)
write.csv(
  x = mini_main_df,
  file = "data_tables/main_df.csv",
  row.names = FALSE
)
```