added twilight processing code and a couple files needed for dive pro…

…cessing
pointblue · May 22, 2023 · 784760d · 784760d
1 parent b9473f6
commit 784760d
Show file tree

Hide file tree

Showing 5 changed files with 1,102 additions and 0 deletions.
diff --git a/code/geoloc_code/GDR_analysis_SGAT_1617_preprocess.R b/code/geoloc_code/GDR_analysis_SGAT_1617_preprocess.R
@@ -0,0 +1,235 @@
+# Script for pre-processing light-level geolocation data from 1617
+# Setting up to access files directly from AWS
+### Load packages
+rm(list = ls())
+library(aws.s3)
+library(stringr)
+library(data.table)
+library(dplyr)
+library(tidyr)
+library(fasttime)
+library(lubridate)
+library(devtools)
+library(GeoLocTools)
+setupGeolocation()
+library(probGLS)
+library(TwGeos)
+library(ggplot2)
+library(geosphere)
+library(dplyr)
+library(readr)
+library(ggspatial)
+library(maptools)
+data(wrld_simpl)
+library(abind)
+library(rgdal)
+library(sf)
+
+# Set credentials for aws access
+# Should only need if accessing private repo
+# Sys.setenv(
+#   "AWS_ACCESS_KEY_ID" = "your_key",
+#   "AWS_SECRET_ACCESS_KEY" = "super_secret_key",
+#   "AWS_DEFAULT_REGION" = "us-west-2"
+# )
+
+# UNITS:
+# Temperature is measured every 30 seconds in 1/10 degree C, converted to degree C in processing
+# Pressure measured once per second in mBar, converted to meters in processing
+# Light is measured per minute on the minute in lux, not converted in processing
+
+# List content of GDR_1718 folder
+bucketlist()
+tmp = get_bucket_df(bucket = "deju-penguinscience", prefix="AllData/gdr_datashare/GDR_1617/") # 67 files
+
+## Get info on retrieved devices in "data/croz_royds_gdr_depl_all_v....csv"
+depl_dat <- read.csv("data/croz_royds_gdr_depl_all_v2021-08-27.csv")%>%
+  mutate(bird_id=as.numeric(bird_id))%>%
+  subset(season=="2016"&retrieve_date!="NA")%>%
+  mutate(id=str_pad(gsub("NA","",bird_id),9,pad="0"))
+filelist<-as.character(subset(depl_dat$filename, depl_dat$filename!="NA")) # 67 files
+
+####################
+## ALL FILES ##
+####################
+
+## Load the raw data for interactive twilight annotation
+index=67:length(filelist)
+for(i in index){
+  # pull out filename and paste to create path
+  fn<-filelist[i]
+  print(paste("Loading", fn, "...", sep=" "))
+  path=paste0("s3://deju-penguinscience/AllData/gdr_datashare/GDR_1617/",fn ,sep="")
+  raw <-s3read_using(fread, object = path, skip=25, drop=6, col.names=c("date","time","temp","pressure","lux"))
+  raw$Date  <- as.POSIXct(paste(raw$date, raw$time), format = "%d/%m/%Y %H:%M:%S", tz = "GMT")
+  raw$Light  <- log(ifelse(raw$lux<1, 1, raw$lux))
+  head(raw)
+  # Change start date and time of rebooted loggers (71695, 63054, 46408)
+  if (fn=="LUL529_71695_CRM19.txt") { # 71695
+    print(filename)
+    raw$Date <- seq(from=as.POSIXct("2016-07-18 14:43:00", tz="GMT"), by=1, length.out=length(raw$Light))
+    print(head(raw$Date))
+  } else if (fn=="LUL6117_46408_CRM7.txt") { # 46408
+    print(filename)
+    raw$Date <- seq(from=as.POSIXct("2016-08-30 10:13:00", tz="GMT"), by=1, length.out=length(raw$Light))
+    print(head(raw$Date))
+  } else if (fn=="LUL6101_63054.txt") { # 63054
+    print(filename)
+    raw$Date <- seq(from=as.POSIXct("2016-09-07 11:05:00", tz="GMT"), by=1, length.out=length(raw$Light))
+    print(head(raw$Date))
+  } else {
+    print("Dtime is fine - no need to change")
+  }
+  # Modify dtime according to clock drift
+  print("Adjusting for clock drift: applying mean drift rate of 0.9651941 sec per day")
+  drift_rate <- 0.9651941
+  end <- raw$Date[length(raw$Date)] + drift_rate*as.numeric(difftime(raw$Date[length(raw$Date)], raw$Date[1], units="days")) 
+  raw$Date <- driftAdjust(time=raw$Date, start=raw$Date[1], end=end)
+  raw$Date <- with_tz(raw$Date, "GMT")   # lubridate function   
+  print(head(raw))  
+  # Remove NA values
+  raw <- subset(raw, !is.na(raw$Light))
+  # Subset with deploy and retrieval dates
+  #deploy_date <- as.POSIXct(depl_dat$date_deployed[depl_dat$filename==paste0(filelist[i], ".txt")], format="%m/%d/%Y", tz="GMT")  
+  #retrieve_date <- as.POSIXct(depl_dat$date_retrieved[depl_dat$filename==paste0(filelist[i], ".txt")], format="%m/%d/%Y", tz="GMT")
+  deploy_date <- as.POSIXct(depl_dat$deploy_date[which(depl_dat$filename==filelist[i])], format="%Y-%m-%d", tz="GMT")
+  retrieve_date <- as.POSIXct(depl_dat$retrieve_date[which(depl_dat$filename==filelist[i],)], format="%Y-%m-%d", tz="GMT")
+  raw <- subset(raw, raw$Date>=deploy_date & raw$Date<=retrieve_date)
+  #Release coordinates
+  if (depl_dat$br_col[which(depl_dat$filename==filelist[i])]=="CROZ") {
+    lon.calib <- 169.237657
+    lat.calib <- -77.453600
+  } else {
+    lon.calib <- 166.168286
+    lat.calib <- -77.552929
+  }
+  ## Twilight annotation
+  # Choose twilight threshold and visualize raw light data
+  threshold <- 1
+  offset <- 0 # adjusts the y-axis to put night (dark shades) in the middle 
+  # (0 to 4, or 23 depending on files - no negative values!)
+  lightImage( tagdata = raw,
+              offset = offset,     
+              zlim = c(0, 20)) # There is twilight from March to October
+  tsimageDeploymentLines(raw$Date, lon = lon.calib, lat = lat.calib,
+                         offset = offset, lwd = 3, col = adjustcolor("orange", alpha.f = 0.5)) # Add sunrise and sunset times at Crozier
+  print("Starting the interactive twilight annotation - Please see http://geolocationmanual.vogelwarte.ch/twilight.html for more instructions on how to proceed")
+  # Define daily sunrise and sunset times
+  twl <- preprocessLight(raw, 
+                         threshold = threshold,
+                         offset = offset, 
+                         lmax = 11,         # max. light valu
+                         gr.Device = "default")
+
+  # Two windows will appear. Move them so they are not on top of each other and you can see
+  # both. They should look like a big black blob. This identifies the "nightime" period over
+  # time. The top of the blob shows all the sunrises and the bottom of blob shows all the 
+  # sunsets. You can note for instance that the days get longer (and thus the nights shorter)
+  # at the end of the time series, because the blob gets thinner. You may even note changes
+  # in the light image that relate to changes in activity patterns or breeding behavior.
+
+  # Step 1. Click on the window entitled "Select subset". With the left mouse button choose
+  # where you want the start of the dataset to be, and right mouse button to choose the end.
+  # You will notice that the red bar at the top moves and that the second window zooms into
+  # that time period. Select when you want your time series to start and end. This allows
+  # you to ignore for instance periods of nesting. Once you are happy with the start and 
+  # end of the timeseries press "a" on the keyboard to accept and move to next step.
+
+  # Step 2. click on the window entitled "Find twilights" and the second window will zoom in.
+  # All you need to do here is click in the dark part (in the zoomed in image i.e. the one not
+  # entitled "Find twilights") of the image and this will identify all the sunrises (orange)
+  # and sunsets (blue) based on the threshold defined in the previous section. Press "a" on 
+  # the keyboard to accept and move to next step.
+
+  # Step 3. This step is for adding or deleting points. If there are no missing data points,
+  # you can skip this step by pressing "a" on the keyboard. However, if you do want to add a
+  # point, you can click on the "Insert twilights" window to select a region of "the blob" that
+  # the second unintitled window will zoom into. In the zoomed window, use left mouse click to
+  # add a sunrise, and right mouse click to add a sunset. You can use "u" on the keyboard to 
+  # undo any changes, and "d" to delete any points which are extra. Press "a" to move to next 
+  # step.
+
+  # Step 4. This step allows you to find points which have been miss-classified (often because
+  # the bird was in the shade or in a burrow) and to move the respective sunrise or sunset to
+  # where it should be. Choose a point by clicking on it in the "edit twilights" window and the
+  # other window will display the sunrise (or sunset) from the previous and next days (purple
+  # and green) relative to the current sunrise or sunset (in black). Thus if the black line 
+  # shows a much earlier sunset or later sunrise than the purple and green ones, it is likely 
+  # badly classified. You can then left click at the point where you want the day to start and
+  # press "a" to accept and move the sunrise or sunset. You will notice the red line then moves.
+  # Do this for as many points as necessary. Then close the windows with "q".
+
+  head(twl)
+  twl$Twilight <- as.POSIXct(twl$Twilight, tz = "GMT")
+  twl <- subset(twl, !Deleted)  
+  twl <- twl[order(twl$Twilight),]
+  #Additional automated cleaning/filtering 
+  twl_filt <- twilightEdit(twilights = twl,
+                           offset = offset,
+                           window = 6,           # two days before and two days after
+                           outlier.mins = 45,    # difference in mins
+                           stationary.mins = 25, # are the other surrounding twilights within 25 mins of one another
+                           plot = TRUE)
+  # Save twilight file
+  id <- substr(fn,1,nchar(fn)-4)
+  #id <- fn
+  write.csv(twl_filt, paste0("data/geoloc/1617/twl/", id,
+                             "_twl.csv"), row.names = F)
+  # Add twilights (polar day) - To be able to estimate positions between the colony and the first real twilights
+  twl <- subset(twl_filt, !Deleted)  
+  twl <- twilightAdjust(twl, 60)
+  twl$Inserted <- FALSE
+  tm <- seq(min(raw$Date), max(raw$Date), by = "12 hours") # generate two twilights per day
+  rise <- rep(c(TRUE, FALSE), length = length(tm))[1:length(tm)]
+  tm_twl  <- twilight(tm, lon.calib, lat.calib, rise = rise, zenith = 80, iters = 5) # zenith=80, need to use solar angle about 10° above the horizon
+  #twl_add <- data.frame(Twilight = tm_twl, Rise = rise, Deleted = FALSE, Marker=0, Inserted = TRUE, Twilight3 = tm_twl, Marker3=0) 
+  twl_add <- data.frame(Twilight = tm_twl, Rise = rise, Deleted = FALSE, Edited = FALSE, Twilight0 = tm_twl, Inserted = TRUE) 
+  twl_add <- subset(twl_add, !is.na(Twilight))
+  twl <- rbind(twl_add[twl_add$Twilight<min(twl$Twilight)-12*60*60,], twl, twl_add[twl_add$Twilight>max(twl$Twilight)+12*60*60,])
+  write.csv(twl, paste0("data/geoloc/1617/twl/", fn,
+                        "_twl_long.csv"), row.names = F)
+  # Clean up
+  rm(raw, twl, twl_filt, twl_add)
+  gc()  # run garbage collector to free up memory
+  # delete temp files from C: drive
+  tmp_dir <- tempdir()
+  files <- list.files(tmp_dir, full.names = T, pattern = "^file")
+  file.remove(files)
+  #End loop
+}
+print("No more file to process")
+
+## If needs to delete data
+#twl <- subset(twl, twl$Twilight>=as.POSIXct("2018-02-23", tz="GMT") & twl$Twilight<=as.POSIXct("2018-10-24", tz="GMT"))
+twl <- subset(twl, twl$Twilight<=as.POSIXct("2018-10-28", tz="GMT"))
+twl <- twl[-c(2:3, 6:11, 14:21, 24:29, 32:35, 38:41, 44:49, 52:55, 58:59, 62:63), ] # remove specific rows
+head(twl, 50)
+#Additional automated cleaning/filtering 
+twl_filt <- twilightEdit(twilights = twl,
+                         offset = offset,
+                         window = 4,           # two days before and two days after
+                         outlier.mins = 45,    # difference in mins
+                         stationary.mins = 25, # are the other surrounding twilights within 25 mins of one another
+                         plot = TRUE)
+# Save twilight file
+#id <- substr(fn,1,nchar(fn)-4)
+id <- fn
+write.csv(twl_filt, paste0("data/geoloc/1617/twl/", id,
+                           "_twl.csv"), row.names = F)
+# Add twilights (polar day) - To be able to estimate positions between the colony and the first real twilights
+twl <- subset(twl_filt, !Deleted)  
+twl <- twilightAdjust(twl, 60)
+twl$Inserted <- FALSE
+tm <- seq(min(raw$Date), max(raw$Date), by = "12 hours") # generate two twilights per day
+rise <- rep(c(TRUE, FALSE), length = length(tm))[1:length(tm)]
+tm_twl  <- twilight(tm, lon.calib, lat.calib, rise = rise, zenith = 80, iters = 5) # zenith=80, need to use solar angle about 10° above the horizon
+#twl_add <- data.frame(Twilight = tm_twl, Rise = rise, Deleted = FALSE, Edited = FALSE, Twilight0 = tm_twl, Inserted = TRUE) 
+twl_add <- data.frame(Twilight = tm_twl, Rise = rise, Deleted = FALSE, Marker=0, Inserted = TRUE, Twilight3 = tm_twl, Marker3=0 )
+twl_add <- subset(twl_add, !is.na(Twilight))
+twl <- rbind(twl_add[twl_add$Twilight<min(twl$Twilight),], twl, twl_add[twl_add$Twilight>max(twl$Twilight),])
+write.csv(twl, paste0("data/geoloc/1617/twl/", fn,
+                      "_twl_long.csv"), row.names = F)
+
+# twl[420:470,]
+# twl[500:550,]
+# twl <- twl[-c(464), ]