# ------------------------------------------------------------------------------------------------
# Process the Voordelta gape measurements
# ------------------------------------------------------------------------------------------------
library(chron)

Z <- read.table("./Raw datasets/Voordelta juni 2021.DAT", sep = ",", header = TRUE, skip = 6)
dim(Z)
head(Z)

# ---------------------- Create new column with date/time ---------------------------------------
Z$Datetime <- ISOdate(Z$Year, Z$Month, Z$Day, Z$Hour, Z$Minute, Z$Second, tz = "CET")
Z$Datum <- as.POSIXct(Z$Datetime, tz = "CET")  # ISOdate() already returns POSIXct; no format string needed
str(Z$Datum[1:10])

# ---------------------- New column with only date information ----------------------------------
Z$new_Datum <- as.Date(Z$Datetime, tz = "CET")
range(Z$new_Datum, na.rm = TRUE)

Start_date <- min(Z$new_Datum, na.rm = TRUE)
End_date <- max(Z$new_Datum, na.rm = TRUE)

# How many days of measurements in total?
difftime(End_date, Start_date)

# Format of the date: if not already of class "Date", convert with as.Date() first
class(Z$new_Datum)

# Are there NA values in any rows?
row.has.na <- apply(Z, 1, function(x) any(is.na(x)))
# How many rows?
sum(row.has.na)
# Remove these rows from the data frame (run once only)
Z <- Z[!row.has.na, ]
unique(Z$new_Datum)  # is the NA gone?

# -----------------------------------------------------------------------------------
# Convert the full dataset to gaping ratio (0 - 1)
# -----------------------------------------------------------------------------------
# Specify the data frame as well as the column (Ch1, etc.) so all vectors have the same length
Dist.1 <- sqrt(1 / Z$Ch1)
Range.1 <- max(Dist.1, na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1

Dist.2 <- sqrt(1 / Z$Ch2)
Range.2 <- max(Dist.2, na.rm = TRUE) - min(Dist.2, na.rm = TRUE)
Perc.2 <- (Dist.2 - min(Dist.2, na.rm = TRUE)) / Range.2

Dist.3 <- sqrt(1 / Z$Ch3)
Range.3 <- max(Dist.3, na.rm = TRUE) - min(Dist.3, na.rm = TRUE)
Perc.3 <- (Dist.3 - min(Dist.3, na.rm = TRUE)) / Range.3

Dist.4 <- sqrt(1 / Z$Ch4)
Range.4 <- max(Dist.4, na.rm = TRUE) - min(Dist.4, na.rm = TRUE)
Perc.4 <- (Dist.4 - min(Dist.4, na.rm = TRUE)) / Range.4

Dist.5 <- sqrt(1 / Z$Ch5)
Range.5 <- max(Dist.5, na.rm = TRUE) - min(Dist.5, na.rm = TRUE)
Perc.5 <- (Dist.5 - min(Dist.5, na.rm = TRUE)) / Range.5

Dist.6 <- sqrt(1 / Z$Ch6)
Range.6 <- max(Dist.6, na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6

Dist.7 <- sqrt(1 / Z$Ch7)
Range.7 <- max(Dist.7, na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7

Dist.8 <- sqrt(1 / Z$Ch8)
Range.8 <- max(Dist.8, na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8

# --------------------------------------------------
# Add the calculated Perc.# columns to the full data frame Z; then subset as needed
Z <- cbind(Z, Perc.1 = Perc.1, Perc.2 = Perc.2, Perc.3 = Perc.3, Perc.4 = Perc.4,
           Perc.5 = Perc.5, Perc.6 = Perc.6, Perc.7 = Perc.7, Perc.8 = Perc.8)
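# ------------------------------------------------------------------------------------------------
# Aside: the eight blocks above repeat the same min-max rescaling. A minimal helper doing the
# same job in one place is sketched below. This is a hypothetical refactor, not part of the
# original processing; it assumes the channel columns are named Ch1..Ch8, and it adds the
# is.infinite() guard (a zero reading makes sqrt(1/x) infinite) that the per-channel blocks
# further down also use.
gape_ratio <- function(x) {
  d <- sqrt(1 / x)                  # raw sensor value -> relative valve distance
  d[is.infinite(d)] <- NA           # guard against zero readings
  rng <- range(d, na.rm = TRUE)
  (d - rng[1]) / (rng[2] - rng[1])  # rescale to the 0 - 1 gaping ratio
}
# The loop form of the blocks above (identical up to the Inf guard):
# for (i in 1:8) Z[[paste0("Perc.", i)]] <- gape_ratio(Z[[paste0("Ch", i)]])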
# ---------------- Select only the first 4 weeks --------------------------------------------------
filter <- Start_date + 28
Z_4 <- Z[Z$new_Datum <= filter, ]

Start <- 60480  # row index where the analysis window starts; marked in the plots below

# Plot the data (now only the first 4 weeks)
par(mfrow = c(4, 1))

# Channel 1 does not contain good data
Dist.1 <- sqrt(1 / Z_4$Ch1)  # specify the data frame as well as the column to keep lengths equal
Range.1 <- max(Dist.1[!is.infinite(Dist.1)], na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1
plot(Perc.1 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)  # comma after %m makes the time more readable
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)

Dist.2 <- sqrt(1 / Z_4$Ch2)
Range.2 <- max(Dist.2, na.rm = TRUE) - min(Dist.2, na.rm = TRUE)
Perc.2 <- (Dist.2 - min(Dist.2, na.rm = TRUE)) / Range.2
plot(Perc.2 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "day"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)

Dist.5 <- sqrt(1 / Z_4$Ch5)
Range.5 <- max(Dist.5, na.rm = TRUE) - min(Dist.5, na.rm = TRUE)
Perc.5 <- (Dist.5 - min(Dist.5, na.rm = TRUE)) / Range.5
plot(Perc.5 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)

Dist.6 <- sqrt(1 / Z_4$Ch6)
Range.6 <- max(Dist.6, na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6
plot(Perc.6 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)

Dist.7 <- sqrt(1 / Z_4$Ch7)
Range.7 <- max(Dist.7, na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7
plot(Perc.7 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "6 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)

Dist.8 <- sqrt(1 / Z_4$Ch8)
Range.8 <- max(Dist.8, na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8
plot(Perc.8 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
abline(v = Z_4$Datum[c(Start, 207360)], col = "blue", lty = 4, lwd = 2)
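# ------------------------------------------------------------------------------------------------
# Aside: the fixed row indices marking the analysis window (Start = 60480, end = 207360) can be
# related to clock time if the logger interval is known. Assuming one record every 10 s (an
# assumption; the interval is not stated in this script), one day spans 8640 rows, so the two
# markers sit at days 7 and 24 of the record:
rows_per_day <- 24 * 60 * 60 / 10  # hypothetical 10-s sampling interval
c(60480, 207360) / rows_per_day    # -> 7 and 24
# A timestamp-based subset (e.g. Z_4[Z_4$Datum >= some_start, ]) would survive gaps in the
# record, whereas fixed row numbers silently shift when rows are removed.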
# ---------------------------------------------------------------------------------
# Subset the desired time range for the analyses
# ---------------------------------------------------------------------------------
# Channels 3, 4 and (maybe) 6 contain no data
par(mfrow = c(6, 1))
colnames(Z_4)

# --------------
# Channel 1
# --------------
Ch1 <- Z_4[ , c(1:7, 15:19)]
Ch1 <- Ch1[60480:207360, ]

Dist.1 <- sqrt(1 / Ch1$Ch1)  # specify the data frame as well as the column to keep lengths equal
Range.1 <- max(Dist.1[!is.infinite(Dist.1)], na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1
plot(Perc.1 ~ Ch1$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch1$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 2
# --------------
Ch2 <- Z_4[ , c(1:6, 8, 15:18, 20)]
Ch2 <- Ch2[60480:207360, ]

Dist.2 <- sqrt(1 / Ch2$Ch2)
Range.2 <- max(Dist.2[!is.infinite(Dist.2)], na.rm = TRUE) - min(Dist.2, na.rm = TRUE)  # renamed from Range.1: each channel now keeps its own range object
Perc.2 <- (Dist.2 - min(Dist.2, na.rm = TRUE)) / Range.2
plot(Perc.2 ~ Ch2$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch2$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 5
# --------------
Ch5 <- Z_4[ , c(1:6, 11, 15:18, 23)]
Ch5 <- Ch5[60480:207360, ]

Dist.5 <- sqrt(1 / Ch5$Ch5)
Range.5 <- max(Dist.5[!is.infinite(Dist.5)], na.rm = TRUE) - min(Dist.5, na.rm = TRUE)
Perc.5 <- (Dist.5 - min(Dist.5, na.rm = TRUE)) / Range.5
plot(Perc.5 ~ Ch5$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch5$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 6 maybe good; check
# --------------
# Ch6 <- Z_4[ , c(1:6, 12, 15:18, 24)]
# # Remove first 14000 points = 1.6 days
# # Remove 1.2 days between 21.4 - 22.6
# Ch6 <- Ch6[60480:207360, ]
# Dist.6 <- sqrt(1 / Ch6$Ch6)
# Range.6 <- max(Dist.6[!is.infinite(Dist.6)], na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
# Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6
# plot(Perc.6 ~ Ch6$Datum, type = "l", xaxt = "n", xlab = "")
# r <- range(Ch6$Datum, na.rm = TRUE)
# axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
#              format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
# # There is a bottom outlier peak around 27/5; filter out values below 0.75
# difftime(Ch6$Datum[Ch6$Perc.6 <= 0.75][131], Ch6$Datum[Ch6$Perc.6 <= 0.75][1])  # period of 30.7 minutes
# nrow(Ch6[Ch6$Perc.6 <= 0.75, ])
# Ch6 <- Ch6[Ch6$Perc.6 > 0.75, ]  # this removes 131 rows
# # Recalculate and plot the new Ch6 results
# Dist.6 <- sqrt(1 / Ch6$Ch6)
# Range.6 <- max(Dist.6[!is.infinite(Dist.6)], na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
# Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6
# plot(Perc.6 ~ Ch6$Datum, type = "l", xaxt = "n", xlab = "")
# r <- range(Ch6$Datum, na.rm = TRUE)
# axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
#              format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
# # Note that this period is shorter than the rest
# difftime(tail(Ch6$Datum, n = 1), head(Ch6$Datum, n = 1))  # head() was misplaced inside the tail() call

# --------------
# Channel 7
# --------------
Ch7 <- Z_4[ , c(1:6, 13, 15:18, 25)]
Ch7 <- Ch7[60480:207360, ]
#Ch7 <- Ch7[1:98000, ]        # 1st segment only
#Ch7 <- Ch7[135000:236500, ]  # 2nd segment only

Dist.7 <- sqrt(1 / Ch7$Ch7)
Range.7 <- max(Dist.7[!is.infinite(Dist.7)], na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7
plot(Perc.7 ~ Ch7$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch7$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 8
# --------------
Ch8 <- Z_4[ , c(1:6, 14, 15:18, 26)]
Ch8 <- Ch8[60480:207360, ]

Dist.8 <- sqrt(1 / Ch8$Ch8)
Range.8 <- max(Dist.8[!is.infinite(Dist.8)], na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8
plot(Perc.8 ~ Ch8$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch8$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

Voordelta_zo_Perc <- cbind(Ch1, Perc.1,
                           Ch2 = Ch2$Ch2, Perc.2,
                           Ch5 = Ch5$Ch5, Perc.5,
                           Ch7 = Ch7$Ch7, Perc.7,
                           Ch8 = Ch8$Ch8, Perc.8)
write.csv(Voordelta_zo_Perc, "Voordelta_zo_Perc.csv")

##############################################################################
##############################################################################
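# ------------------------------------------------------------------------------------------------
# Aside: the numeric column indices in the Ch# subsets above (and again in the February section
# below) are fragile; they break if the raw file gains or loses a column. A name-based
# alternative is sketched here. It is hypothetical, not part of the original processing, and
# assumes the timestamp columns are named exactly as created earlier (Datetime, Datum,
# new_Datum) and that the raw time columns are Year..Second:
pick_channel <- function(df, ch) {
  keep <- c("Year", "Month", "Day", "Hour", "Minute", "Second",
            ch, "Datetime", "Datum", "new_Datum", sub("Ch", "Perc.", ch))
  df[ , intersect(keep, colnames(df))]
}
# e.g. Ch7 <- pick_channel(Z_4, "Ch7")[60480:207360, ]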
# -----------------------------------------------------------------------------
# Process the February data
# -----------------------------------------------------------------------------
Z <- read.table("./Raw datasets/Voordelta februari 2020.DAT", sep = ",", header = TRUE, skip = 6)
dim(Z)
head(Z)

# ---------------------- Create new column with date/time ---------------------------------------
Z$Datetime <- ISOdate(Z$Year, Z$Month, Z$Day, Z$Hour, Z$Minute, Z$Second, tz = "CET")
Z$Datum <- as.POSIXct(Z$Datetime, tz = "CET")
str(Z$Datum[1:10])

# ---------------------- New column with only date information ----------------------------------
Z$new_Datum <- as.Date(Z$Datetime, tz = "CET")
range(Z$new_Datum, na.rm = TRUE)

Start_date <- min(Z$new_Datum, na.rm = TRUE)
End_date <- max(Z$new_Datum, na.rm = TRUE)

# How many days of measurements in total?
difftime(End_date, Start_date)

# Format of the date: if not already of class "Date", convert with as.Date() first
class(Z$new_Datum)

# Are there NA values in any rows? Recompute row.has.na here; reusing the object left over
# from the summer section would flag the wrong (stale) rows.
row.has.na <- apply(Z, 1, function(x) any(is.na(x)))
sum(row.has.na)  # how many rows?
Z <- Z[!row.has.na, ]  # remove them (run once only)
unique(Z$new_Datum)  # is the NA gone?
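# ------------------------------------------------------------------------------------------------
# Aside: base R has a dedicated idiom for the NA-row check above: complete.cases() returns TRUE
# for rows without any NA, replacing the apply() loop. A sketch; after the filter above it
# should select exactly the same rows:
Z_complete <- Z[complete.cases(Z), ]
stopifnot(nrow(Z_complete) == nrow(Z))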
# -----------------------------------------------------------------------------------
# Convert the full dataset to gaping ratio (0 - 1)
# -----------------------------------------------------------------------------------
# Channel 1 does not contain good data
Dist.1 <- sqrt(1 / Z$Ch1)  # specify the data frame as well as the column (Ch1, etc.) to keep lengths equal
Range.1 <- max(Dist.1[!is.infinite(Dist.1)], na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1

Dist.2 <- sqrt(1 / Z$Ch2)
Range.2 <- max(Dist.2, na.rm = TRUE) - min(Dist.2, na.rm = TRUE)
Perc.2 <- (Dist.2 - min(Dist.2, na.rm = TRUE)) / Range.2

Dist.3 <- sqrt(1 / Z$Ch3)
Range.3 <- max(Dist.3, na.rm = TRUE) - min(Dist.3, na.rm = TRUE)
Perc.3 <- (Dist.3 - min(Dist.3, na.rm = TRUE)) / Range.3

Dist.4 <- sqrt(1 / Z$Ch4)
Range.4 <- max(Dist.4, na.rm = TRUE) - min(Dist.4, na.rm = TRUE)
Perc.4 <- (Dist.4 - min(Dist.4, na.rm = TRUE)) / Range.4

Dist.5 <- sqrt(1 / Z$Ch5)
Range.5 <- max(Dist.5, na.rm = TRUE) - min(Dist.5, na.rm = TRUE)
Perc.5 <- (Dist.5 - min(Dist.5, na.rm = TRUE)) / Range.5

Dist.6 <- sqrt(1 / Z$Ch6)
Range.6 <- max(Dist.6, na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6

Dist.7 <- sqrt(1 / Z$Ch7)
Range.7 <- max(Dist.7, na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7

Dist.8 <- sqrt(1 / Z$Ch8)
Range.8 <- max(Dist.8, na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8

# --------------------------------------------------
# Add the calculated Perc.# columns to the full data frame Z; then subset as needed
Z <- cbind(Z, Perc.1 = Perc.1, Perc.2 = Perc.2, Perc.3 = Perc.3, Perc.4 = Perc.4,
           Perc.5 = Perc.5, Perc.6 = Perc.6, Perc.7 = Perc.7, Perc.8 = Perc.8)

# ---------------- Time range to select ----------------------------------------------------------
# Compared to the summer deployment, the record is much longer (155 days vs. 34 days)
filter <- Start_date + 28
Z_4 <- Z[Z$new_Datum <= filter, ]  # reduced from 1344623 to 250560 rows

# ---------------------------------------------------------------------------------------------------
# Plot the data (now only the first 4 weeks)
par(mfrow = c(4, 1))

# Channel 1 does not contain good data
Dist.1 <- sqrt(1 / Z_4$Ch1)
Range.1 <- max(Dist.1[!is.infinite(Dist.1)], na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1
plot(Perc.1 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)  # comma after %m makes the time more readable
abline(v = Z_4$Datum[c(60480, 207360)], col = "blue", lty = 4, lwd = 2)  # was Datum[60480, 207360]: two positions in one vector need c()

Dist.6 <- sqrt(1 / Z_4$Ch6)
Range.6 <- max(Dist.6, na.rm = TRUE) - min(Dist.6, na.rm = TRUE)
Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6
plot(Perc.6 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

Dist.7 <- sqrt(1 / Z_4$Ch7)
Range.7 <- max(Dist.7, na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7
plot(Perc.7 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "6 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

Dist.8 <- sqrt(1 / Z_4$Ch8)
Range.8 <- max(Dist.8, na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8
plot(Perc.8 ~ Z_4$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Z_4$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# ---------------------------------------------------------------------------------
# Subset the desired time range for the analyses
# ---------------------------------------------------------------------------------
# Channels 2 - 5 contain no data

# --------------
# Channel 1
# --------------
Ch1 <- Z_4[ , c(1:7, 15:19)]
Ch1 <- Ch1[60480:207360, ]

Dist.1 <- sqrt(1 / Ch1$Ch1)  # specify the data frame as well as the column to keep lengths equal
Range.1 <- max(Dist.1[!is.infinite(Dist.1)], na.rm = TRUE) - min(Dist.1, na.rm = TRUE)
Perc.1 <- (Dist.1 - min(Dist.1, na.rm = TRUE)) / Range.1
plot(Perc.1 ~ Ch1$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch1$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 6
# --------------
Ch6 <- Z_4[ , c(1:6, 12, 15:18, 24)]
Ch6 <- Ch6[60480:207360, ]

Dist.6 <- sqrt(1 / Ch6$Ch6)
Range.6 <- max(Dist.6[!is.infinite(Dist.6)], na.rm = TRUE) - min(Dist.6, na.rm = TRUE)  # renamed from Range.1: each channel now keeps its own range object
Perc.6 <- (Dist.6 - min(Dist.6, na.rm = TRUE)) / Range.6
plot(Perc.6 ~ Ch6$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch6$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 7
# --------------
Ch7 <- Z_4[ , c(1:6, 13, 15:18, 25)]
Ch7 <- Ch7[60480:207360, ]
#Ch7 <- Ch7[1:98000, ]        # 1st segment only
#Ch7 <- Ch7[135000:236500, ]  # 2nd segment only

Dist.7 <- sqrt(1 / Ch7$Ch7)
Range.7 <- max(Dist.7[!is.infinite(Dist.7)], na.rm = TRUE) - min(Dist.7, na.rm = TRUE)
Perc.7 <- (Dist.7 - min(Dist.7, na.rm = TRUE)) / Range.7
plot(Perc.7 ~ Ch7$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch7$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)

# --------------
# Channel 8
# --------------
Ch8 <- Z_4[ , c(1:6, 14, 15:18, 26)]
Ch8 <- Ch8[60480:207360, ]
#Ch8 <- Ch8[1:98000, ]        # 1st segment only
#Ch8 <- Ch8[135000:236500, ]  # 2nd segment only

Dist.8 <- sqrt(1 / Ch8$Ch8)
Range.8 <- max(Dist.8[!is.infinite(Dist.8)], na.rm = TRUE) - min(Dist.8, na.rm = TRUE)
Perc.8 <- (Dist.8 - min(Dist.8, na.rm = TRUE)) / Range.8
plot(Perc.8 ~ Ch8$Datum, type = "l", xaxt = "n", xlab = "")
r <- range(Ch8$Datum, na.rm = TRUE)
axis.POSIXct(1, at = seq(r[1], r[2], by = "24 hour"),
             format = "%d/%m,%H:%M", las = 2, cex.axis = 1)
# -----------------------------------------------------------------------------
# Get the start/end times to match with the chl-a and velocity datasets;
# remember that the channel length will need to be trimmed (131 points in the middle)
Start_time <- min(Ch8$Datum, na.rm = TRUE)  # "2020-10-26 23:59:50 CET"
End_time <- max(Ch8$Datum, na.rm = TRUE)    # "2020-11-12 23:59:50 CET"

##############################################################################
##############################################################################
Voordelta_wi_Perc <- cbind(Ch1, Perc.1,
                           Ch6 = Ch6$Ch6, Perc.6,
                           Ch7 = Ch7$Ch7, Perc.7,
                           Ch8 = Ch8$Ch8, Perc.8)
write.csv(Voordelta_wi_Perc, "Voordelta_wi_Perc.csv")

# --------------------------------------------------------------------------------------------
# Process the environmental data from the Voordelta experiment
# --------------------------------------------------------------------------------------------
require(forecast)

# ----------------------------------------------------------------------------------------
# Read the chlorophyll data (also includes turbidity data)
# ----------------------------------------------------------------------------------------
chla_zo <- read.csv("./Raw datasets/Chl 20210510_0900_ACLW-USB_0753_103456_A.csv", skip = 30, header = TRUE)
chla_wi <- read.csv("./Raw datasets/Chl data voordelta1_winter.csv", header = TRUE)

chla_zo$Date <- strptime(chla_zo$Date, "%m/%d/%Y %H:%M")
chla_wi$Date <- strptime(chla_wi$Date, "%m/%d/%Y %H:%M")

# -------Summer------- New column with only date information -------------------------------------
chla_zo$new_Datum <- as.Date(chla_zo$Date)
range(chla_zo$new_Datum, na.rm = TRUE)  # the data range (start / end)

Start_date <- min(chla_zo$new_Datum, na.rm = TRUE)
End_date <- max(chla_zo$new_Datum, na.rm = TRUE)

# How many days of measurements in total? ==> 71 days
with(chla_zo, difftime(max(new_Datum, na.rm = TRUE), min(new_Datum, na.rm = TRUE)))
# Another way of measuring the total measurement length
difftime(End_date, Start_date)

# Format of the date: if not already of class "Date", convert with as.Date() first
class(chla_zo$new_Datum)

# Plot the summer raw data
plot(chla_zo$Chl.a.ug.l., type = "l", las = 1, ann = FALSE)
mtext(side = 1, "time (minutes)", line = 3)
mtext(side = 2, expression(mu ~ "g L"^{-1}), line = 3)

# Calculate the daily mean and standard deviation
chla_mean_zo <- aggregate(chla_zo[ , 2:6], by = list(chla_zo$new_Datum), mean)
chla_SD_zo <- aggregate(chla_zo[ , 2:6], by = list(chla_zo$new_Datum), sd)

# Plot the mean; peaks/outliers should be largely reduced; only the first 4 weeks
plot(chla_mean_zo$Chl.a.ug.l.[1:28], type = "l", las = 1, ann = FALSE)
points(chla_mean_zo$Chl.a.ug.l.[1:28] - chla_SD_zo$Chl.a.ug.l.[1:28], cex = 0.1)
points(chla_mean_zo$Chl.a.ug.l.[1:28] + chla_SD_zo$Chl.a.ug.l.[1:28], cex = 0.1)
mtext(side = 1, "time (days)", line = 3)
mtext(side = 2, expression(mu ~ "g L"^{-1}), line = 3)

# -------Winter------- New column with only date information -------------------------------------
chla_wi$new_Datum <- as.Date(chla_wi$Date)
range(chla_wi$new_Datum, na.rm = TRUE)

Start_date <- min(chla_wi$new_Datum, na.rm = TRUE)
End_date <- max(chla_wi$new_Datum, na.rm = TRUE)

# How many days of measurements in total? ==> 153 days
with(chla_wi, difftime(max(new_Datum, na.rm = TRUE), min(new_Datum, na.rm = TRUE)))
difftime(End_date, Start_date)

# Format of the date: if not already of class "Date", convert with as.Date() first
class(chla_wi$new_Datum)

# Plot the winter raw data
plot(chla_wi$Chl.a.ug.l., type = "l", las = 1, ann = FALSE)
mtext(side = 1, "time (minutes)", line = 3)
mtext(side = 2, expression(mu ~ "g L"^{-1}), line = 3)

# Calculate the daily mean and standard deviation
chla_mean_wi <- aggregate(chla_wi[ , 2:6], by = list(chla_wi$new_Datum), mean)
chla_SD_wi <- aggregate(chla_wi[ , 2:6], by = list(chla_wi$new_Datum), sd)

# Plot the mean; peaks/outliers should be largely reduced
plot(chla_mean_wi$Chl.a.ug.l.[1:28], type = "l", las = 1, ylim = c(0, 20), ann = FALSE)
mtext(side = 1, "time (days)", line = 3)
mtext(side = 2, expression(mu ~ "g L"^{-1}), line = 3)
points(chla_mean_wi$Chl.a.ug.l.[1:28] - chla_SD_wi$Chl.a.ug.l.[1:28], cex = 0.1)
points(chla_mean_wi$Chl.a.ug.l.[1:28] + chla_SD_wi$Chl.a.ug.l.[1:28], cex = 0.1)
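# ------------------------------------------------------------------------------------------------
# Aside: the daily mean still responds to short spikes. If a more outlier-robust daily statistic
# is wanted, the same aggregate() call works with median (a sketch; assumes columns 2:6 are the
# numeric sensor series, as in the mean/SD calls above):
chla_med_wi <- aggregate(chla_wi[ , 2:6], by = list(chla_wi$new_Datum), median, na.rm = TRUE)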
# --------------------------------------------------------------------------------------------------
# Process the flow datasets
# --------------------------------------------------------------------------------------------------
Flow_zo <- read.csv("./Raw datasets/Data Aquadopp klepstand voordelta 20-07-2021.csv", skip = 2, header = TRUE)
colnames(Flow_zo) <- c("Month", "Day", "Year", "Hour", "Minute", "Second",
                       "Error code", "Status code",
                       "Velocity (Beam1|X|East)", "Velocity (Beam2|Y|North)", "Velocity (Beam3|Z|Up)",
                       "Amplitude (Beam1)", "Amplitude (Beam2)", "Amplitude (Beam3)",
                       "Battery voltage", "Soundspeed", "Heading", "Pitch", "Roll",
                       "Pressure", "Temperature", "Analog input 1", "Analog input 2",
                       "Speed", "Direction")

Flow_wi <- read.csv("./Raw datasets/data aquadopp klepstand_winter.csv", header = TRUE, skip = 1)
colnames(Flow_wi) <- colnames(Flow_zo)  # same Aquadopp column layout in both seasons

# ------- Create a date-time column for both seasons ---------------------------------------------
# with() is used instead of attach(); attaching Flow_zo and then Flow_wi would mask the first
# data frame's Year/Month/... columns with the second's
Flow_zo$Datetime <- with(Flow_zo, ISOdate(Year, Month, Day, Hour, Minute, tz = "CET"))
Flow_wi$Datetime <- with(Flow_wi, ISOdate(Year, Month, Day, Hour, Minute, tz = "CET"))

# -------Summer------- New column with only date information (day, month, year) ------------------
Flow_zo$new_Datum <- paste(Flow_zo$Day, Flow_zo$Month, Flow_zo$Year, sep = "-")
Flow_zo$new_Datum <- as.Date(Flow_zo$new_Datum, format = "%d-%m-%Y")
range(Flow_zo$new_Datum, na.rm = TRUE)
Start_date <- min(Flow_zo$new_Datum, na.rm = TRUE)
End_date <- max(Flow_zo$new_Datum, na.rm = TRUE)
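# ------------------------------------------------------------------------------------------------
# Aside: since Datetime already exists, the date-only column can also be taken straight from it,
# skipping the string round-trip through paste(). A sketch; both routes should agree:
alt_datum <- as.Date(Flow_zo$Datetime, tz = "CET")
all(alt_datum == Flow_zo$new_Datum, na.rm = TRUE)  # expected TRUE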
# How many days of measurements in total?
difftime(End_date, Start_date)

# -------Winter------- New column with only date information -------------------------------------
Flow_wi$new_Datum <- paste(Flow_wi$Day, Flow_wi$Month, Flow_wi$Year, sep = "-")
Flow_wi$new_Datum <- as.Date(Flow_wi$new_Datum, format = "%d-%m-%Y")
range(Flow_wi$new_Datum, na.rm = TRUE)
Start_date <- min(Flow_wi$new_Datum, na.rm = TRUE)
End_date <- max(Flow_wi$new_Datum, na.rm = TRUE)

# How many days of measurements in total?
difftime(End_date, Start_date)

# ------- Match time with the gaping datasets -----------------------------------------------------
# First match the start time exactly to the gaping dataset
# Summer
Flow_zo <- Flow_zo[Flow_zo$Datetime > "2021-05-17 08:59:50" &
                   Flow_zo$Datetime < "2021-06-03 08:59:50", ]
# Winter
Flow_wi <- Flow_wi[Flow_wi$Datetime >= "2020-10-26 23:59:50" &
                   Flow_wi$Datetime < "2020-11-12 23:59:50", ]

write.csv(Flow_zo, "Flow_zo.csv")
write.csv(Flow_wi, "Flow_wi.csv")
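# ------------------------------------------------------------------------------------------------
# Aside: a possible next step (a sketch, not part of the original processing) is to align the
# gape and Aquadopp records on a shared minute stamp. This assumes the gape logger runs at
# sub-minute intervals, the Aquadopp at one record per minute, and that Voordelta_wi_Perc still
# carries the Datum timestamp column from Ch1. Each flow record is then repeated across the gape
# rows that fall within its minute:
gape_wi <- Voordelta_wi_Perc
gape_wi$Minute_stamp <- as.POSIXct(trunc(gape_wi$Datum, units = "mins"))
Flow_wi$Minute_stamp <- as.POSIXct(trunc(Flow_wi$Datetime, units = "mins"))
combined_wi <- merge(gape_wi, Flow_wi[ , c("Minute_stamp", "Speed", "Direction")],
                     by = "Minute_stamp")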