This data and code runs a species distribution model on sea turtle tracking locations using the Following steps:

Load and plot the data
Model the relationship between sea turtle presence and environmental data
Predict sea turtle presence under future climate change

modeling

In order to speed things up I went ahead and sampled environmental data in ArcMap using the marine geospatial ecology toolset. There are plenty of ways to do this in R using Env. raster data downloaded from GEE.

plot(turtle_dat_sp)

plot of chunk unnamed-chunk-3

training and testing

split into training and testing datasets

quick random forest

random forest model to assess the effect of ENV variables on turtle presence.

# Train the model using randomForest and predict on the training data itself.
model_rf = train(presence2 ~ SST + SSAL + DEPTH + DIST, data=turtle_dat, method='rf')
model_rf

## Random Forest 
## 
## 843 samples
##   4 predictor
##   2 classes: 'absent', 'present' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 843, 843, 843, 843, 843, 843, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa     
##   2     0.8096419  0.04765169
##   3     0.8050976  0.04460484
##   4     0.8023045  0.04114005
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.

varimp_rf <- varImp(model_rf)
plot(varimp_rf, main="Variable Importance with RF")

plot of chunk unnamed-chunk-5

glm <- glm(presence ~ SST + SSAL + DEPTH + DIST, data=turtle_dat, family=binomial()) summary(glm)

## 
## Call:
## glm(formula = presence ~ SST + SSAL + DEPTH + DIST, family = binomial(), 
##     data = turtle_dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0210  -0.6370  -0.5950  -0.5319   2.0827  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  6.341e+00  3.127e+00   2.028   0.0426 *
## SST         -1.918e-01  8.991e-02  -2.133   0.0329 *
## SSAL        -7.402e-02  7.403e-02  -1.000   0.3173  
## DEPTH       -3.342e-05  9.351e-05  -0.357   0.7208  
## DIST        -1.484e-07  6.766e-07  -0.219   0.8263  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 777.09  on 842  degrees of freedom
## Residual deviance: 770.14  on 838  degrees of freedom
## AIC: 780.14
## 
## Number of Fisher Scoring iterations: 4

machine learning

Use a cross validated random forest model from the caret package.

# Define the training control
fitControl <- trainControl(
method = 'cv',                   # k-fold cross validation
number = 5,                      # number of folds
savePredictions = 'final',       # saves predictions for optimal tuning parameter
classProbs = T,                  # should class probabilities be returned
summaryFunction=twoClassSummary  # results summary function
)

model_rf2 = train(presence2 ~ SST + SSAL + DEPTH + DIST, data=trainData, method='rf', tuneLength = 5, metric='ROC', trControl = fitControl)

## note: only 3 unique complexity parameters in default grid. Truncating the grid to 3 .

#Predict on testData and Compute the confusion matrix
predicted2 <- predict(model_rf2, testData)
predicted2 <- as.factor(predicted2)
testData$presence2 <- as.factor(testData$presence2)
confusionMatrix(reference = testData$presence2, data = predicted2, mode='everything', positive='present')

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction absent present
##    absent     131      29
##    present      5       3
##                                           
##                Accuracy : 0.7976          
##                  95% CI : (0.7288, 0.8556)
##     No Information Rate : 0.8095          
##     P-Value [Acc > NIR] : 0.6937          
##                                           
##                   Kappa : 0.0799          
##                                           
##  Mcnemar's Test P-Value : 7.998e-05       
##                                           
##             Sensitivity : 0.09375         
##             Specificity : 0.96324         
##          Pos Pred Value : 0.37500         
##          Neg Pred Value : 0.81875         
##               Precision : 0.37500         
##                  Recall : 0.09375         
##                      F1 : 0.15000         
##              Prevalence : 0.19048         
##          Detection Rate : 0.01786         
##    Detection Prevalence : 0.04762         
##       Balanced Accuracy : 0.52849         
##                                           
##        'Positive' Class : present         
##

run selected model on entire dataset

now that we have selected a “good” model, run with all of the data.

model_rf2 = train(presence2 ~ SST + SSAL + DEPTH + DIST, data=turtle_dat, method='rf', tuneLength = 5, metric='ROC', trControl = fitControl)

## note: only 3 unique complexity parameters in default grid. Truncating the grid to 3 .

Predict under Future Climate Change

get ENV data

I downloaded climate change data for SST and Salinity for 2006-2055 for one climate model (GFDL) for an annual time period and RCP 8.5. For this to be complete, you need to go and get the other data:
3 models (i.e need GFDL-ESM2M, IPSL-CM5A-MR, MPI-ESM-MR.)
2 time periods (2006-2055 and 2050-2099)
2 RCPs (8.5, 2.6)
any other env data you use (CHLA?)

library(ncdf4)
#read in the data 
#sst
ncpath <- "~/Documents/Classes/course_lectures/ViaX/project/"
ncname <- "GFDL_8.5_SST"
ncfname <- paste(ncpath, ncname, ".nc", sep="")
ncin <- nc_open(ncfname)
names(ncin$var)

## [1] "histclim"   "anomaly"    "histstddev" "varratio"

anom_sst <- stack(ncfname, varname="anomaly")
plot(anom_sst)

plot of chunk unnamed-chunk-8

#sal
ncpath <- "~/Documents/Classes/course_lectures/ViaX/project/"
ncname <- "GFDL_8.5_SAL"
ncfname <- paste(ncpath, ncname, ".nc", sep="")
ncin <- nc_open(ncfname)
names(ncin$var)

## [1] "histclim"   "anomaly"    "histstddev" "varratio"

anom_sal <- stack(ncfname, varname="anomaly")
plot(anom_sal)

plot of chunk unnamed-chunk-8

Now we want to add the anomoly data to the historical data to get high resolution future

#sst
sst_hist_hires <- raster("~/Documents/Classes/course_lectures/ViaX/project/water_temp_0000m_cumulative_mean.img")
plot(sst_hist_hires)

plot of chunk unnamed-chunk-9

anom_sst_hires <- disaggregate(anom_sst, fact=12.5) #get to same resolution as hist (1/.08)
anom_sst_hires <- projectRaster(anom_sst_hires,sst_hist_hires,method = 'bilinear')
plot(anom_sst_hires)

plot of chunk unnamed-chunk-9

sst_fut <- sst_hist_hires + anom_sst_hires
plot(sst_fut)

plot of chunk unnamed-chunk-9

#sal
sal_hist_hires <- raster("~/Documents/Classes/course_lectures/ViaX/project/salinity_0000m_cumulative_mean.img")
plot(sal_hist_hires)

plot of chunk unnamed-chunk-9

anom_sal_hires <- disaggregate(anom_sal, fact=12.5) #get to same resolution as hist (1/.08)
anom_sal_hires <- projectRaster(anom_sal_hires,sal_hist_hires,method = 'bilinear')
plot(anom_sal_hires)

plot of chunk unnamed-chunk-9

sal_fut <- sal_hist_hires + anom_sal_hires
plot(sal_fut)

plot of chunk unnamed-chunk-9

Predict

probability of presence

Use the model we ran and the environmental rasters to predict probability of presence in the future and past.

SST <- sst_fut
SSAL <- sal_fut

DEPTH <- raster("~/Documents/Classes/course_lectures/ViaX/project/DEPTH.img")
plot(DEPTH)

plot of chunk unnamed-chunk-10

DIST <- raster("~/Documents/Classes/course_lectures/ViaX/project/dist.img")
plot(DIST)

plot of chunk unnamed-chunk-10

#it looks like depth is a little off. Need to make sure it's in the right projection
DEPTH <- projectRaster(DEPTH,SST,method = 'bilinear')

## Warning in projectRaster(DEPTH, SST, method = "bilinear"): input and ouput crs are the same

#create a stack of predictors 
predictors <- stack(SST, SSAL, DEPTH, DIST)
names(predictors) <- c("SST", "SSAL", "DEPTH", "DIST")
#predict on future dataset. 
p_fut <- predict(predictors, model_rf2, type="prob")


SST <- sst_hist_hires
SSAL <- sal_hist_hires

predictors <- stack(SST, SSAL, DEPTH, DIST)
names(predictors) <- c("SST", "SSAL", "DEPTH", "DIST")
p_hist <- predict(predictors, model_rf2, type="prob")
plot(p_hist)

Historic Probability of Presence plot of chunk unnamed-chunk-10

plot(p_fut)

Future Probability of Presence

plot of chunk unnamed-chunk-10

#change in future
#calculate the change between the past and the future
diff <- p_fut - p_hist
plot(diff)

Change in Probability of Presence under Climate Change

Turtle Modeling

modeling

training and testing

quick random forest

machine learning

run selected model on entire dataset

Predict under Future Climate Change

get ENV data

Predict

probability of presence

Leave a comment Cancel reply

modeling

training and testing

quick random forest

machine learning

run selected model on entire dataset

Predict under Future Climate Change

get ENV data

Predict

probability of presence

Share this:

Related

Leave a comment Cancel reply