IPO_RI.Rmd

---
title: "Ipo RI Survey Analysis"
author: "Lorraine Saju"
date: "`r Sys.Date()`"
output: html_document
---

```{r setup, include=FALSE}
library("tidyverse")
library("gplots") # for plotCI
library("scales") # for transparency
library("lme4") # for linear mixed-effect models
library("glmmTMB") # for zero-inflation and hurdle models
library("DHARMa") # for model fits
library("emmeans") # for model comparisons
library("mapdata") # for map data 
library("geosphere") # for distances between points
par(mar=c(5, 4, 4, 2), oma=c(2, 2, 2, 2))
options(scipen = 999)
```

# Define Model Fit Function

This function is used to run multiple diagnostic tests for model fitting.

```{r}
# Define a function that runs multiple model fitting tests 
test_model_fit <- function(model) {
  print(summary(model))
  sim_res <- simulateResiduals(model)
  testZeroInflation(sim_res)
  testOutliers(sim_res)
  testDispersion(sim_res) # simulation test for over/under dispersion
  plot(sim_res)
  hist(resid(model))
}
```

# Population Mapping

Load the population data and visualize the locations of populations used in the survey.

```{r}
mdata <- read.csv(file="Ipo_RI_survey_pops.csv", stringsAsFactors = TRUE, na.strings = c("NA", ""))

par(mar=c(1, 1, 1, 1), oma=c(1, 1, 1, 1))
map("worldHires","usa", xlim=c(-105,-75),ylim=c(15,40), col="gray90", fill=TRUE)
map("worldHires","mexico", xlim=c(-105,-75),ylim=c(15,40), col="gray95", fill=TRUE, add=TRUE)
points(mdata$long, mdata$lat, pch=19, cex=1.5, col=as.numeric(mdata$type))
text(mdata$long, mdata$lat+1, mdata$name)

map("worldHires", "usa", xlim=c(-86,-75), ylim=c(33,38), col="gray90", fill=TRUE)
points(mdata$long, mdata$lat, pch=19, cex=1.5, col=as.numeric(mdata$type))
text(mdata$long, mdata$lat + 0.25, mdata$name)

par(mar=c(5, 4, 4, 2), oma=c(2, 2, 2, 2))
```

# Cross Data Analysis

Load cross data and explore seed weight distributions.

```{r}
# load data 
cross_data <- read.csv(file="Ipo_RI_survey.csv", stringsAsFactors = TRUE, na.strings = c("NA", ""))
cross_data$r.cross.type <- ordered(cross_data$r.cross.type, levels = c("ACxAC","ACxSC","SCxAC","SCxSC","ACxAL","ACxSL","ALxAC","SLxAC","SCxAL","SCxSL","ALxSC","SLxSC","ALxAL","SLxAL","ALxSL","SLxSL"))

# filter out the population "Ocean"
cross_data <- cross_data %>% filter(mom.loc != "Ocean", dad.loc != "Ocean") %>% droplevels()

# look at seed weights
hist(c(cross_data$seed1, cross_data$seed1, cross_data$seed1, cross_data$seed1), breaks = 30, xlab = "Seed weight (mg)", main = NULL)
abline(v = 10, col = "blue", lty = 2)

# make means data
means_dat <- cross_data %>% group_by(focal.plant, mom, mom.pop, mom.type, dad, dad.pop, dad.type, r.sp, cross.type, r.cross.type, cross.loc, intraspecific) %>%
  summarise(mean = mean(n.seeds)) %>%
  as.data.frame %>% filter(., complete.cases(.))
means_dat$r.cross.type <- ordered(means_dat$r.cross.type, levels = c("ACxAC","ACxSC","SCxAC","SCxSC","ACxAL","ACxSL","ALxAC","SLxAC","SCxAL","SCxSL","ALxSC","SLxSC","ALxAL","SLxAL","ALxSL","SLxSL"))

```

# Test for differences between different types of crosses

```{r}
# look at data
stripchart(mean ~ r.cross.type, data=means_dat, las = 2, vertical = TRUE, method = "jitter", ylab = "Mean number of seeds produced per pod", 
           pch = 16, cex = 1.5, lty = 1, col = alpha(c(rep("#AD3C8C", 4), rep("#4259A7", 8), rep("#38B14A", 4)), 0.6))
mosaicplot(table(cross_data$r.cross.type, cross_data$n.seeds), las=2, main=NULL, ylab = "Number of seeds per pod", xlab = "Cross type")
mosaicplot(table(cross_data$r.sp, cross_data$n.seeds), las=2, main=NULL)

# test models of fruit set
full_B_model1 <- glmer(min1seed ~ r.sp + (1 | mom.pop/mom/cross.ind) + (1 | dad.pop/dad), family = "binomial", data = cross_data)
full_B_model2 <- glmer(min1seed ~ r.sp + (1 | mom.pop/mom/cross.ind), family = "binomial", data = cross_data)
full_B_model3 <- glmer(min1seed ~ (1 | mom.pop/mom/cross.ind), family = "binomial", data = cross_data)
anova(full_B_model1, full_B_model2, full_B_model3)
test_model_fit(full_B_model2)
lsmeans(full_B_model2, pairwise ~ r.sp)
contrast(lsmeans(full_B_model2, "r.sp"), list(CvL=c(1,0,0,-1), CvH=c(1,-1,-1,0), LvH=c(0,-1,-1,1), H1vH2=c(0,1,-1,0)))
#test_model_fit(full_B_model1)
#anova(full_B_model1, glmer(min1seed ~ (1 | mom.pop/mom/cross.ind) + (1 | dad.pop/dad), family = "binomial", data = cross_data))
#lsmeans(full_B_model1, pairwise ~ r.sp)
#contrast(lsmeans(full_B_model1, "r.sp"), list(CvL=c(1,0,0,-1), CvH=c(1,-1,-1,0), LvH=c(0,-1,-1,1), H1vH2=c(0,1,-1,0)))

# test models of mean seed number
full_M_model1 <- glmmTMB(mean ~ r.sp + (1 | mom.pop/mom) + (1 | dad.pop/dad),  ziformula=~r.sp, data = means_dat)
full_M_model2 <- glmmTMB(mean ~ r.sp + (1 | mom.pop/mom),  ziformula=~r.sp, data = means_dat)
full_M_model3 <- glmmTMB(mean ~ (1 | mom.pop/mom),  ziformula=~r.sp, data = means_dat)
anova(full_M_model1, full_M_model2, full_M_model3)
test_model_fit(full_M_model2)
lsmeans(full_M_model2, pairwise ~ r.sp)
contrast(lsmeans(full_M_model2, "r.sp"), list(CvL=c(1,0,0,-1), CvH=c(1,-1,-1,0), LvH=c(0,-1,-1,1), H1vH2=c(0,1,-1,0)))
#test_model_fit(full_M_model1)
#anova(full_M_model1, glmmTMB(mean ~ (1 | mom.pop/mom) + (1 | dad.pop/dad),  ziformula=~r.sp, data = means_dat))
#lsmeans(full_M_model1, pairwise ~ r.sp)
#contrast(lsmeans(full_M_model1, "r.sp"), list(CvL=c(1,0,0,-1), CvH=c(1,-1,-1,0), LvH=c(0,-1,-1,1), H1vH2=c(0,1,-1,0)))
```