Click here to return to the main page

Question 1

# Description ------------------
# A script that simulates data for my yellow birch regeneration project.  This script creates simulated data based on the real parameters from my dataset and then runs a linear model and graphs the results.  The independent variable is the size of the fallen over root mounds that the yellow birch are growing on and the dependent variable is the density of yellow birch seedlings growing on the root mound.

#  20 Mar 2022
#SPC

# Initialize -------------------
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
set.seed(3333)


# Load Functions ---------------

# FUNCTION:create_data
# purpose: Create simulated data for a linear model using known parameters from my data
# input: slope - the slope of the response variable
# n - the number of replicates 
# mean - the mean for the resonse variable
# intercept - the y intercept
# output: A data frame containing a simulated normal dataset based on the given parameters
# ------------------------
create_simdata <- function(slope = runif(1), n = 10, mean = 50, sd = 10, intercept = 0) {
  noise <- rnorm(n = n, mean = 0, sd = 2)
  x_val  <- rnorm(n = n, mean = mean, sd = sd)
  y_val <- slope*x_val + noise + intercept
  df <- data.frame(x_val, y_val)
  
  return(df)
}


##########################
# FUNCTION:run_model
# purpose: Use x and y vectors to create a linear model
# input:x and y vectors of the same length 
# output: A summary of the results from a linear model run on these vectors
# ------------------------
run_model <- function(x= NULL, y = NULL) {
  if(is.null(x)) print("No Data!") else
  lm1 <- lm(x ~ y)
  summary(lm1)
}

##########################
# FUNCTION:graph_results
# purpose: Graph the simulated data
# input: x and y vectors of the same length that can be graphed in a scatterplot
# output: A scatterplot of the two given vectors
# ------------------------
graph_results <- function(x_var=runif(10), y_var=runif(10)) {
  d_frame <- data.frame(x_var, y_var)
  reg_plot <- ggplot(data = d_frame) +
    aes(x=x_var, y=y_var) +
    geom_point() +
    stat_smooth(method=lm, se=0.95)
  print(reg_plot)
  message("regression data plotted")
  #ggsave(filename="reg_plot.pdf",
   #      plot=reg_plot,
    #     device="pdf")
}


# Global Variables -------------
df1 <- create_simdata(slope=2.5, n=20, mean=1.8, sd=0.7) #parameters taken from my real dataset


# Program Body -----------------
run_model(x = df1$x_val, y = df1$y_val)
## 
## Call:
## lm(formula = x ~ y)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99111 -0.31008 -0.04706  0.36057  0.90157 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.1849     0.2983   3.972 0.000894 ***
## y             0.1360     0.0512   2.657 0.016045 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4769 on 18 degrees of freedom
## Multiple R-squared:  0.2817, Adjusted R-squared:  0.2418 
## F-statistic:  7.06 on 1 and 18 DF,  p-value: 0.01604
graph_results(x_var = df1$x_val, y_var = df1$y_val)
## `geom_smooth()` using formula 'y ~ x'
## regression data plotted



Question 2 This differs from the above in that instead of using density of seedlings as the dependent variable, it will use the count of seedlings, and thus use a GLM.

# Description ------------------
# A script that simulates data for my yellow birch regeneration project.  This script creates simulated data based on the real parameters from my dataset and then runs a generalized linear model and graphs the results.  The independent variable is the size of the fallen over root mounds that the yellow brich are growing on and the dependent variable is the count of yellow birch seedlings growing on the root mound.  I drew inspiration on how to do this from a discussion with Lauren which led us to talk to and utilize Lindsey Carlson's work on this assignment.

#  20 Mar 2022
#SPC

# Initialize -------------------
library(tidyverse)
library(ggplot2)
set.seed(3333)

# Load Functions ---------------

# FUNCTION:create_data
# purpose: Create simulated data for a generalized linear model 
# input: n - the number of replicates
# lambda - the lambda of the poisson distribution for the count data
# xrange - the known range of values for the independent variable
# output: A data frame containing the simulated data set based on the given parameters
#Idea for using a min and max range came from Lindsey Carlson
# ------------------------
create_simdata <- function(n = 10, lambda = 10, xrange=1:10 ) {
  x_val <- runif(n, min = min(xrange), max = max(xrange)) 
  #lambda1 <- exp(lambda)
  y_val <- rpois(n = n, lambda = lambda)
  df <- data.frame(x_val, y_val)
   return(df)
}



##########################
# FUNCTION:run_model
# purpose: Use x and y vectors to create a generalized linear model
# input:x and y vectors of the same length 
# output: A summary of the coefficients from a generalized linear model run on these vectors
# ------------------------
run_model <- function(x= NULL, y = NULL) {
  if(is.null(x)) print("No Data!") else
    glm1 <- glm(y ~ x, family=poisson)
  return(summary(glm1)$coefficients)
}

##########################
# FUNCTION:graph_results
# purpose: Graph the simulated data
# input: x and y vectors of the same length that can be graphed in a scatterplot
# output: A scatterplot of the two given vectors
# ------------------------
graph_results <- function(x_var=runif(10), y_var=runif(10)) {
  d_frame <- data.frame(x_var, y_var)
  reg_plot <- ggplot(data = d_frame) +
    aes(x=x_var, y=y_var) +
    geom_point() +
    stat_smooth(method=lm, se=0.95)
  print(reg_plot)
  message("regression data plotted")
  #ggsave(filename="reg_plot.pdf",
   #      plot=reg_plot,
    #     device="pdf")
}


# Global Variables -------------
df2 <- create_simdata(n = 20, lambda = 7.4, xrange = 8:75) #these are values taken from my real dataset


# Program Body -----------------
run_model(x = df2$x_val, y = df2$y_val)
##                Estimate  Std. Error   z value     Pr(>|z|)
## (Intercept) 1.942524518 0.221432836 8.7725224 1.747077e-18
## x           0.004304532 0.004601033 0.9355577 3.495009e-01
graph_results(x_var = df2$x_val, y_var = df2$y_val)
## `geom_smooth()` using formula 'y ~ x'
## regression data plotted