Click here to return to the main page
Question 1
# Description ------------------
# A script that simulates data for my yellow birch regeneration project. This script creates simulated data based on the real parameters from my dataset and then runs a linear model and graphs the results. The independent variable is the size of the fallen over root mounds that the yellow birch are growing on and the dependent variable is the density of yellow birch seedlings growing on the root mound.
# 20 Mar 2022
#SPC
# Initialize -------------------
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
set.seed(3333)
# Load Functions ---------------
# FUNCTION:create_data
# purpose: Create simulated data for a linear model using known parameters from my data
# input: slope - the slope of the response variable
# n - the number of replicates
# mean - the mean for the resonse variable
# intercept - the y intercept
# output: A data frame containing a simulated normal dataset based on the given parameters
# ------------------------
create_simdata <- function(slope = runif(1), n = 10, mean = 50, sd = 10, intercept = 0) {
noise <- rnorm(n = n, mean = 0, sd = 2)
x_val <- rnorm(n = n, mean = mean, sd = sd)
y_val <- slope*x_val + noise + intercept
df <- data.frame(x_val, y_val)
return(df)
}
##########################
# FUNCTION:run_model
# purpose: Use x and y vectors to create a linear model
# input:x and y vectors of the same length
# output: A summary of the results from a linear model run on these vectors
# ------------------------
run_model <- function(x= NULL, y = NULL) {
if(is.null(x)) print("No Data!") else
lm1 <- lm(x ~ y)
summary(lm1)
}
##########################
# FUNCTION:graph_results
# purpose: Graph the simulated data
# input: x and y vectors of the same length that can be graphed in a scatterplot
# output: A scatterplot of the two given vectors
# ------------------------
graph_results <- function(x_var=runif(10), y_var=runif(10)) {
d_frame <- data.frame(x_var, y_var)
reg_plot <- ggplot(data = d_frame) +
aes(x=x_var, y=y_var) +
geom_point() +
stat_smooth(method=lm, se=0.95)
print(reg_plot)
message("regression data plotted")
#ggsave(filename="reg_plot.pdf",
# plot=reg_plot,
# device="pdf")
}
# Global Variables -------------
df1 <- create_simdata(slope=2.5, n=20, mean=1.8, sd=0.7) #parameters taken from my real dataset
# Program Body -----------------
run_model(x = df1$x_val, y = df1$y_val)
##
## Call:
## lm(formula = x ~ y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.99111 -0.31008 -0.04706 0.36057 0.90157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1849 0.2983 3.972 0.000894 ***
## y 0.1360 0.0512 2.657 0.016045 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4769 on 18 degrees of freedom
## Multiple R-squared: 0.2817, Adjusted R-squared: 0.2418
## F-statistic: 7.06 on 1 and 18 DF, p-value: 0.01604
graph_results(x_var = df1$x_val, y_var = df1$y_val)
## `geom_smooth()` using formula 'y ~ x'
## regression data plotted
Question 2 This differs from the above in that instead of using density of seedlings as the dependent variable, it will use the count of seedlings, and thus use a GLM.
# Description ------------------
# A script that simulates data for my yellow birch regeneration project. This script creates simulated data based on the real parameters from my dataset and then runs a generalized linear model and graphs the results. The independent variable is the size of the fallen over root mounds that the yellow brich are growing on and the dependent variable is the count of yellow birch seedlings growing on the root mound. I drew inspiration on how to do this from a discussion with Lauren which led us to talk to and utilize Lindsey Carlson's work on this assignment.
# 20 Mar 2022
#SPC
# Initialize -------------------
library(tidyverse)
library(ggplot2)
set.seed(3333)
# Load Functions ---------------
# FUNCTION:create_data
# purpose: Create simulated data for a generalized linear model
# input: n - the number of replicates
# lambda - the lambda of the poisson distribution for the count data
# xrange - the known range of values for the independent variable
# output: A data frame containing the simulated data set based on the given parameters
#Idea for using a min and max range came from Lindsey Carlson
# ------------------------
create_simdata <- function(n = 10, lambda = 10, xrange=1:10 ) {
x_val <- runif(n, min = min(xrange), max = max(xrange))
#lambda1 <- exp(lambda)
y_val <- rpois(n = n, lambda = lambda)
df <- data.frame(x_val, y_val)
return(df)
}
##########################
# FUNCTION:run_model
# purpose: Use x and y vectors to create a generalized linear model
# input:x and y vectors of the same length
# output: A summary of the coefficients from a generalized linear model run on these vectors
# ------------------------
run_model <- function(x= NULL, y = NULL) {
if(is.null(x)) print("No Data!") else
glm1 <- glm(y ~ x, family=poisson)
return(summary(glm1)$coefficients)
}
##########################
# FUNCTION:graph_results
# purpose: Graph the simulated data
# input: x and y vectors of the same length that can be graphed in a scatterplot
# output: A scatterplot of the two given vectors
# ------------------------
graph_results <- function(x_var=runif(10), y_var=runif(10)) {
d_frame <- data.frame(x_var, y_var)
reg_plot <- ggplot(data = d_frame) +
aes(x=x_var, y=y_var) +
geom_point() +
stat_smooth(method=lm, se=0.95)
print(reg_plot)
message("regression data plotted")
#ggsave(filename="reg_plot.pdf",
# plot=reg_plot,
# device="pdf")
}
# Global Variables -------------
df2 <- create_simdata(n = 20, lambda = 7.4, xrange = 8:75) #these are values taken from my real dataset
# Program Body -----------------
run_model(x = df2$x_val, y = df2$y_val)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.942524518 0.221432836 8.7725224 1.747077e-18
## x 0.004304532 0.004601033 0.9355577 3.495009e-01
graph_results(x_var = df2$x_val, y_var = df2$y_val)
## `geom_smooth()` using formula 'y ~ x'
## regression data plotted