#install.packages("lubridate")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## 1.1 Read Data
data2012 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2012.csv')
data2013 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2013.csv')
data2014 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2014.csv')
data2015 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2015.csv')
data2016 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2016.csv')
## 1.2 Combain 2012 - 2016 data sets
# Rename columns in data2015
names(data2015)[names(data2015) == "Missing.Diagnosis"] <- "Pt...Diagnosis"
names(data2015)[names(data2015) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Rename columns in data2016 if necessary
names(data2016)[names(data2016) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Now, try to bind the data frames again
data <- rbind(data2012, data2013, data2014, data2015, data2016)
## 1.3 Select the columns that we are interesting and rename
select_data <- data.frame(
Date = data$Date,
Weight = data$Pt...Weight,
MOI = data$X6.Categories,
#Twelve_cats = data$X12.Categories,
Arrival = data$Time.Unit.Arrival,
Depart = data$Time.Unit.Depart,
Time_period = data$Calc...Scene.Mins,
Response_priority = data$Resp..Priority,
Transportation_priority = data$Trans..Priority
)
## 1.4 Remove NA Missing Values
cleaned_data <- na.omit(select_data)
colSums(is.na(cleaned_data))
## Date Weight MOI
## 0 0 0
## Arrival Depart Time_period
## 0 0 0
## Response_priority Transportation_priority
## 0 0
## 1.5 Add new Year, Month and Date columns
library(lubridate)
cleaned_data$Year <- year(mdy(cleaned_data$Date))
cleaned_data$Month <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%m"))
cleaned_data$Day <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%d"))
## 1.6 Clean Six
cleaned_data$MOI <- tolower(cleaned_data$MOI)
#cleaned_data$Twelve_cats[cleaned_data$Twelve_cats == "Fall - Other"] <- "Other"
## 1.7 Add new columns tenmins 1 means under ten mins, and 0 means over ten mins Y
cleaned_data$Under_10_mins <- ifelse(cleaned_data$Time_period <= 10, 1, 0)
## 1.8 Convert arrive and depart time to mins
convert_time <- function(time_str) {
time <- strptime(time_str, format="%I:%M:%S %p")
return(as.numeric(format(time, "%H"))*60 + as.numeric(format(time, "%M")) + as.numeric(format(time, "%S"))/60)
}
cleaned_data$Arrival_min <- sapply(cleaned_data$Arrival, convert_time)
cleaned_data$Depart_min <- sapply(cleaned_data$Depart, convert_time)
## 1.9 create new column of 10-mins policy issued
cleaned_data$Date <- as.Date(cleaned_data$Date, format="%m/%d/%Y")
cleaned_data$Intervention <- ifelse(cleaned_data$Date < as.Date("2014-12-18"), "A_pre", "B_post")
# 1.10 New columns of division of the day
# Function to categorize time into part of the day
get_time_of_day <- function(minutes) {
hour <- minutes / 60 # Convert minutes to hours
if (hour >= 5 && hour < 12) {
return('Morning')
} else if (hour >= 12 && hour < 17) {
return('Afternoon')
} else if (hour >= 17 && hour < 21) {
return('Evening')
} else {
return('Night')
}
}
# Apply the function to Arrival_min and Depart_min
cleaned_data$Arrival_Time_of_Day <- sapply(cleaned_data$Arrival_min, get_time_of_day)
cleaned_data$Depart_Time_of_Day <- sapply(cleaned_data$Depart_min, get_time_of_day)
table(cleaned_data$Arrival_Time_of_Day, cleaned_data$Depart_Time_of_Day)
##
## Afternoon Evening Morning Night
## Afternoon 1089 51 0 0
## Evening 0 1191 0 54
## Morning 32 0 881 0
## Night 0 0 21 1638
# 1.11 Drop the errors of time_period
# Replace values in Time_period with NA when they are equal to 1395.00
cleaned_data$Time_period[cleaned_data$Time_period == 1395.00] <- NA
cleaned_data <- na.omit(cleaned_data)
# Check if there are any negative values in Time_period
has_negatives <- any(cleaned_data$Time_period < 0)
# Print the result
print(has_negatives)
## [1] FALSE
summary(cleaned_data$Time_period)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.88 11.00 12.46 15.00 199.00
## 1.12 category to factor
cleaned_data$MOI <- as.factor(cleaned_data$MOI)
#cleaned_data$Twelve_cats <- as.factor(cleaned_data$Twelve_cats)
cleaned_data$Intervention <- as.factor(cleaned_data$Intervention)
cleaned_data$Response_priority <- as.factor(cleaned_data$Response_priority)
cleaned_data$Transportation_priority <- as.factor(cleaned_data$Transportation_priority)
cleaned_data$Under_10_mins <- factor(cleaned_data$Under_10_mins, levels = c(0, 1))
cleaned_data$Arrival_Time_of_Day <- as.factor(cleaned_data$Arrival_Time_of_Day)
cleaned_data$Depart_Time_of_Day <- as.factor(cleaned_data$Depart_Time_of_Day)
colSums(is.na(cleaned_data))
## Date Weight MOI
## 0 0 0
## Arrival Depart Time_period
## 0 0 0
## Response_priority Transportation_priority Year
## 0 0 0
## Month Day Under_10_mins
## 0 0 0
## Arrival_min Depart_min Intervention
## 0 0 0
## Arrival_Time_of_Day Depart_Time_of_Day
## 0 0
## 1.13 Save the final data set
#write.csv(cleaned_data, file = "final_data.csv", row.names = FALSE)
# For numeric
hist(cleaned_data$Weight, main="Distribution of Weight", xlab="Weight (lb)", ylab="Frequency")
# For Category
bp <- function(categorical_data) {
# Check if the input is a factor or character, if not, return an error message
if (!is.factor(categorical_data) && !is.character(categorical_data)) {
stop("The input data must be a categorical (factor or character) variable.")
}
# Calculate counts and percentages
data_summary <- as.data.frame(table(categorical_data)) %>%
rename(Category = categorical_data) %>%
mutate(Percentage = Freq / sum(Freq) * 100)
# Create the plot
ggplot(data_summary, aes(x = Category, y = Freq, fill = Category)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
position = position_stack(vjust = 0.5), size = 3) +
labs(title = paste("Bar Plot of Year 2012 to 2016"),#,#deparse(substitute(categorical_data))),
x = "Year",
y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Adjust text angle for readability
}
bp2 <- function(categorical_dataY, categorical_data2) {
# Check if the inputs are categorical
if (!is.factor(categorical_dataY) && !is.character(categorical_dataY)) {
stop("categorical_dataY must be a categorical (factor or character) variable.")
}
if (!is.factor(categorical_data2) && !is.character(categorical_data2)) {
stop("categorical_data2 must be a categorical (factor or character) variable.")
}
# Calculate counts and percentages
data_summary <- as.data.frame(table(categorical_dataY, categorical_data2)) %>%
rename(Category = categorical_dataY, Subcategory = categorical_data2) %>%
group_by(Category) %>%
mutate(Percentage = Freq / sum(Freq) * 100)
# Create the plot
ggplot(data_summary, aes(x = Category, y = Freq, fill = Subcategory)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
position = position_stack(vjust = 0.5), size = 3) +
labs(title = paste("Bar Plot of Intervention vs. Transportation Priority"),#, deparse(substitute(categorical_dataY)), "vs", deparse(substitute(categorical_data2))),
x = "Intervention",
y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
heatmap_for_two_categories <- function(cat_data1, cat_data2) {
# Check if both inputs are categorical (factor or character)
if (!is.factor(cat_data1) && !is.character(cat_data1)) {
stop("cat_data1 should be a factor or a character vector.")
} else {
cat_data1 <- as.factor(cat_data1)
}
if (!is.factor(cat_data2) && !is.character(cat_data2)) {
stop("cat_data2 should be a factor or a character vector.")
} else {
cat_data2 <- as.factor(cat_data2)
}
# Create a data frame of counts and calculate percentages
data_summary <- data.frame(cat_data1, cat_data2) %>%
count(cat_data1, cat_data2) %>%
mutate(percentage = n / sum(n) * 100)
# Create the heatmap
ggplot(data_summary, aes(x = cat_data1, y = cat_data2, fill = n)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red") + # Adjust to shades of red
labs(title = "Heatmap of Category Combinations",
x = deparse(substitute(cat_data1)),
y = deparse(substitute(cat_data2)),
fill = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7), # Adjust text angle and size
axis.text.y = element_text(size = 7)) +
geom_text(aes(label = paste0(n, "\n(", round(percentage, 1), "%)")), size = 3) # Add count and percentage labels
}
# Usage Example
bp(cleaned_data$Under_10_mins)
bp(cleaned_data$Intervention)
bp(cleaned_data$MOI)
bp(cleaned_data$Response_priority)
bp(cleaned_data$Transportation_priority)
bp(as.factor(cleaned_data$Arrival_Time_of_Day))
bp(as.factor(cleaned_data$Depart_Time_of_Day))
bp2(cleaned_data$Under_10_mins, cleaned_data$MOI)
bp2(cleaned_data$Under_10_mins, cleaned_data$Response_priority)
bp2(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority)
bp2(cleaned_data$Under_10_mins, cleaned_data$Intervention)
bp2(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day)
bp2(cleaned_data$Under_10_mins, cleaned_data$Depart_Time_of_Day)
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$MOI) + xlab("Under_10_mins") + ylab("Six Category")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Response_priority) + xlab("Under_10_mins") + ylab("Response priority")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority) + xlab("Under_10_mins") + ylab("Transportation priority")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Intervention) + xlab("Under_10_mins") + ylab("Intervention")
heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day) + xlab("Under_10_mins") + ylab("Arrival Time of Day")
heatmap_for_two_categories(cleaned_data$Intervention, cleaned_data$Transportation_priority) + xlab("Intervention") + ylab("Transportation Priority")
ggplot(cleaned_data, aes(x = Under_10_mins, y = Weight, group = Under_10_mins, color = as.factor(Under_10_mins))) +
geom_boxplot() +
theme_minimal() +
labs(title = "Boxplots of Trauma Scene Times Under_10_mins vs. Patient Weight from 2012 to 2016", x = "Under_10_mins", y = "Weight", color = "Under_10_mins")
summary(cleaned_data$Weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.0 145.0 170.0 169.2 200.0 490.0
hist(cleaned_data$Weight, breaks = 30, main = "Distribution of Weight", xlab = "Weight (lb)", ylab = "Frequency", col = "lightblue", border = "black")
dens <- density(cleaned_data$Weight)
m0.1 <- glm(Under_10_mins ~ Weight + Intervention, data = cleaned_data, family = 'binomial')
m0.2 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day, data = cleaned_data, family = 'binomial')
m0.3 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day + Response_priority + Transportation_priority , data = cleaned_data, family = 'binomial')
summary(m0.1)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3969 -1.0738 -0.9749 1.2224 1.6529
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1038857 0.0960692 1.081 0.28
## Weight -0.0026122 0.0005324 -4.906 9.29e-07 ***
## InterventionB_post 0.4352198 0.0589796 7.379 1.59e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6757.2 on 4952 degrees of freedom
## AIC: 6763.2
##
## Number of Fisher Scoring iterations: 4
summary(m0.2)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day,
## family = "binomial", data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8464 -0.9894 -0.7474 1.1399 1.9704
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1101578 0.1945595 5.706 1.16e-08 ***
## Weight -0.0038456 0.0005679 -6.772 1.27e-11 ***
## InterventionB_post 0.4474462 0.0619568 7.222 5.13e-13 ***
## MOIcrash -1.0015018 0.1745004 -5.739 9.51e-09 ***
## MOIfall -1.4809161 0.1829273 -8.096 5.70e-16 ***
## MOIgsw 0.2280596 0.1871153 1.219 0.22291
## MOIother -0.9355178 0.1803977 -5.186 2.15e-07 ***
## MOIsw 0.3486210 0.1919816 1.816 0.06938 .
## Arrival_Time_of_DayEvening 0.0437772 0.0867203 0.505 0.61369
## Arrival_Time_of_DayMorning -0.1541896 0.0945137 -1.631 0.10281
## Arrival_Time_of_DayNight -0.2303074 0.0838409 -2.747 0.00602 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6302.3 on 4944 degrees of freedom
## AIC: 6324.3
##
## Number of Fisher Scoring iterations: 4
summary(m0.3)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day +
## Response_priority + Transportation_priority, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9173 -0.9947 -0.7098 1.1102 2.0855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2480003 0.2120146 5.886 3.95e-09 ***
## Weight -0.0037984 0.0005698 -6.666 2.64e-11 ***
## InterventionB_post 0.4403035 0.0632615 6.960 3.40e-12 ***
## MOIcrash -1.1567499 0.1781652 -6.493 8.44e-11 ***
## MOIfall -1.3991546 0.1858058 -7.530 5.07e-14 ***
## MOIgsw 0.0523367 0.1908933 0.274 0.78396
## MOIother -0.7940369 0.1848848 -4.295 1.75e-05 ***
## MOIsw 0.2408345 0.1944282 1.239 0.21546
## Arrival_Time_of_DayEvening 0.0153239 0.0873861 0.175 0.86080
## Arrival_Time_of_DayMorning -0.1721206 0.0951113 -1.810 0.07035 .
## Arrival_Time_of_DayNight -0.2697102 0.0846577 -3.186 0.00144 **
## Response_priority2 -0.3815487 0.0925467 -4.123 3.74e-05 ***
## Response_priority3 -0.6301143 0.1318657 -4.778 1.77e-06 ***
## Transportation_priority2 0.1607452 0.0905847 1.775 0.07598 .
## Transportation_priority3 -0.1475596 0.1013079 -1.457 0.14524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6245.1 on 4940 degrees of freedom
## AIC: 6275.1
##
## Number of Fisher Scoring iterations: 4
model1 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + Intervention + MOI + Arrival_Time_of_Day , data = cleaned_data, family = 'binomial')
summary(model1)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority +
## Intervention + MOI + Arrival_Time_of_Day, family = "binomial",
## data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9173 -0.9947 -0.7098 1.1102 2.0855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2480003 0.2120146 5.886 3.95e-09 ***
## Weight -0.0037984 0.0005698 -6.666 2.64e-11 ***
## Response_priority2 -0.3815487 0.0925467 -4.123 3.74e-05 ***
## Response_priority3 -0.6301143 0.1318657 -4.778 1.77e-06 ***
## Transportation_priority2 0.1607452 0.0905847 1.775 0.07598 .
## Transportation_priority3 -0.1475596 0.1013079 -1.457 0.14524
## InterventionB_post 0.4403035 0.0632615 6.960 3.40e-12 ***
## MOIcrash -1.1567499 0.1781652 -6.493 8.44e-11 ***
## MOIfall -1.3991546 0.1858058 -7.530 5.07e-14 ***
## MOIgsw 0.0523367 0.1908933 0.274 0.78396
## MOIother -0.7940369 0.1848848 -4.295 1.75e-05 ***
## MOIsw 0.2408345 0.1944282 1.239 0.21546
## Arrival_Time_of_DayEvening 0.0153239 0.0873861 0.175 0.86080
## Arrival_Time_of_DayMorning -0.1721206 0.0951113 -1.810 0.07035 .
## Arrival_Time_of_DayNight -0.2697102 0.0846577 -3.186 0.00144 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6245.1 on 4940 degrees of freedom
## AIC: 6275.1
##
## Number of Fisher Scoring iterations: 4
\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_1}}} \] \[ \begin{align*} S_1 =\beta X= & \ 1.248 - 0.0038 \times \text{Weight} - 0.382 \times \text{RP2} \\ & - 0.630 \times \text{RP3} + 0.161 \times \text{TP2} \\ & - 0.148 \times \text{TP3} + 0.440 \times \text{Intervention post} \\ & - 1.157 \times \text{Crash} - 1.399 \times \text{Fall} + 0.052 \times \text{GSW} \\ & - 0.794 \times \text{Other} + 0.241 \times \text{SW} \\ & + 0.015 \times \text{Evening} - 0.172 \times \text{Morning} \\ & - 0.269 \times \text{Night} \end{align*} \]
model2 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
summary(model2)
##
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority +
## MOI + Intervention + Arrival_Time_of_Day + Transportation_priority *
## Intervention, family = "binomial", data = cleaned_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9457 -0.9861 -0.7125 1.1008 2.0641
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 1.2833189 0.2160558 5.940
## Weight -0.0038257 0.0005702 -6.709
## Response_priority2 -0.3832384 0.0925574 -4.141
## Response_priority3 -0.6209733 0.1320798 -4.702
## Transportation_priority2 0.0761672 0.1055603 0.722
## Transportation_priority3 -0.1259148 0.1183677 -1.064
## MOIcrash -1.1464167 0.1780727 -6.438
## MOIfall -1.3945810 0.1857158 -7.509
## MOIgsw 0.0600184 0.1908213 0.315
## MOIother -0.7880242 0.1847653 -4.265
## MOIsw 0.2525791 0.1944432 1.299
## InterventionB_post 0.2674668 0.1893956 1.412
## Arrival_Time_of_DayEvening 0.0177034 0.0874384 0.202
## Arrival_Time_of_DayMorning -0.1711538 0.0951700 -1.798
## Arrival_Time_of_DayNight -0.2684162 0.0847338 -3.168
## Transportation_priority2:InterventionB_post 0.2761208 0.2056824 1.342
## Transportation_priority3:InterventionB_post 0.0087868 0.2243956 0.039
## Pr(>|z|)
## (Intercept) 2.85e-09 ***
## Weight 1.96e-11 ***
## Response_priority2 3.46e-05 ***
## Response_priority3 2.58e-06 ***
## Transportation_priority2 0.47057
## Transportation_priority3 0.28744
## MOIcrash 1.21e-10 ***
## MOIfall 5.95e-14 ***
## MOIgsw 0.75312
## MOIother 2.00e-05 ***
## MOIsw 0.19395
## InterventionB_post 0.15789
## Arrival_Time_of_DayEvening 0.83955
## Arrival_Time_of_DayMorning 0.07211 .
## Arrival_Time_of_DayNight 0.00154 **
## Transportation_priority2:InterventionB_post 0.17945
## Transportation_priority3:InterventionB_post 0.96876
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6834.0 on 4954 degrees of freedom
## Residual deviance: 6240.8 on 4938 degrees of freedom
## AIC: 6274.8
##
## Number of Fisher Scoring iterations: 4
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
m2 <- glm(Under_10_mins~Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
m3 <- glm(Under_10_mins~Transportation_priority*Response_priority, data = cleaned_data, family = 'binomial')
plot(allEffects(m2),ask=F)
\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_2}}} \] $$ \[\begin{align*} \text S_2 =\beta X= & \ 1.283 - 0.0038 \times \text{Weight} - 0.383 \times \text{RP2} \\ & - 0.621 \times \text{RP3} + 0.076 \times \text{TP2} \\ & - 0.126 \times \text{TP3} - 1.146 \times \text{Crash} \\ & - 1.395 \times \text{Fall} + 0.060 \times \text{GSW} - 0.788 \times \text{Other} \\ & + 0.253 \times \text{SW} + 0.268 \times \text{Intervention post} \\ & + 0.018 \times \text{Evening} - 0.171 \times \text{Morning} \\ & - 0.268 \times \text{Night} + 0.276 \times \text{TP2*Intervention post} \\ & + 0.0088 \times \text{TP3*Intervention post} \end{align*}\]
$$
library(broom)
model_summary_df1 <- broom::tidy(model1)
print(model_summary_df1)
## # A tibble: 15 Ă— 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.25 0.212 5.89 3.95e- 9
## 2 Weight -0.00380 0.000570 -6.67 2.64e-11
## 3 Response_priority2 -0.382 0.0925 -4.12 3.74e- 5
## 4 Response_priority3 -0.630 0.132 -4.78 1.77e- 6
## 5 Transportation_priority2 0.161 0.0906 1.77 7.60e- 2
## 6 Transportation_priority3 -0.148 0.101 -1.46 1.45e- 1
## 7 InterventionB_post 0.440 0.0633 6.96 3.40e-12
## 8 MOIcrash -1.16 0.178 -6.49 8.44e-11
## 9 MOIfall -1.40 0.186 -7.53 5.07e-14
## 10 MOIgsw 0.0523 0.191 0.274 7.84e- 1
## 11 MOIother -0.794 0.185 -4.29 1.75e- 5
## 12 MOIsw 0.241 0.194 1.24 2.15e- 1
## 13 Arrival_Time_of_DayEvening 0.0153 0.0874 0.175 8.61e- 1
## 14 Arrival_Time_of_DayMorning -0.172 0.0951 -1.81 7.03e- 2
## 15 Arrival_Time_of_DayNight -0.270 0.0847 -3.19 1.44e- 3
model_summary_df2 <- broom::tidy(model2)
print(model_summary_df2)
## # A tibble: 17 Ă— 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1.28 0.216 5.94 2.85e- 9
## 2 Weight -0.00383 0.000570 -6.71 1.96e-11
## 3 Response_priority2 -0.383 0.0926 -4.14 3.46e- 5
## 4 Response_priority3 -0.621 0.132 -4.70 2.58e- 6
## 5 Transportation_priority2 0.0762 0.106 0.722 4.71e- 1
## 6 Transportation_priority3 -0.126 0.118 -1.06 2.87e- 1
## 7 MOIcrash -1.15 0.178 -6.44 1.21e-10
## 8 MOIfall -1.39 0.186 -7.51 5.95e-14
## 9 MOIgsw 0.0600 0.191 0.315 7.53e- 1
## 10 MOIother -0.788 0.185 -4.26 2.00e- 5
## 11 MOIsw 0.253 0.194 1.30 1.94e- 1
## 12 InterventionB_post 0.267 0.189 1.41 1.58e- 1
## 13 Arrival_Time_of_DayEvening 0.0177 0.0874 0.202 8.40e- 1
## 14 Arrival_Time_of_DayMorning -0.171 0.0952 -1.80 7.21e- 2
## 15 Arrival_Time_of_DayNight -0.268 0.0847 -3.17 1.54e- 3
## 16 Transportation_priority2:InterventionB… 0.276 0.206 1.34 1.79e- 1
## 17 Transportation_priority3:InterventionB… 0.00879 0.224 0.0392 9.69e- 1
data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.483 2.308 5.302
## Weight 0.996 0.995 0.997
## Response_priority2 0.683 0.569 0.818
## Response_priority3 0.533 0.410 0.688
## Transportation_priority2 1.174 0.984 1.403
## Transportation_priority3 0.863 0.708 1.053
## InterventionB_post 1.553 1.372 1.758
## MOIcrash 0.315 0.221 0.444
## MOIfall 0.247 0.171 0.354
## MOIgsw 1.054 0.722 1.527
## MOIother 0.452 0.313 0.647
## MOIsw 1.272 0.866 1.857
## Arrival_Time_of_DayEvening 1.015 0.856 1.205
## Arrival_Time_of_DayMorning 0.842 0.699 1.014
## Arrival_Time_of_DayNight 0.764 0.647 0.901
data.frame(round(exp(cbind(Estimate=coef(model2), confint(model2))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.609 2.371 5.535
## Weight 0.996 0.995 0.997
## Response_priority2 0.682 0.568 0.817
## Response_priority3 0.537 0.414 0.695
## Transportation_priority2 1.079 0.878 1.328
## Transportation_priority3 0.882 0.699 1.112
## MOIcrash 0.318 0.223 0.449
## MOIfall 0.248 0.171 0.355
## MOIgsw 1.062 0.727 1.539
## MOIother 0.455 0.315 0.651
## MOIsw 1.287 0.876 1.879
## InterventionB_post 1.307 0.901 1.895
## Arrival_Time_of_DayEvening 1.018 0.858 1.208
## Arrival_Time_of_DayMorning 0.843 0.699 1.015
## Arrival_Time_of_DayNight 0.765 0.647 0.903
## Transportation_priority2:InterventionB_post 1.318 0.880 1.973
## Transportation_priority3:InterventionB_post 1.009 0.650 1.566
#For Weight 10X, 20X, 30X
data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 3.483 2.308 5.302
## Weight 0.996 0.995 0.997
## Response_priority2 0.683 0.569 0.818
## Response_priority3 0.533 0.410 0.688
## Transportation_priority2 1.174 0.984 1.403
## Transportation_priority3 0.863 0.708 1.053
## InterventionB_post 1.553 1.372 1.758
## MOIcrash 0.315 0.221 0.444
## MOIfall 0.247 0.171 0.354
## MOIgsw 1.054 0.722 1.527
## MOIother 0.452 0.313 0.647
## MOIsw 1.272 0.866 1.857
## Arrival_Time_of_DayEvening 1.015 0.856 1.205
## Arrival_Time_of_DayMorning 0.842 0.699 1.014
## Arrival_Time_of_DayNight 0.764 0.647 0.901
data.frame(round(exp(cbind(Estimate=10*coef(model1), 10*confint(model1))),3)) #with 10 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 263024.620 4281.092 17555484.810
## Weight 0.963 0.952 0.974
## Response_priority2 0.022 0.004 0.135
## Response_priority3 0.002 0.000 0.024
## Transportation_priority2 4.990 0.847 29.552
## Transportation_priority3 0.229 0.031 1.669
## InterventionB_post 81.698 23.673 282.711
## MOIcrash 0.000 0.000 0.000
## MOIfall 0.000 0.000 0.000
## MOIgsw 1.688 0.038 68.921
## MOIother 0.000 0.000 0.013
## MOIsw 11.116 0.237 488.622
## Arrival_Time_of_DayEvening 1.166 0.210 6.463
## Arrival_Time_of_DayMorning 0.179 0.028 1.152
## Arrival_Time_of_DayNight 0.067 0.013 0.354
data.frame(round(exp(cbind(Estimate=20*coef(model1), 20*confint(model1))),3)) #with 20 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 6.918195e+10 18327746.158 3.081950e+14
## Weight 9.270000e-01 0.906 9.480000e-01
## Response_priority2 0.000000e+00 0.000 1.800000e-02
## Response_priority3 0.000000e+00 0.000 1.000000e-03
## Transportation_priority2 2.490100e+01 0.718 8.733090e+02
## Transportation_priority3 5.200000e-02 0.001 2.785000e+00
## InterventionB_post 6.674641e+03 560.416 7.992531e+04
## MOIcrash 0.000000e+00 0.000 0.000000e+00
## MOIfall 0.000000e+00 0.000 0.000000e+00
## MOIgsw 2.848000e+00 0.001 4.750124e+03
## MOIother 0.000000e+00 0.000 0.000000e+00
## MOIsw 1.235550e+02 0.056 2.387518e+05
## Arrival_Time_of_DayEvening 1.359000e+00 0.044 4.177600e+01
## Arrival_Time_of_DayMorning 3.200000e-02 0.001 1.326000e+00
## Arrival_Time_of_DayNight 5.000000e-03 0.000 1.250000e-01
data.frame(round(exp(cbind(Estimate=30*coef(model1), 30*confint(model1))),3)) #with 30 time
## Waiting for profiling to be done...
## Estimate X2.5.. X97.5..
## (Intercept) 1.819656e+16 7.846276e+10 5.410513e+21
## Weight 8.920000e-01 8.630000e-01 9.230000e-01
## Response_priority2 0.000000e+00 0.000000e+00 2.000000e-03
## Response_priority3 0.000000e+00 0.000000e+00 0.000000e+00
## Transportation_priority2 1.242570e+02 6.080000e-01 2.580786e+04
## Transportation_priority3 1.200000e-02 0.000000e+00 4.647000e+00
## InterventionB_post 5.453079e+05 1.326680e+04 2.259574e+07
## MOIcrash 0.000000e+00 0.000000e+00 0.000000e+00
## MOIfall 0.000000e+00 0.000000e+00 0.000000e+00
## MOIgsw 4.807000e+00 0.000000e+00 3.273840e+05
## MOIother 0.000000e+00 0.000000e+00 0.000000e+00
## MOIsw 1.373385e+03 1.300000e-02 1.166594e+08
## Arrival_Time_of_DayEvening 1.584000e+00 9.000000e-03 2.700200e+02
## Arrival_Time_of_DayMorning 6.000000e-03 0.000000e+00 1.527000e+00
## Arrival_Time_of_DayNight 0.000000e+00 0.000000e+00 4.400000e-02
# Install ggplot2 if you haven't already
#install.packages("ggplot2")
library(ggplot2)
# Create your data frame
data <- data.frame(
Weight = c(1, 10, 20, 30),
Odds = c(0.996, 0.963, 0.927, 0.892),
LowerCI = c(0.995, 0.952, 0.906, 0.863),
UpperCI = c(0.997, 0.974, 0.948, 0.923)
)
# Create the plot
ggplot(data, aes(x = Weight, y = Odds)) +
geom_point() + # Add points for the odds
geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI), width = 0.2) + # Add error bars for the CI
labs(title = "Odds with 95% Confidence Intervals", x = "Weight (lb)", y = "Odds") +
theme_minimal()
library(sjPlot)
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
plot_model(model1)
plot_model(model2)
# Splitting the data into training and testing sets
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(cleaned_data), 0.8*nrow(cleaned_data))
train_data <- cleaned_data[train_indices, ]
test_data <- cleaned_data[-train_indices, ]
# train for model 1
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)
##
## predicted_class 0 1
## 0 424 217
## 1 122 228
correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)
## [1] 0.6579213
# ROC Curve and AUC
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Part 5.4 10 folds cross Validation Model1
# Load necessary library
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
x = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]
## (Intercept) Weight
## 0.885106060 -0.003992294
## Response_priority2 Response_priority3
## -0.318922900 -0.619882312
## Transportation_priority2 Transportation_priority3
## 0.157884015 -0.130985523
## MOIcrash MOIfall
## -0.781553546 -0.986576140
## MOIgsw MOIother
## 0.402148748 -0.499294435
## MOIsw InterventionB_post
## 0.541453795 0.430246592
## Arrival_Time_of_DayEvening Arrival_Time_of_DayMorning
## 0.064286529 -0.064900697
## Arrival_Time_of_DayNight
## -0.204986284
# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
"beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))
# Model 1 CV
library(dplyr)
x.test = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day, test_data)[,-1]
logistic.prob1 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob1 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)
## [1] 0
# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic),
as.factor(test_data$Under_10_mins),
dnn = c("True", "Predicted"))
confusion
## Predicted
## True 0 1
## Over 10 Mins 484 287
## Under 10 Mins 62 158
p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))
## [1] 0.6478305
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob1)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 1 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Model2 accuracy
# train for model 2
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)
##
## predicted_class 0 1
## 0 425 207
## 1 121 238
correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)
## [1] 0.6690212
# ROC Curve and AUC
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
library(boot)
library(glmnet)
x = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]
## (Intercept)
## 0.907420145
## Weight
## -0.004017559
## Response_priority2
## -0.316955378
## Response_priority3
## -0.608445964
## Transportation_priority2
## 0.082895427
## Transportation_priority3
## -0.075955950
## MOIcrash
## -0.776001960
## MOIfall
## -0.986994032
## MOIgsw
## 0.407431588
## MOIother
## -0.498903674
## MOIsw
## 0.551874044
## InterventionB_post
## 0.316567843
## Arrival_Time_of_DayEvening
## 0.066076853
## Arrival_Time_of_DayMorning
## -0.063745555
## Arrival_Time_of_DayNight
## -0.206724747
## Transportation_priority2:InterventionB_post
## 0.237366080
## Transportation_priority3:InterventionB_post
## -0.098697821
# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
"beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))
# Model 2 CV
x.test = model.matrix(as.factor(Under_10_mins) ~ Weight + Response_priority + Transportation_priority + MOI + Intervention + Arrival_Time_of_Day + Transportation_priority*Intervention, test_data)[,-1]
logistic.prob2 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob2 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)
## [1] 0
# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic),
as.factor(test_data$Under_10_mins),
dnn = c("True", "Predicted"))
confusion
## Predicted
## True 0 1
## Over 10 Mins 483 291
## Under 10 Mins 63 154
p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))
## [1] 0.6427851
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob2)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 2 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")
# Load necessary library
library(pROC)
# Assuming logistic.prob1 and logistic.prob2 are the predicted probabilities from your two models
roc_obj1 <- roc(test_data$Under_10_mins, logistic.prob1)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
roc_obj2 <- roc(test_data$Under_10_mins, logistic.prob2)
## Setting levels: control = 0, case = 1
## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.
## Setting direction: controls < cases
# Calculate AUC for both models
auc_value1 <- auc(roc_obj1)
auc_value2 <- auc(roc_obj2)
# Plot the first ROC curve
plot(roc_obj1, main="ROC Curves Comparison", xlab="False Positive Rate", ylab="True Positive Rate", col="blue")
# Add the second ROC curve
lines(roc_obj2, col="red")
# Add a legend
legend("bottomright", legend=c(paste("Model 1 - AUC:", round(auc_value1, 3)),
paste("Model 2 - AUC:", round(auc_value2, 3))),
col=c("blue", "red"), lwd=2)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:boot':
##
## logit
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(alr4)
mmps(model1)
## Warning in mmps(model1): Interactions and/or factors skipped
mmps(model2)
## Warning in mmps(model2): Interactions and/or factors skipped
influencePlot(model1)
## StudRes Hat CookD
## 369 2.0653483 0.002508053 0.0012369820
## 2160 1.2045736 0.012737228 0.0009163225
## 2666 2.0926459 0.003789378 0.0019854891
## 3182 -0.9894323 0.015638885 0.0006707911
## 4804 1.9767581 0.004376927 0.0017554449
influencePlot(model2)
## StudRes Hat CookD
## 369 2.037308 0.002838088 0.0011566998
## 2666 2.071426 0.004049923 0.0017814965
## 3182 -1.010576 0.015969792 0.0006378435
## 3975 -1.610586 0.011725467 0.0018376942
## 4342 -1.173510 0.013806754 0.0008160880
plot(resid(model1))
abline(0,0)
plot(resid(model2))
abline(0,0)
data.frame(vif(model1))
## GVIF Df GVIF..1..2.Df..
## Weight 1.046235 1 1.022856
## Response_priority 1.563366 2 1.118189
## Transportation_priority 1.074261 2 1.018069
## Intervention 1.035071 1 1.017384
## MOI 1.685764 5 1.053609
## Arrival_Time_of_Day 1.087726 3 1.014114
data.frame(vif(model2))
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
## GVIF Df GVIF..1..2.Df..
## Weight 1.047189 1 1.023323
## Response_priority 1.566256 2 1.118705
## Transportation_priority 2.310603 2 1.232910
## MOI 1.690581 5 1.053910
## Intervention 9.247406 1 3.040955
## Arrival_Time_of_Day 1.088853 3 1.014289
## Transportation_priority:Intervention 16.493968 2 2.015261
# model1 is our final model
print(paste("Pearson's X^2 =",round(sum(residuals(model1,type="pearson")^2),3)))
## [1] "Pearson's X^2 = 4962.948"
print(qchisq(0.95,4940))
## [1] 5104.625
4962.948 <5104.625
## [1] TRUE
Conclusion: Since the calculated value of Pearson Chi square (4962.948) is less than the critical value (5104.625), we fail to reject the null hypothesis, and conclude that the logistic model fits the data.
print(paste("Pearson's X^2 =",round(sum(residuals(model2,type="pearson")^2),3)))
## [1] "Pearson's X^2 = 4962.531"
print(qchisq(0.95,4938))
## [1] 5102.591
4962.531<5102.591
## [1] TRUE
#model1
R2m1 <- 1-(6245.1/6834)
R2m2 <- 1-(6240.8/6834)
print(R2m1)
## [1] 0.08617208
print(R2m2)
## [1] 0.08680129