Load Packages

#install.packages("lubridate")
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

Part 1. Data Clean & Organization

## 1.1 Read Data
data2012 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2012.csv')
data2013 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2013.csv')
data2014 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2014.csv')
data2015 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2015.csv')
data2016 <- read.csv('Copy of Trauma%20Scene%20Times%20Raw%20Data (1).xlsx - 2016.csv')

## 1.2 Combain 2012 - 2016 data sets
# Rename columns in data2015
names(data2015)[names(data2015) == "Missing.Diagnosis"] <- "Pt...Diagnosis"
names(data2015)[names(data2015) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Rename columns in data2016 if necessary
names(data2016)[names(data2016) == "Calc...Transport.Mins"] <- "Calc...Transfer.Mins"
# Now, try to bind the data frames again
data <- rbind(data2012, data2013, data2014, data2015, data2016)

## 1.3 Select the columns that we are interesting and rename
select_data <- data.frame(
  Date = data$Date,
  Weight = data$Pt...Weight,
  MOI = data$X6.Categories,
  #Twelve_cats = data$X12.Categories,
  Arrival = data$Time.Unit.Arrival,
  Depart = data$Time.Unit.Depart,
  Time_period = data$Calc...Scene.Mins,
  Response_priority = data$Resp..Priority,
  Transportation_priority = data$Trans..Priority
)

## 1.4 Remove NA Missing Values
cleaned_data <- na.omit(select_data)
colSums(is.na(cleaned_data))

##                    Date                  Weight                     MOI 
##                       0                       0                       0 
##                 Arrival                  Depart             Time_period 
##                       0                       0                       0 
##       Response_priority Transportation_priority 
##                       0                       0

## 1.5 Add new Year, Month and Date columns
library(lubridate)
cleaned_data$Year <- year(mdy(cleaned_data$Date))
cleaned_data$Month <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%m"))
cleaned_data$Day <- as.numeric(format(as.Date(cleaned_data$Date, format="%m/%d/%Y"), "%d"))

## 1.6 Clean Six
cleaned_data$MOI <- tolower(cleaned_data$MOI)
#cleaned_data$Twelve_cats[cleaned_data$Twelve_cats == "Fall - Other"] <- "Other"

## 1.7 Add new columns tenmins 1 means under ten mins, and 0 means over ten mins Y
cleaned_data$Under_10_mins <- ifelse(cleaned_data$Time_period <= 10, 1, 0)

## 1.8 Convert arrive and depart time to mins
convert_time <- function(time_str) {
  time <- strptime(time_str, format="%I:%M:%S %p")
  return(as.numeric(format(time, "%H"))*60 + as.numeric(format(time, "%M")) + as.numeric(format(time, "%S"))/60)
}

cleaned_data$Arrival_min <- sapply(cleaned_data$Arrival, convert_time)
cleaned_data$Depart_min <- sapply(cleaned_data$Depart, convert_time)


## 1.9 create new column of 10-mins policy issued
cleaned_data$Date <- as.Date(cleaned_data$Date, format="%m/%d/%Y")
cleaned_data$Intervention <- ifelse(cleaned_data$Date < as.Date("2014-12-18"), "A_pre", "B_post")

# 1.10 New columns of division of the day 
# Function to categorize time into part of the day
get_time_of_day <- function(minutes) {
  hour <- minutes / 60  # Convert minutes to hours
  
  if (hour >= 5 && hour < 12) {
    return('Morning')
  } else if (hour >= 12 && hour < 17) {
    return('Afternoon')
  } else if (hour >= 17 && hour < 21) {
    return('Evening')
  } else {
    return('Night')
  }
}

# Apply the function to Arrival_min and Depart_min
cleaned_data$Arrival_Time_of_Day <- sapply(cleaned_data$Arrival_min, get_time_of_day)
cleaned_data$Depart_Time_of_Day <- sapply(cleaned_data$Depart_min, get_time_of_day)
table(cleaned_data$Arrival_Time_of_Day, cleaned_data$Depart_Time_of_Day)

##            
##             Afternoon Evening Morning Night
##   Afternoon      1089      51       0     0
##   Evening           0    1191       0    54
##   Morning          32       0     881     0
##   Night             0       0      21  1638

# 1.11 Drop the errors of time_period
# Replace values in Time_period with NA when they are equal to 1395.00
cleaned_data$Time_period[cleaned_data$Time_period == 1395.00] <- NA
cleaned_data <- na.omit(cleaned_data)
# Check if there are any negative values in Time_period
has_negatives <- any(cleaned_data$Time_period < 0)
# Print the result
print(has_negatives)

## [1] FALSE

summary(cleaned_data$Time_period)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    7.88   11.00   12.46   15.00  199.00

## 1.12 category to factor 
cleaned_data$MOI <- as.factor(cleaned_data$MOI)
#cleaned_data$Twelve_cats <- as.factor(cleaned_data$Twelve_cats)
cleaned_data$Intervention <- as.factor(cleaned_data$Intervention)
cleaned_data$Response_priority <- as.factor(cleaned_data$Response_priority)
cleaned_data$Transportation_priority <- as.factor(cleaned_data$Transportation_priority)
cleaned_data$Under_10_mins <- factor(cleaned_data$Under_10_mins, levels = c(0, 1))
cleaned_data$Arrival_Time_of_Day <- as.factor(cleaned_data$Arrival_Time_of_Day)
cleaned_data$Depart_Time_of_Day <- as.factor(cleaned_data$Depart_Time_of_Day)
colSums(is.na(cleaned_data))

##                    Date                  Weight                     MOI 
##                       0                       0                       0 
##                 Arrival                  Depart             Time_period 
##                       0                       0                       0 
##       Response_priority Transportation_priority                    Year 
##                       0                       0                       0 
##                   Month                     Day           Under_10_mins 
##                       0                       0                       0 
##             Arrival_min              Depart_min            Intervention 
##                       0                       0                       0 
##     Arrival_Time_of_Day      Depart_Time_of_Day 
##                       0                       0

## 1.13 Save the final data set
#write.csv(cleaned_data, file = "final_data.csv", row.names = FALSE)

Part 2. EDA

2.1 Plot the histogram for each numeric varaibles

# For numeric
hist(cleaned_data$Weight, main="Distribution of Weight", xlab="Weight (lb)", ylab="Frequency")

# For Category
bp <- function(categorical_data) {
  # Check if the input is a factor or character, if not, return an error message
  if (!is.factor(categorical_data) && !is.character(categorical_data)) {
    stop("The input data must be a categorical (factor or character) variable.")
  }

  # Calculate counts and percentages
  data_summary <- as.data.frame(table(categorical_data)) %>%
    rename(Category = categorical_data) %>%
    mutate(Percentage = Freq / sum(Freq) * 100)

  # Create the plot
  ggplot(data_summary, aes(x = Category, y = Freq, fill = Category)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
              position = position_stack(vjust = 0.5), size = 3) +
    labs(title = paste("Bar Plot of Year 2012 to 2016"),#,#deparse(substitute(categorical_data))),
         x = "Year",
         y = "Count") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Adjust text angle for readability
}

bp2 <- function(categorical_dataY, categorical_data2) {
  # Check if the inputs are categorical
  if (!is.factor(categorical_dataY) && !is.character(categorical_dataY)) {
    stop("categorical_dataY must be a categorical (factor or character) variable.")
  }
  if (!is.factor(categorical_data2) && !is.character(categorical_data2)) {
    stop("categorical_data2 must be a categorical (factor or character) variable.")
  }

  # Calculate counts and percentages
  data_summary <- as.data.frame(table(categorical_dataY, categorical_data2)) %>%
    rename(Category = categorical_dataY, Subcategory = categorical_data2) %>%
    group_by(Category) %>%
    mutate(Percentage = Freq / sum(Freq) * 100)
  
  # Create the plot
  ggplot(data_summary, aes(x = Category, y = Freq, fill = Subcategory)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = paste0(Freq, " (", round(Percentage, 1), "%)")),
              position = position_stack(vjust = 0.5), size = 3) +
    labs(title = paste("Bar Plot of Intervention vs. Transportation Priority"),#, deparse(substitute(categorical_dataY)), "vs", deparse(substitute(categorical_data2))),
         x = "Intervention",
         y = "Count") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

heatmap_for_two_categories <- function(cat_data1, cat_data2) {
  # Check if both inputs are categorical (factor or character)
  if (!is.factor(cat_data1) && !is.character(cat_data1)) {
    stop("cat_data1 should be a factor or a character vector.")
  } else {
    cat_data1 <- as.factor(cat_data1)
  }
  if (!is.factor(cat_data2) && !is.character(cat_data2)) {
    stop("cat_data2 should be a factor or a character vector.")
  } else {
    cat_data2 <- as.factor(cat_data2)
  }

  # Create a data frame of counts and calculate percentages
  data_summary <- data.frame(cat_data1, cat_data2) %>%
    count(cat_data1, cat_data2) %>%
    mutate(percentage = n / sum(n) * 100)

  # Create the heatmap
  ggplot(data_summary, aes(x = cat_data1, y = cat_data2, fill = n)) +
    geom_tile() +
    scale_fill_gradient(low = "white", high = "red") +  # Adjust to shades of red
    labs(title = "Heatmap of Category Combinations",
         x = deparse(substitute(cat_data1)),
         y = deparse(substitute(cat_data2)),
         fill = "Count") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7),  # Adjust text angle and size
          axis.text.y = element_text(size = 7)) +
    geom_text(aes(label = paste0(n, "\n(", round(percentage, 1), "%)")), size = 3) # Add count and percentage labels
}

# Usage Example
bp(cleaned_data$Under_10_mins)

bp(cleaned_data$Intervention)

bp(cleaned_data$MOI)

bp(cleaned_data$Response_priority)

bp(cleaned_data$Transportation_priority)

bp(as.factor(cleaned_data$Arrival_Time_of_Day))

bp(as.factor(cleaned_data$Depart_Time_of_Day))

bp2(cleaned_data$Under_10_mins, cleaned_data$MOI)

bp2(cleaned_data$Under_10_mins, cleaned_data$Response_priority)

bp2(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority)

bp2(cleaned_data$Under_10_mins, cleaned_data$Intervention)

bp2(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day)

bp2(cleaned_data$Under_10_mins, cleaned_data$Depart_Time_of_Day)

heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$MOI) + xlab("Under_10_mins") + ylab("Six Category")

heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Response_priority) + xlab("Under_10_mins") + ylab("Response priority")

heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Transportation_priority) + xlab("Under_10_mins") + ylab("Transportation priority")

heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Intervention) + xlab("Under_10_mins") + ylab("Intervention")

heatmap_for_two_categories(cleaned_data$Under_10_mins, cleaned_data$Arrival_Time_of_Day) + xlab("Under_10_mins") + ylab("Arrival Time of Day")

heatmap_for_two_categories(cleaned_data$Intervention, cleaned_data$Transportation_priority) + xlab("Intervention") + ylab("Transportation Priority")

ggplot(cleaned_data, aes(x = Under_10_mins, y = Weight, group = Under_10_mins, color = as.factor(Under_10_mins))) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Boxplots of Trauma Scene Times Under_10_mins vs. Patient Weight from 2012 to 2016", x = "Under_10_mins", y = "Weight", color = "Under_10_mins")

summary(cleaned_data$Weight)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0   145.0   170.0   169.2   200.0   490.0

hist(cleaned_data$Weight, breaks = 30, main = "Distribution of Weight", xlab = "Weight (lb)", ylab = "Frequency", col = "lightblue", border = "black")

dens <- density(cleaned_data$Weight)

Part 3 Blockwise Variable Selection

m0.1 <- glm(Under_10_mins ~ Weight + Intervention, data = cleaned_data, family = 'binomial')
m0.2 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day, data = cleaned_data, family = 'binomial')
m0.3 <- glm(Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day + Response_priority + Transportation_priority , data = cleaned_data, family = 'binomial')

summary(m0.1)

## 
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention, family = "binomial", 
##     data = cleaned_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3969  -1.0738  -0.9749   1.2224   1.6529  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         0.1038857  0.0960692   1.081     0.28    
## Weight             -0.0026122  0.0005324  -4.906 9.29e-07 ***
## InterventionB_post  0.4352198  0.0589796   7.379 1.59e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6834.0  on 4954  degrees of freedom
## Residual deviance: 6757.2  on 4952  degrees of freedom
## AIC: 6763.2
## 
## Number of Fisher Scoring iterations: 4

summary(m0.2)

## 
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day, 
##     family = "binomial", data = cleaned_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8464  -0.9894  -0.7474   1.1399   1.9704  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.1101578  0.1945595   5.706 1.16e-08 ***
## Weight                     -0.0038456  0.0005679  -6.772 1.27e-11 ***
## InterventionB_post          0.4474462  0.0619568   7.222 5.13e-13 ***
## MOIcrash                   -1.0015018  0.1745004  -5.739 9.51e-09 ***
## MOIfall                    -1.4809161  0.1829273  -8.096 5.70e-16 ***
## MOIgsw                      0.2280596  0.1871153   1.219  0.22291    
## MOIother                   -0.9355178  0.1803977  -5.186 2.15e-07 ***
## MOIsw                       0.3486210  0.1919816   1.816  0.06938 .  
## Arrival_Time_of_DayEvening  0.0437772  0.0867203   0.505  0.61369    
## Arrival_Time_of_DayMorning -0.1541896  0.0945137  -1.631  0.10281    
## Arrival_Time_of_DayNight   -0.2303074  0.0838409  -2.747  0.00602 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6834.0  on 4954  degrees of freedom
## Residual deviance: 6302.3  on 4944  degrees of freedom
## AIC: 6324.3
## 
## Number of Fisher Scoring iterations: 4

summary(m0.3)

## 
## Call:
## glm(formula = Under_10_mins ~ Weight + Intervention + MOI + Arrival_Time_of_Day + 
##     Response_priority + Transportation_priority, family = "binomial", 
##     data = cleaned_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9173  -0.9947  -0.7098   1.1102   2.0855  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.2480003  0.2120146   5.886 3.95e-09 ***
## Weight                     -0.0037984  0.0005698  -6.666 2.64e-11 ***
## InterventionB_post          0.4403035  0.0632615   6.960 3.40e-12 ***
## MOIcrash                   -1.1567499  0.1781652  -6.493 8.44e-11 ***
## MOIfall                    -1.3991546  0.1858058  -7.530 5.07e-14 ***
## MOIgsw                      0.0523367  0.1908933   0.274  0.78396    
## MOIother                   -0.7940369  0.1848848  -4.295 1.75e-05 ***
## MOIsw                       0.2408345  0.1944282   1.239  0.21546    
## Arrival_Time_of_DayEvening  0.0153239  0.0873861   0.175  0.86080    
## Arrival_Time_of_DayMorning -0.1721206  0.0951113  -1.810  0.07035 .  
## Arrival_Time_of_DayNight   -0.2697102  0.0846577  -3.186  0.00144 ** 
## Response_priority2         -0.3815487  0.0925467  -4.123 3.74e-05 ***
## Response_priority3         -0.6301143  0.1318657  -4.778 1.77e-06 ***
## Transportation_priority2    0.1607452  0.0905847   1.775  0.07598 .  
## Transportation_priority3   -0.1475596  0.1013079  -1.457  0.14524    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6834.0  on 4954  degrees of freedom
## Residual deviance: 6245.1  on 4940  degrees of freedom
## AIC: 6275.1
## 
## Number of Fisher Scoring iterations: 4

Part 4 Logistic Models

4.1 Model 1 without interation

model1 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + Intervention + MOI + Arrival_Time_of_Day , data = cleaned_data, family = 'binomial')
summary(model1)

## 
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority + 
##     Intervention + MOI + Arrival_Time_of_Day, family = "binomial", 
##     data = cleaned_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9173  -0.9947  -0.7098   1.1102   2.0855  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.2480003  0.2120146   5.886 3.95e-09 ***
## Weight                     -0.0037984  0.0005698  -6.666 2.64e-11 ***
## Response_priority2         -0.3815487  0.0925467  -4.123 3.74e-05 ***
## Response_priority3         -0.6301143  0.1318657  -4.778 1.77e-06 ***
## Transportation_priority2    0.1607452  0.0905847   1.775  0.07598 .  
## Transportation_priority3   -0.1475596  0.1013079  -1.457  0.14524    
## InterventionB_post          0.4403035  0.0632615   6.960 3.40e-12 ***
## MOIcrash                   -1.1567499  0.1781652  -6.493 8.44e-11 ***
## MOIfall                    -1.3991546  0.1858058  -7.530 5.07e-14 ***
## MOIgsw                      0.0523367  0.1908933   0.274  0.78396    
## MOIother                   -0.7940369  0.1848848  -4.295 1.75e-05 ***
## MOIsw                       0.2408345  0.1944282   1.239  0.21546    
## Arrival_Time_of_DayEvening  0.0153239  0.0873861   0.175  0.86080    
## Arrival_Time_of_DayMorning -0.1721206  0.0951113  -1.810  0.07035 .  
## Arrival_Time_of_DayNight   -0.2697102  0.0846577  -3.186  0.00144 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6834.0  on 4954  degrees of freedom
## Residual deviance: 6245.1  on 4940  degrees of freedom
## AIC: 6275.1
## 
## Number of Fisher Scoring iterations: 4

\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_1}}} \] \[ \begin{align*} S_1 =\beta X= & \ 1.248 - 0.0038 \times \text{Weight} - 0.382 \times \text{RP2} \\ & - 0.630 \times \text{RP3} + 0.161 \times \text{TP2} \\ & - 0.148 \times \text{TP3} + 0.440 \times \text{Intervention post} \\ & - 1.157 \times \text{Crash} - 1.399 \times \text{Fall} + 0.052 \times \text{GSW} \\ & - 0.794 \times \text{Other} + 0.241 \times \text{SW} \\ & + 0.015 \times \text{Evening} - 0.172 \times \text{Morning} \\ & - 0.269 \times \text{Night} \end{align*} \]

3.1 Model 2 with interation

model2 <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day + Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
summary(model2)

## 
## Call:
## glm(formula = Under_10_mins ~ Weight + Response_priority + Transportation_priority + 
##     MOI + Intervention + Arrival_Time_of_Day + Transportation_priority * 
##     Intervention, family = "binomial", data = cleaned_data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9457  -0.9861  -0.7125   1.1008   2.0641  
## 
## Coefficients:
##                                               Estimate Std. Error z value
## (Intercept)                                  1.2833189  0.2160558   5.940
## Weight                                      -0.0038257  0.0005702  -6.709
## Response_priority2                          -0.3832384  0.0925574  -4.141
## Response_priority3                          -0.6209733  0.1320798  -4.702
## Transportation_priority2                     0.0761672  0.1055603   0.722
## Transportation_priority3                    -0.1259148  0.1183677  -1.064
## MOIcrash                                    -1.1464167  0.1780727  -6.438
## MOIfall                                     -1.3945810  0.1857158  -7.509
## MOIgsw                                       0.0600184  0.1908213   0.315
## MOIother                                    -0.7880242  0.1847653  -4.265
## MOIsw                                        0.2525791  0.1944432   1.299
## InterventionB_post                           0.2674668  0.1893956   1.412
## Arrival_Time_of_DayEvening                   0.0177034  0.0874384   0.202
## Arrival_Time_of_DayMorning                  -0.1711538  0.0951700  -1.798
## Arrival_Time_of_DayNight                    -0.2684162  0.0847338  -3.168
## Transportation_priority2:InterventionB_post  0.2761208  0.2056824   1.342
## Transportation_priority3:InterventionB_post  0.0087868  0.2243956   0.039
##                                             Pr(>|z|)    
## (Intercept)                                 2.85e-09 ***
## Weight                                      1.96e-11 ***
## Response_priority2                          3.46e-05 ***
## Response_priority3                          2.58e-06 ***
## Transportation_priority2                     0.47057    
## Transportation_priority3                     0.28744    
## MOIcrash                                    1.21e-10 ***
## MOIfall                                     5.95e-14 ***
## MOIgsw                                       0.75312    
## MOIother                                    2.00e-05 ***
## MOIsw                                        0.19395    
## InterventionB_post                           0.15789    
## Arrival_Time_of_DayEvening                   0.83955    
## Arrival_Time_of_DayMorning                   0.07211 .  
## Arrival_Time_of_DayNight                     0.00154 ** 
## Transportation_priority2:InterventionB_post  0.17945    
## Transportation_priority3:InterventionB_post  0.96876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6834.0  on 4954  degrees of freedom
## Residual deviance: 6240.8  on 4938  degrees of freedom
## AIC: 6274.8
## 
## Number of Fisher Scoring iterations: 4

4.2 Interation Plot

library(effects)

## Loading required package: carData

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

m2 <- glm(Under_10_mins~Transportation_priority*Intervention, data = cleaned_data, family = 'binomial')
m3 <- glm(Under_10_mins~Transportation_priority*Response_priority, data = cleaned_data, family = 'binomial')
plot(allEffects(m2),ask=F)

\[ P_{Under 10 mins} = \dfrac{1}{1+e^{-{S_2}}} \] $$ \[\begin{align*} \text S_2 =\beta X= & \ 1.283 - 0.0038 \times \text{Weight} - 0.383 \times \text{RP2} \\ & - 0.621 \times \text{RP3} + 0.076 \times \text{TP2} \\ & - 0.126 \times \text{TP3} - 1.146 \times \text{Crash} \\ & - 1.395 \times \text{Fall} + 0.060 \times \text{GSW} - 0.788 \times \text{Other} \\ & + 0.253 \times \text{SW} + 0.268 \times \text{Intervention post} \\ & + 0.018 \times \text{Evening} - 0.171 \times \text{Morning} \\ & - 0.268 \times \text{Night} + 0.276 \times \text{TP2*Intervention post} \\ & + 0.0088 \times \text{TP3*Intervention post} \end{align*}\]

Part 5 Compare two models

5.1 summary tables for model1 and model2

library(broom)
model_summary_df1 <- broom::tidy(model1)
print(model_summary_df1)

## # A tibble: 15 × 5
##    term                       estimate std.error statistic  p.value
##    <chr>                         <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)                 1.25     0.212        5.89  3.95e- 9
##  2 Weight                     -0.00380  0.000570    -6.67  2.64e-11
##  3 Response_priority2         -0.382    0.0925      -4.12  3.74e- 5
##  4 Response_priority3         -0.630    0.132       -4.78  1.77e- 6
##  5 Transportation_priority2    0.161    0.0906       1.77  7.60e- 2
##  6 Transportation_priority3   -0.148    0.101       -1.46  1.45e- 1
##  7 InterventionB_post          0.440    0.0633       6.96  3.40e-12
##  8 MOIcrash                   -1.16     0.178       -6.49  8.44e-11
##  9 MOIfall                    -1.40     0.186       -7.53  5.07e-14
## 10 MOIgsw                      0.0523   0.191        0.274 7.84e- 1
## 11 MOIother                   -0.794    0.185       -4.29  1.75e- 5
## 12 MOIsw                       0.241    0.194        1.24  2.15e- 1
## 13 Arrival_Time_of_DayEvening  0.0153   0.0874       0.175 8.61e- 1
## 14 Arrival_Time_of_DayMorning -0.172    0.0951      -1.81  7.03e- 2
## 15 Arrival_Time_of_DayNight   -0.270    0.0847      -3.19  1.44e- 3

model_summary_df2 <- broom::tidy(model2)
print(model_summary_df2)

## # A tibble: 17 × 5
##    term                                    estimate std.error statistic  p.value
##    <chr>                                      <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)                              1.28     0.216       5.94   2.85e- 9
##  2 Weight                                  -0.00383  0.000570   -6.71   1.96e-11
##  3 Response_priority2                      -0.383    0.0926     -4.14   3.46e- 5
##  4 Response_priority3                      -0.621    0.132      -4.70   2.58e- 6
##  5 Transportation_priority2                 0.0762   0.106       0.722  4.71e- 1
##  6 Transportation_priority3                -0.126    0.118      -1.06   2.87e- 1
##  7 MOIcrash                                -1.15     0.178      -6.44   1.21e-10
##  8 MOIfall                                 -1.39     0.186      -7.51   5.95e-14
##  9 MOIgsw                                   0.0600   0.191       0.315  7.53e- 1
## 10 MOIother                                -0.788    0.185      -4.26   2.00e- 5
## 11 MOIsw                                    0.253    0.194       1.30   1.94e- 1
## 12 InterventionB_post                       0.267    0.189       1.41   1.58e- 1
## 13 Arrival_Time_of_DayEvening               0.0177   0.0874      0.202  8.40e- 1
## 14 Arrival_Time_of_DayMorning              -0.171    0.0952     -1.80   7.21e- 2
## 15 Arrival_Time_of_DayNight                -0.268    0.0847     -3.17   1.54e- 3
## 16 Transportation_priority2:InterventionB…  0.276    0.206       1.34   1.79e- 1
## 17 Transportation_priority3:InterventionB…  0.00879  0.224       0.0392 9.69e- 1

5.2 Odds for model1 and model2

data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))

## Waiting for profiling to be done...

##                            Estimate X2.5.. X97.5..
## (Intercept)                   3.483  2.308   5.302
## Weight                        0.996  0.995   0.997
## Response_priority2            0.683  0.569   0.818
## Response_priority3            0.533  0.410   0.688
## Transportation_priority2      1.174  0.984   1.403
## Transportation_priority3      0.863  0.708   1.053
## InterventionB_post            1.553  1.372   1.758
## MOIcrash                      0.315  0.221   0.444
## MOIfall                       0.247  0.171   0.354
## MOIgsw                        1.054  0.722   1.527
## MOIother                      0.452  0.313   0.647
## MOIsw                         1.272  0.866   1.857
## Arrival_Time_of_DayEvening    1.015  0.856   1.205
## Arrival_Time_of_DayMorning    0.842  0.699   1.014
## Arrival_Time_of_DayNight      0.764  0.647   0.901

data.frame(round(exp(cbind(Estimate=coef(model2), confint(model2))),3))

## Waiting for profiling to be done...

##                                             Estimate X2.5.. X97.5..
## (Intercept)                                    3.609  2.371   5.535
## Weight                                         0.996  0.995   0.997
## Response_priority2                             0.682  0.568   0.817
## Response_priority3                             0.537  0.414   0.695
## Transportation_priority2                       1.079  0.878   1.328
## Transportation_priority3                       0.882  0.699   1.112
## MOIcrash                                       0.318  0.223   0.449
## MOIfall                                        0.248  0.171   0.355
## MOIgsw                                         1.062  0.727   1.539
## MOIother                                       0.455  0.315   0.651
## MOIsw                                          1.287  0.876   1.879
## InterventionB_post                             1.307  0.901   1.895
## Arrival_Time_of_DayEvening                     1.018  0.858   1.208
## Arrival_Time_of_DayMorning                     0.843  0.699   1.015
## Arrival_Time_of_DayNight                       0.765  0.647   0.903
## Transportation_priority2:InterventionB_post    1.318  0.880   1.973
## Transportation_priority3:InterventionB_post    1.009  0.650   1.566

#For Weight 10X, 20X, 30X

data.frame(round(exp(cbind(Estimate=coef(model1), confint(model1))),3))

## Waiting for profiling to be done...

##                            Estimate X2.5.. X97.5..
## (Intercept)                   3.483  2.308   5.302
## Weight                        0.996  0.995   0.997
## Response_priority2            0.683  0.569   0.818
## Response_priority3            0.533  0.410   0.688
## Transportation_priority2      1.174  0.984   1.403
## Transportation_priority3      0.863  0.708   1.053
## InterventionB_post            1.553  1.372   1.758
## MOIcrash                      0.315  0.221   0.444
## MOIfall                       0.247  0.171   0.354
## MOIgsw                        1.054  0.722   1.527
## MOIother                      0.452  0.313   0.647
## MOIsw                         1.272  0.866   1.857
## Arrival_Time_of_DayEvening    1.015  0.856   1.205
## Arrival_Time_of_DayMorning    0.842  0.699   1.014
## Arrival_Time_of_DayNight      0.764  0.647   0.901

data.frame(round(exp(cbind(Estimate=10*coef(model1), 10*confint(model1))),3)) #with 10 time

## Waiting for profiling to be done...

##                              Estimate   X2.5..      X97.5..
## (Intercept)                263024.620 4281.092 17555484.810
## Weight                          0.963    0.952        0.974
## Response_priority2              0.022    0.004        0.135
## Response_priority3              0.002    0.000        0.024
## Transportation_priority2        4.990    0.847       29.552
## Transportation_priority3        0.229    0.031        1.669
## InterventionB_post             81.698   23.673      282.711
## MOIcrash                        0.000    0.000        0.000
## MOIfall                         0.000    0.000        0.000
## MOIgsw                          1.688    0.038       68.921
## MOIother                        0.000    0.000        0.013
## MOIsw                          11.116    0.237      488.622
## Arrival_Time_of_DayEvening      1.166    0.210        6.463
## Arrival_Time_of_DayMorning      0.179    0.028        1.152
## Arrival_Time_of_DayNight        0.067    0.013        0.354

data.frame(round(exp(cbind(Estimate=20*coef(model1), 20*confint(model1))),3)) #with 20 time

## Waiting for profiling to be done...

##                                Estimate       X2.5..      X97.5..
## (Intercept)                6.918195e+10 18327746.158 3.081950e+14
## Weight                     9.270000e-01        0.906 9.480000e-01
## Response_priority2         0.000000e+00        0.000 1.800000e-02
## Response_priority3         0.000000e+00        0.000 1.000000e-03
## Transportation_priority2   2.490100e+01        0.718 8.733090e+02
## Transportation_priority3   5.200000e-02        0.001 2.785000e+00
## InterventionB_post         6.674641e+03      560.416 7.992531e+04
## MOIcrash                   0.000000e+00        0.000 0.000000e+00
## MOIfall                    0.000000e+00        0.000 0.000000e+00
## MOIgsw                     2.848000e+00        0.001 4.750124e+03
## MOIother                   0.000000e+00        0.000 0.000000e+00
## MOIsw                      1.235550e+02        0.056 2.387518e+05
## Arrival_Time_of_DayEvening 1.359000e+00        0.044 4.177600e+01
## Arrival_Time_of_DayMorning 3.200000e-02        0.001 1.326000e+00
## Arrival_Time_of_DayNight   5.000000e-03        0.000 1.250000e-01

data.frame(round(exp(cbind(Estimate=30*coef(model1), 30*confint(model1))),3)) #with 30 time

## Waiting for profiling to be done...

##                                Estimate       X2.5..      X97.5..
## (Intercept)                1.819656e+16 7.846276e+10 5.410513e+21
## Weight                     8.920000e-01 8.630000e-01 9.230000e-01
## Response_priority2         0.000000e+00 0.000000e+00 2.000000e-03
## Response_priority3         0.000000e+00 0.000000e+00 0.000000e+00
## Transportation_priority2   1.242570e+02 6.080000e-01 2.580786e+04
## Transportation_priority3   1.200000e-02 0.000000e+00 4.647000e+00
## InterventionB_post         5.453079e+05 1.326680e+04 2.259574e+07
## MOIcrash                   0.000000e+00 0.000000e+00 0.000000e+00
## MOIfall                    0.000000e+00 0.000000e+00 0.000000e+00
## MOIgsw                     4.807000e+00 0.000000e+00 3.273840e+05
## MOIother                   0.000000e+00 0.000000e+00 0.000000e+00
## MOIsw                      1.373385e+03 1.300000e-02 1.166594e+08
## Arrival_Time_of_DayEvening 1.584000e+00 9.000000e-03 2.700200e+02
## Arrival_Time_of_DayMorning 6.000000e-03 0.000000e+00 1.527000e+00
## Arrival_Time_of_DayNight   0.000000e+00 0.000000e+00 4.400000e-02

# Install ggplot2 if you haven't already
#install.packages("ggplot2")
library(ggplot2)

# Create your data frame
data <- data.frame(
  Weight = c(1, 10, 20, 30),
  Odds = c(0.996, 0.963, 0.927, 0.892),
  LowerCI = c(0.995, 0.952, 0.906, 0.863),
  UpperCI = c(0.997, 0.974, 0.948, 0.923)
)

# Create the plot
ggplot(data, aes(x = Weight, y = Odds)) +
  geom_point() +  # Add points for the odds
  geom_errorbar(aes(ymin = LowerCI, ymax = UpperCI), width = 0.2) +  # Add error bars for the CI
  labs(title = "Odds with 95% Confidence Intervals", x = "Weight (lb)", y = "Odds") +
  theme_minimal()

5.3 Confidence Interval for model1 and model2

library(sjPlot)

## Learn more about sjPlot with 'browseVignettes("sjPlot")'.

plot_model(model1)

plot_model(model2)

5.4 Measure of Accuracy for model 1 and model2

# Splitting the data into training and testing sets
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(cleaned_data), 0.8*nrow(cleaned_data))
train_data <- cleaned_data[train_indices, ]
test_data <- cleaned_data[-train_indices, ]

# train for model 1
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)

##                
## predicted_class   0   1
##               0 424 217
##               1 122 228

correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)

## [1] 0.6579213

# ROC Curve and AUC
library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")

# Part 5.4 10 folds cross Validation Model1

# Load necessary library
library(boot)

## 
## Attaching package: 'boot'

## The following object is masked from 'package:psych':
## 
##     logit

library(glmnet)

## Loading required package: Matrix

## Loaded glmnet 4.1-8

x = model.matrix(as.factor(Under_10_mins) ~  Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]

##                (Intercept)                     Weight 
##                0.885106060               -0.003992294 
##         Response_priority2         Response_priority3 
##               -0.318922900               -0.619882312 
##   Transportation_priority2   Transportation_priority3 
##                0.157884015               -0.130985523 
##                   MOIcrash                    MOIfall 
##               -0.781553546               -0.986576140 
##                     MOIgsw                   MOIother 
##                0.402148748               -0.499294435 
##                      MOIsw         InterventionB_post 
##                0.541453795                0.430246592 
## Arrival_Time_of_DayEvening Arrival_Time_of_DayMorning 
##                0.064286529               -0.064900697 
##   Arrival_Time_of_DayNight 
##               -0.204986284

# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
                      lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
         "beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))

# Model 1 CV
library(dplyr)
x.test = model.matrix(as.factor(Under_10_mins) ~  Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day, test_data)[,-1]
logistic.prob1 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob1 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)

## [1] 0

# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic), 
                  as.factor(test_data$Under_10_mins), 
                  dnn = c("True", "Predicted"))
confusion

##                Predicted
## True              0   1
##   Over 10 Mins  484 287
##   Under 10 Mins  62 158

p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))

## [1] 0.6478305

library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob1)

## Setting levels: control = 0, case = 1

## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.

## Setting direction: controls < cases

# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 1 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")

# Model2 accuracy

# train for model 2
train_model <- glm(Under_10_mins ~ Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day + Transportation_priority*Intervention, data = train_data, family = 'binomial')
# Predict on test data
predictions <- predict(train_model, test_data, type = "response")
# Convert predictions to binary outcome
predicted_class <- ifelse(predictions > 0.5, 1, 0)
# Confusion Matrix
confusion_matrix<-table(predicted_class, test_data$Under_10_mins)
print(confusion_matrix)

##                
## predicted_class   0   1
##               0 425 207
##               1 121 238

correct_predictions <- sum(diag(confusion_matrix))
total_observations <- nrow(test_data)
accuracy_rate <- correct_predictions / total_observations
print(accuracy_rate)

## [1] 0.6690212

# ROC Curve and AUC
library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, predictions)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("ROC Curve - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")

5.4 10 folds Cross Validation for model 2

library(boot)
library(glmnet)
x = model.matrix(as.factor(Under_10_mins) ~  Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day + Transportation_priority*Intervention, train_data)[,-1]
y = train_data$Under_10_mins
# Logistic
cv.glmnet= cv.glmnet(x, y , alpha = 0, family = "binomial", nfolds = 10)
coef(cv.glmnet, cv.glmnet$lambda.min)[,1]

##                                 (Intercept) 
##                                 0.907420145 
##                                      Weight 
##                                -0.004017559 
##                          Response_priority2 
##                                -0.316955378 
##                          Response_priority3 
##                                -0.608445964 
##                    Transportation_priority2 
##                                 0.082895427 
##                    Transportation_priority3 
##                                -0.075955950 
##                                    MOIcrash 
##                                -0.776001960 
##                                     MOIfall 
##                                -0.986994032 
##                                      MOIgsw 
##                                 0.407431588 
##                                    MOIother 
##                                -0.498903674 
##                                       MOIsw 
##                                 0.551874044 
##                          InterventionB_post 
##                                 0.316567843 
##                  Arrival_Time_of_DayEvening 
##                                 0.066076853 
##                  Arrival_Time_of_DayMorning 
##                                -0.063745555 
##                    Arrival_Time_of_DayNight 
##                                -0.206724747 
## Transportation_priority2:InterventionB_post 
##                                 0.237366080 
## Transportation_priority3:InterventionB_post 
##                                -0.098697821

# Logistic Model
logistic.model = glmnet(x, y, alpha = 0, family = "binomial",
                      lambda = cv.glmnet$lambda.min)
Coefficient = c("beta.0", "beta.1", "beta.2", "beta.3", "beta.4", "beta.5",
         "beta.6", "beta.7", "beta.8", "beta.9", "beta.10", "beta.11", "beta.12", "beta.13", "beta.14")
Estimation = unname(round(-coef(cv.glmnet, cv.glmnet$lambda.min)[,1], 5))

# Model 2 CV
x.test = model.matrix(as.factor(Under_10_mins) ~  Weight + Response_priority + Transportation_priority + MOI + Intervention  + Arrival_Time_of_Day + Transportation_priority*Intervention, test_data)[,-1]
logistic.prob2 = logistic.model %>% predict(newx = x.test)
predicted.classes.logistic = ifelse(logistic.prob2 > 0.5, "Under 10 Mins", "Over 10 Mins")
# Model accuracy
observed.classes.logistic = test_data$Under_10_mins
mean(predicted.classes.logistic == observed.classes.logistic)

## [1] 0

# Confusion matrix
confusion = table(as.factor(predicted.classes.logistic), 
                  as.factor(test_data$Under_10_mins), 
                  dnn = c("True", "Predicted"))
confusion

##                Predicted
## True              0   1
##   Over 10 Mins  483 291
##   Under 10 Mins  63 154

p.logistic = logistic.model %>% predict(newx = x)
# Accuracy
print(sum(diag(confusion)) / sum(confusion))

## [1] 0.6427851

library(pROC)
# Generate the ROC object
roc_obj <- roc(test_data$Under_10_mins, logistic.prob2)

## Setting levels: control = 0, case = 1

## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.

## Setting direction: controls < cases

# Plot the ROC Curve with AUC
auc_value <- auc(roc_obj)
plot(roc_obj, main=paste("Model 2 - ROC Curve with 10-flods Cross Validation - AUC:", round(auc_value, 3)), xlab="False Positive Rate", ylab="True Positive Rate")

# Load necessary library
library(pROC)

# Assuming logistic.prob1 and logistic.prob2 are the predicted probabilities from your two models
roc_obj1 <- roc(test_data$Under_10_mins, logistic.prob1)

## Setting levels: control = 0, case = 1

## Warning in roc.default(test_data$Under_10_mins, logistic.prob1): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.

## Setting direction: controls < cases

roc_obj2 <- roc(test_data$Under_10_mins, logistic.prob2)

## Setting levels: control = 0, case = 1

## Warning in roc.default(test_data$Under_10_mins, logistic.prob2): Deprecated use
## a matrix as predictor. Unexpected results may be produced, please pass a
## numeric vector.

## Setting direction: controls < cases

# Calculate AUC for both models
auc_value1 <- auc(roc_obj1)
auc_value2 <- auc(roc_obj2)

# Plot the first ROC curve
plot(roc_obj1, main="ROC Curves Comparison", xlab="False Positive Rate", ylab="True Positive Rate", col="blue")
# Add the second ROC curve
lines(roc_obj2, col="red")

# Add a legend
legend("bottomright", legend=c(paste("Model 1 - AUC:", round(auc_value1, 3)), 
                               paste("Model 2 - AUC:", round(auc_value2, 3))),
       col=c("blue", "red"), lwd=2)

5.5 Marginal Model Plot

library(car)

## 
## Attaching package: 'car'

## The following object is masked from 'package:boot':
## 
##     logit

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:dplyr':
## 
##     recode

library(alr4)
mmps(model1)

## Warning in mmps(model1): Interactions and/or factors skipped

mmps(model2)

## Warning in mmps(model2): Interactions and/or factors skipped

5.6 Influencial Plots

influencePlot(model1)

##         StudRes         Hat        CookD
## 369   2.0653483 0.002508053 0.0012369820
## 2160  1.2045736 0.012737228 0.0009163225
## 2666  2.0926459 0.003789378 0.0019854891
## 3182 -0.9894323 0.015638885 0.0006707911
## 4804  1.9767581 0.004376927 0.0017554449

influencePlot(model2)

##        StudRes         Hat        CookD
## 369   2.037308 0.002838088 0.0011566998
## 2666  2.071426 0.004049923 0.0017814965
## 3182 -1.010576 0.015969792 0.0006378435
## 3975 -1.610586 0.011725467 0.0018376942
## 4342 -1.173510 0.013806754 0.0008160880

5.7 Residual Plot

plot(resid(model1))
abline(0,0)

plot(resid(model2))
abline(0,0)

Multicollinearty

data.frame(vif(model1))

##                             GVIF Df GVIF..1..2.Df..
## Weight                  1.046235  1        1.022856
## Response_priority       1.563366  2        1.118189
## Transportation_priority 1.074261  2        1.018069
## Intervention            1.035071  1        1.017384
## MOI                     1.685764  5        1.053609
## Arrival_Time_of_Day     1.087726  3        1.014114

data.frame(vif(model2))

## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif

##                                           GVIF Df GVIF..1..2.Df..
## Weight                                1.047189  1        1.023323
## Response_priority                     1.566256  2        1.118705
## Transportation_priority               2.310603  2        1.232910
## MOI                                   1.690581  5        1.053910
## Intervention                          9.247406  1        3.040955
## Arrival_Time_of_Day                   1.088853  3        1.014289
## Transportation_priority:Intervention 16.493968  2        2.015261

Part 6 Final model

# model1 is our final model

Model 1 Pearson’s Chi-Square Test Result

print(paste("Pearson's X^2 =",round(sum(residuals(model1,type="pearson")^2),3)))

## [1] "Pearson's X^2 = 4962.948"

print(qchisq(0.95,4940))

## [1] 5104.625

4962.948 <5104.625

## [1] TRUE

Conclusion: Since the calculated value of Pearson Chi square (4962.948) is less than the critical value (5104.625), we fail to reject the null hypothesis, and conclude that the logistic model fits the data.

Model 2 Pearson’s Chi-Square Test Result

print(paste("Pearson's X^2 =",round(sum(residuals(model2,type="pearson")^2),3)))

## [1] "Pearson's X^2 = 4962.531"

print(qchisq(0.95,4938))

## [1] 5102.591

4962.531<5102.591

## [1] TRUE

Pseudo R^2

#model1
R2m1 <- 1-(6245.1/6834)
R2m2 <- 1-(6240.8/6834)
print(R2m1)

## [1] 0.08617208

print(R2m2)

## [1] 0.08680129

STATS 402 Final Presentation code

Group 1

2023-12-04