# Exercise: import the Titanic data, tidy it, and inspect the correlations
# between its numeric columns.
#
# Task outline (presumably the scaffold shown to the learner -- confirm):
#   1. attach the packages `dplyr` and `readr`
#   2. use `read_csv()` to import the dataset and assign the data to `Titanic_2`
#   3. get an overview over the data and drop `Name`
#   4. change the column names
#   5. attach the package `corrplot`
#   6. check correlations using `corrplot()`

# Solution ----

# attach the packages `dplyr` and `readr`
library(readr)
library(dplyr)

# use `read_csv()` to import the dataset and assign the data to `Titanic_2`
Titanic_2 <- read_csv("https://stanford.io/2O9RUCF")

# get an overview over the data and drop `Name`
summary(Titanic_2)
# or str(Titanic_2)
# or head(Titanic_2)

# Drop the column by name rather than by position (it was `[, -3]`), so a
# change in the column order of the remote file raises an error instead of
# silently removing a different column.
Titanic_2 <- select(Titanic_2, -Name)

# change the column names (seven names for the seven remaining columns)
colnames(Titanic_2) <- c(
  "Survived", "Class", "Sex", "Age", "Siblings", "Parents", "Fare"
)

# attach the package `corrplot`
library(corrplot)

# check correlations using `corrplot()`; only the numeric columns are kept
# because `cor()` requires numeric input
corrplot(cor(select_if(Titanic_2, is.numeric)))
# (the highest correlation is between fare and passenger class)

# Submission checks ----
# NOTE(review): the `test_*()` helpers and `success_msg()` are not defined or
# attached in this file; presumably the exercise platform provides them.

test_object("Titanic_2")

# Any one of the three overview functions is accepted.
test_or(
  {
    test_function("summary", args = "object")
  },
  {
    test_function("head", args = "x")
  },
  {
    test_function("str", args = "object")
  }
)

test_function("library")
test_function("corrplot", args = "corr")

success_msg("Correct. Unsurprisingly, the highest correlation (-0.55) is between Fare and Class so collinearity is not an issue here.")