# attach the packages `dplyr` and `readr`


# use `read_csv()` to import the dataset and assign the data to `Titanic_2`


# get an overview of the data and drop `Name`


# change the column names


# attach the package `corrplot`


# check correlations using `corrplot()`


# Solution script: import the Titanic data, drop the `Name` column, rename the
# remaining columns, and plot the correlations between the numeric variables.

# attach the packages `dplyr` and `readr`
library(readr)
library(dplyr)

# use `read_csv()` to import the dataset and assign the data to `Titanic_2`
Titanic_2 <- read_csv("https://stanford.io/2O9RUCF")

# get an overview of the data and drop `Name`
summary(Titanic_2)
# or
str(Titanic_2)
# or
head(Titanic_2)

# Drop the column by name instead of by position: a positional index would
# silently remove the wrong column if the column order of the file changed,
# whereas this fails with an error if `Name` is missing.
Titanic_2 <- select(Titanic_2, -Name)

# change the column names
# (names are assigned positionally, so the order of this vector must match the
# order of the columns that remain after dropping `Name`)
colnames(Titanic_2) <- c("Survived", "Class", "Sex", "Age", "Siblings", "Parents", "Fare")

# attach the package `corrplot`
library(corrplot)

# check correlations using `corrplot()`
# (`cor()` only accepts numeric input, so non-numeric columns are filtered out
# before the correlation matrix is computed)
corrplot(cor(select_if(Titanic_2, is.numeric)))
# (the highest correlation is between fare and passenger class)


# Submission checks for this exercise. The `test_*()` / `success_msg()` helpers
# are not defined in this file; presumably they come from the course platform's
# grading package (e.g. DataCamp's testwhat) -- confirm.

# Check the object named `Titanic_2`.
# NOTE(review): presumably compared against the solution's final `Titanic_2`
# (after dropping `Name` and renaming the columns) -- confirm.
test_object("Titanic_2")
# Three alternative checks, one per overview function used in the solution
# (`summary()`, `head()`, `str()`); `args` names the argument that is inspected.
# NOTE(review): presumably passing any one branch is sufficient -- confirm.
test_or({
test_function("summary", args = "object")
},{
test_function("head", args = "x")
},{
test_function("str", args = "object")
})
# No `args` given here, so this check does not name a specific package.
# NOTE(review): looks like it only verifies that `library()` was called at
# all -- confirm whether the attached package should also be checked.
test_function("library")
# Check the `corrplot()` call via its `corr` argument (the correlation matrix).
test_function("corrplot", args = "corr")
# Message registered for a submission that passes the checks above.
success_msg("Correct. Unsurprisingly, the highest correlation (-0.55) is between Fare and Class so collinearity is not an issue here.")