# Week5Lab_oueis.R
# Author: macintoshhd
# Date: 2025-06-05
# Set working directory (adjust the path below to your local folder).
# The call is guarded so the script still runs on machines where this folder
# does not exist; an unguarded setwd() would stop the script with an error.
lab_dir <- "~/Desktop/LIS 4899_Intro to Data Science"
if (dir.exists(lab_dir)) {
  setwd(lab_dir)
}
# Task 1 ----
# Copy the original data frame into a new one, my_mtcars. mtcars is one of
# the data sets built into R, so it does not need to be created first.
my_mtcars <- mtcars

# 1. Investigate my_mtcars using str(). How many variables and observations
#    are included in this data frame?
#    Answer: 32 observations of 11 variables (see the recorded output below).
str(my_mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...

# 2. Calculate engine displacement per cylinder (disp / cyl) and save it as a
#    new variable 'UnitEngine' in the data frame.
my_mtcars$UnitEngine <- my_mtcars$disp / my_mtcars$cyl

# 3. Summarize the new variable 'UnitEngine' using summary().
summary(my_mtcars$UnitEngine)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.77 26.92 34.48 35.03 43.19 59.00
# Task 2 ----
# The vector names below are fixed by the exercise; they become the column
# names of 'myFriends', so they are kept exactly as given.

# 4. Create a numeric vector 'Pets' with these numbers (1,1,1,0,0).
Pets <- c(1, 1, 1, 0, 0)
# 5. Create a numeric vector 'Order' with these numbers (3,1,2,3,3).
Order <- c(3, 1, 2, 3, 3)
# Create a numeric vector 'Siblings'.
Siblings <- c(0, 3, 5, 0, 0)
# Create a numeric vector 'IDs'.
IDs <- c(1, 2, 3, 4, 5)

# 6. Combine those four numeric vectors into a data frame called 'myFriends'.
#    You must use the data.frame function.
myFriends <- data.frame(IDs, Pets, Order, Siblings)

# 7. Report the structure of the data frame.
str(myFriends)
## 'data.frame': 5 obs. of 4 variables:
## $ IDs : num 1 2 3 4 5
## $ Pets : num 1 1 1 0 0
## $ Order : num 3 1 2 3 3
## $ Siblings: num 0 3 5 0 0

# 8. Summarize the data frame using summary().
summary(myFriends)
## IDs Pets Order Siblings
## Min. :1 Min. :0.0 Min. :1.0 Min. :0.0
## 1st Qu.:2 1st Qu.:0.0 1st Qu.:2.0 1st Qu.:0.0
## Median :3 Median :1.0 Median :3.0 Median :0.0
## Mean :3 Mean :0.6 Mean :2.4 Mean :1.6
## 3rd Qu.:4 3rd Qu.:1.0 3rd Qu.:3.0 3rd Qu.:3.0
## Max. :5 Max. :1.0 Max. :3.0 Max. :5.0

# List (print) all of the values of the 'IDs' variable in the data frame.
myFriends$IDs
## [1] 1 2 3 4 5
# List all of the values of the 'Pets' variable in the data frame.
myFriends$Pets
## [1] 1 1 1 0 0
# 9. List all of the values of the 'Order' variable in the data frame.
myFriends$Order
## [1] 3 1 2 3 3
# List all of the values of the 'Siblings' variable in the data frame.
myFriends$Siblings
## [1] 0 3 5 0 0

# 10. Print the value in the fifth observation of the 'Pets' variable.
myFriends$Pets[5]
## [1] 0

# 11. Add a vector called 'age' to 'myFriends' using the cbind function.
#     *** YOU MUST USE THE cbind FUNCTION to receive full grades.
age <- c(23, 21, 45, 21, 18)
myFriends <- cbind(myFriends, age)

# 12. Define a vector called 'names' that includes all the names.
#     Add the vector 'names' to 'myFriends' using the cbind function.
#     Print the structure of 'myFriends'.
#     What is NOT the data type (among: factor, numeric, logical, string)
#     of 'names'?
# NOTE: this variable shadows base::names as a value; the name is required by
# the exercise. Calls such as names(x) still resolve to the base function.
names <- c("John", "Smith", "Susan", "Joe", "Wendy")
# stringsAsFactors = FALSE is forwarded to data.frame() so the column stays
# character on R versions before 4.0 as well, matching the output below.
myFriends <- cbind(myFriends, names, stringsAsFactors = FALSE)
str(myFriends)
## 'data.frame': 5 obs. of 6 variables:
## $ IDs : num 1 2 3 4 5
## $ Pets : num 1 1 1 0 0
## $ Order : num 3 1 2 3 3
## $ Siblings: num 0 3 5 0 0
## $ age : num 23 21 45 21 18
## $ names : chr "John" "Smith" "Susan" "Joe" ...
# Answer: the 'names' column is of type character (chr), i.e. a string.
# Among the choices (factor, numeric, logical, string) it is therefore NOT a
# factor, NOT numeric, and NOT logical.