R Introduction
R Introduction
What is R
• for (x in 1:10)
{
print(x)
}
• "Hello World!"
Creating Variables in R
• name
• print(name)
Data Type
floor(1.4)
R Strings
• "hello"
'hello'
• str <- "Hello"
str # print the value of str
• str <- "You are best."
str
str
cat(str)
R Booleans / Logical Values
if (b > a) {
print("b is greater than a")
}
• a <- 33
b <- 33
if (b > a) {
print("b is greater than a")
} else if (a == b) {
print ("a and b are equal")
}
• a <- 200
b <- 33
if (b > a) {
print("b is greater than a")
} else if (a == b) {
print("a and b are equal")
} else {
print("a is greater than b")
}
{
x <- as.integer(readline(prompt = "Enter first number :"))
y <- as.integer(readline(prompt = "Enter second number :"))
z <- as.integer(readline(prompt = "Enter third number :"))
if (x > y) {
if (x > z)
print(paste("Greatest is :", x))
else
print(paste("Greatest is :", z))
} else {
if (y > z)
print(paste("Greatest is :", y))
else{
print(paste("Greatest is :", z))
}
}
}
• x <- 41
if (x > 10) {
print("Above ten")
if (x > 20) {
print("and also above 20!")
} else {
print("but not above 20.")
}
} else {
print("below 10.")
}
• a <- 200
b <- 33
c <- 500
• i <- 1
while (i < 6) {
print(i)
i <- i + 1
}
R For Loop
• for (x in 1:10) {
print(x)
}
• fruits <- list("apple", "banana", "cherry")
for (x in fruits) {
print(x)
}
• dice <- c(1, 2, 3, 4, 5, 6)
for (x in dice) {
print(x)
}
• dice <- 1:6
for(x in dice) {
if (x == 6) {
print(paste("The dice number is",
x, "Yahtzee!"))
} else {
print(paste("The dice number is", x, "Not Yahtzee"))
}
}
R Vectors
# Print fruits
fruits
• # Vector of numerical values
numbers <- c(1, 2, 3)
# Print numbers
numbers
• # Vector with numerical values in a sequence
numbers <- 1:10
numbers
• # Vector with numerical decimals in a sequence
numbers1 <- 1.5:6.5
numbers1
• # Vector with numerical decimals in a sequence
# where the last element is not used
numbers2 <- 1.5:6.3
numbers2
• # Vector of logical values
log_values <- c(TRUE, FALSE, TRUE, FALSE)
log_values
• fruits <- c("banana", "apple", "orange")
length(fruits)
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
numbers <- c(13, 3, 5, 7, 20, 2)
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
# Print fruits
fruits
• repeat_each <- rep(c(1,2,3), each = 3)
repeat_each
• repeat_times <- rep(c(1,2,3), times = 3)
repeat_times
• repeat_independent <- rep(c(1,2,3), times = c(5,2,1))
repeat_independent
• numbers <- 1:10
numbers
numbers
Lists
• # List of strings
thislist <- list("apple", "banana", "cherry")
thislist[1]
• thislist <- list("apple", "banana", "cherry")
thislist[1] <- "blackcurrant"
length(thislist)
Matrices
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"), nrow
= 2, ncol = 2)
thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
thismatrix[1, 2]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
thismatrix[2,]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"), nrow
= 2, ncol = 2)
thismatrix[,2]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange","grape",
"pineapple", "pear", "melon", "fig"), nrow = 3, ncol
= 3)
thismatrix[c(1,2),]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape", "pineapple",
"pear", "melon", "fig"), nrow = 3, ncol = 3)
thismatrix[, c(1,2)]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape", "pineapple",
"pear", "melon", "fig"), nrow = 3, ncol = 3)
newmatrix <- cbind(thismatrix,
c("strawberry", "blueberry", "raspberry"))
# Print the new matrix
newmatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape",
"pineapple", "pear", "melon", "fig"),
nrow = 3, ncol = 3)
thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
"apple" %in% thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
dim(thismatrix)
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
dim(thismatrix)
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
length(thismatrix)
• Matrix1 <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
Matrix2 <-
matrix(c("orange", "mango", "pineapple", "watermelon"), nrow
= 2, ncol = 2)
# Adding it as rows
Matrix_Combined <- rbind(Matrix1, Matrix2)
Matrix_Combined
# Adding it as columns
Matrix_Combined <- cbind(Matrix1, Matrix2)
Matrix_Combined
Arrays
• # An array with one dimension with values ranging
# from 1 to 24
thisarray <- c(1:24)
thisarray
2 %in% multiarray
dim(multiarray)
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
length(multiarray)
• Data frames can hold different types of data. While the first
column can be character, the second and third can be numeric or
logical. However, all values within a single column must have the same type.
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame
summary(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame[1]
Data_Frame[["Training"]]
Data_Frame$Training
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
# Add a new row
New_row_DF <- rbind(Data_Frame,
c("Strength", 110, 110))
# Print the new row
New_row_DF
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
# Add a new column
New_col_DF <- cbind(Data_Frame, Steps
= c(1000, 6000, 2000))
# Print the new column
New_col_DF
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
dim(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
ncol(Data_Frame)
nrow(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
length(Data_Frame)
• Data_Frame1 <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame2 <- data.frame (
Training = c("Stamina", "Stamina", "Strength"),
Pulse = c(140, 150, 160),
Duration = c(30, 30, 20)
)
New_Data_Frame <- rbind(Data_Frame1, Data_Frame2)
New_Data_Frame
• Data_Frame3 <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame4 <- data.frame (
Steps = c(3000, 6000, 2000),
Calories = c(300, 400, 300)
)
New_Data_Frame1 <- cbind(Data_Frame3, Data_Frame4)
New_Data_Frame1
R Factors
• Factors are used to categorize data. Examples of factors
are:
• Demography: Male/Female
• Music: Rock, Pop, Classic, Jazz
• Training: Strength, Stamina
• # Create a factor
music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"), levels =
c("Classic", "Jazz", "Pop", "Rock", "Other"))
levels(music_genre)
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
length(music_genre)
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3]
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3]
error
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3]
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"), levels =
c("Classic", "Jazz", "Pop", "Rock", "Opera"))
music_genre[3]
R plot
• plot(1, 3)
• plot(c(1, 8), c(3, 10))
• plot(c(1, 2, 3, 4, 5), c(3, 7, 8, 9, 12))
• x <- c(1, 2, 3, 4, 5)
y <- c(3, 7, 8, 9, 12)
plot(x, y)
• plot(1:10)
• plot(1:10, type="l")
• plot(1:10, main="My Graph", xlab="The x-axis",
ylab="The y axis")
• plot(1:10, col="red")
• plot(1:10, cex=2)
• # Use cex=number to change the size of the points (1
# is default, while 0.5 means 50% smaller, and 2
# means 100% larger)
• plot(1:10, pch=25, cex=2)
R Line
• plot(1:10, type="l")
• plot(1:10, type="l", col="blue")
• plot(1:10, type="l", lwd=2)
• plot(1:10, type="l", lwd=5, lty=3)
• #lty parameter with a value from 0 to 6
• line1 <- c(1,2,3,4,5,10)
line2 <- c(2,5,7,8,9,10)
plot(line1, type = "l", col = "blue")
lines(line2, type="l", col = "red")
R Scatter Plot
• x <- c(5,7,8,7,2,2,9,4,11,12,9,6)
y <- c(99,86,87,88,111,103,87,94,78,77,85,86)
plot(x, y)
• x <- c(5,7,8,7,2,2,9,4,11,12,9,6)
y <- c(99,86,87,88,111,103,87,94,78,77,85,86)
# y-axis values
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x)
• x <- c("A", "B", "C", "D")
y <- c(2, 4, 6, 8)
rownames(Data_Cars)
• Data_Cars <- mtcars
Data_Cars$cyl
• Data_Cars <- mtcars
sort(Data_Cars$cyl)
• Data_Cars <- mtcars
summary(Data_Cars)
• Data_Cars <- mtcars
max(Data_Cars$hp)
min(Data_Cars$hp)
• Data_Cars <- mtcars
which.max(Data_Cars$hp)
which.min(Data_Cars$hp)
• Data_Cars <- mtcars
rownames(Data_Cars)[which.max(Data_Cars$hp)]
rownames(Data_Cars)[which.min(Data_Cars$hp)]
• Data_Cars <- mtcars
mean(Data_Cars$wt)
• Data_Cars <- mtcars
# c() specifies which percentile you want
quantile(Data_Cars$wt, c(0.75))
• Data_Cars <- mtcars
quantile(Data_Cars$wt)