[go: up one dir, main page]

0% found this document useful (0 votes)
230 views11 pages

An Introduction To R Language

How to: -->create frames,data,data frames,list,vectors -->import excel files,read from them,write to them -->work with SQL files,read from a database,work with a database,and many other snippets.

Uploaded by

theodor_munteanu
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
230 views11 pages

An Introduction To R Language

How to: -->create frames,data,data frames,list,vectors -->import excel files,read from them,write to them -->work with SQL files,read from a database,work with a database,and many other snippets.

Uploaded by

theodor_munteanu
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 11

An introduction to R language

Contents from a script file:


AN INTRODUCTION TO R ENVIRONMENT
# Factors: a character vector only becomes a factor after factor() is applied.
data <- c("East", "West", "North", "East")
print(data)
print(is.factor(data))

factor_data <- factor(data)
print(factor_data)
print(is.factor(factor_data))

# Build a data frame from three parallel vectors (one row per person).
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(66, 71, 70, 56, 61, 54, 49)
gender <- c("male", "female", "male", "female", "female", "female", "male")
# stringsAsFactors is set explicitly: since R 4.0 the default is FALSE, which
# leaves `gender` as a character column and makes is.factor() report FALSE.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)  # run the whole block, not just this line
print(is.factor(input_data$gender))
print(input_data$gender)

# The last element is "North"; it must stay on one line, otherwise the string
# contains a line break and becomes a separate, bogus factor level.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
# Create the factors
factor_data <- factor(data)
print(factor_data)

# Apply the factor function again with the levels in the required order.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

# Generate levels: 3 levels, each repeated 4 times
v <- gl(3, 4, labels = c(1, 2, 3))
print(v)

# Another example with strings: 2 levels, each repeated twice
v <- gl(2, 2, labels = c("North", "South"))
print(v)

# Arrays: the 9 input values are recycled to fill both 3 x 3 slices.
vector1 <- c(3, 4, 5)
vector2 <- c(10, 9, 11, 12, 10, 6)
result <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(result)

# Name rows, columns and slices
column.names <- c("Col1", "Col2", "Col3")
row.names <- c("Row1", "Row2", "Row3")
matrix.names <- c("Matrix1", "Matrix2")
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)
print(result)

# The same eight values arranged first as a 4 x 2, then as a 2 x 4 array
res <- array(c(1, 2, 3, 4, 5, 6, 7, 1), dim = c(4, 2))
print(res)
res <- array(c(1, 2, 3, 4, 5, 6, 7, 1), dim = c(2, 4))
print(res)

# Access array elements
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
result <- array(c(vector1, vector2), dim = c(3, 3, 1))
print(result)
result <- array(c(vector1, vector2, vector1), dim = c(3, 2, 2))
print(result)
result <- array(c(vector1, vector1), dim = c(2, 3))
print(result)
# print() shows one object per call; a second positional argument is taken as
# `digits`, so the column and the row are printed with separate calls.
print(result[, 2])  # second column
print(result[2, ])  # second row

# Column and row names on a 3 x 3 array
vector <- array(c(1, 2, 3, 2, 3, 4, 3, 4, 5), dim = c(3, 3))
print(vector)

col.names <- c("COL1", "COL2", "COL3")
row.names <- c("R1", "R2", "R3")
result <- array(
  c(1, 2, 3, 2, 3, 4, 3, 4, 5),
  dim = c(3, 3),
  dimnames = list(row.names, col.names)
)
print(result)

# Apply a function across the margins of an array
vector1 <- c(5, 9, 3)
vector2 <- c(9, 11, 12, 13, 14, 15)
new.array <- array(c(vector1, vector2), dim = c(3, 3))
print(new.array)

result <- apply(new.array, 1, sum)  # margin 1: one sum per row
print(result)
result <- apply(new.array, 2, sum)  # margin 2: one sum per column
print(result)

# Write to xlsx file
# write.xlsx() and read.xlsx() are provided by the xlsx package.
library(xlsx)

# NOTE(review): machine-specific path; every relative file name used after this
# call resolves against it.
setwd("C:/Users/theodor/Documents/test_r_files")
write.xlsx(vector1, file = "test.excelfile.xlsx", sheetName = "TestSheet")
write.xlsx(
  x = vector1, file = "test.excelfile.xlsx", sheetName = "TestSheet",
  row.names = FALSE
)

# Exercise 1
# a) Let's write a data frame to an Excel file
# b) Let's read some data from the same Excel file, but another worksheet
# c) Compute the sum of the elements from a specific column
# d) Compute the average of a row from a worksheet
name <- c("Theodor", "Gabriel", "Eliza", "Margaret", "Paul")
age <- c(24, 32, 41, 29, 31)
salary <- c(1000, 2000, 1300, 1500, 1900)
emp.data <- data.frame(name, age, salary)
write.xlsx(
  emp.data, file = "test2.excelfile.xlsx", sheetName = "TestSheet",
  row.names = FALSE
)

# b) Reading part
# NOTE(review): only one sheet is written above, so sheetIndex = 2 presumably
# relies on a second worksheet with numeric columns added by hand -- confirm.
data <- read.xlsx("test2.excelfile.xlsx", sheetIndex = 2)
print(data)

# c) + d)
print(data[, 1])
print(data[, 2])
x1 <- data[, 1]
x2 <- data[, 2]
print(c(sum(x1), sum(x2)))  # sum of each of the two columns
print(c(mean(x1), mean(x2)))  # mean of each of the two columns
# print(c(sd(x1), sd(x2)))  # standard deviation is sd(); there is no std()

# NA values: mean() and median() return NA unless the NAs are removed
x <- c(12, 7, 3, 4.2, -21, NA, 12, 13)
result.mean <- mean(x)
print(result.mean)
result.mean <- mean(x, na.rm = TRUE)
print(result.mean)

x <- c(NA, 2, 3, 4, NA, 7, NA, 8)
result.median <- median(x, na.rm = TRUE)
print(result.median)

# Mode of a vector
# getmode(v) returns the value that occurs most often in `v`; on a tie the
# value that appears first in `v` wins.
getmode <- function(v) {
  distinct <- unique(v)
  counts <- tabulate(match(v, distinct))
  distinct[which.max(counts)]
}

v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3, 3)
result <- getmode(v)
print(result)

charv <- c("o", "it", "the", "it", "it")
result <- getmode(charv)
print(result)

# For loops
v <- LETTERS[1:4]
for (i in v) {
  print(i)
}

# Nested loops: every ordered pair of the first five letters
v1 <- LETTERS[1:5]
v2 <- LETTERS[1:5]
for (i in v1) {
  for (j in v2) {
    print(c(i, j))
  }
}

# Loops with break or next statement
v <- LETTERS[1:6]
for (i in v) {
  if (i == "D") {
    next  # skip the letter D
  }
  print(i)
}

# repeat + break
v <- c("Help", "please")
cnt <- 2  # counter
repeat {
  print(v)
  cnt <- cnt + 1
  if (cnt > 5) {
    break
  }
}

# Exercise:
# set the counter to 6, and repeat printing until the counter gets <= 2
cnt <- 6
repeat {
  print(v)
  cnt <- cnt - 1
  if (cnt <= 2) {
    break
  }
}

# while loop
# Given a vector (1,2,3), print each vector x+i until the second element is
# greater than 7
x <- c(1, 2, 3)
while (x[2] <= 7) {
  x <- x + 1
  print(x)  # incrementing before printing shows one sequence past the limit
}

# we use instead
x <- c(1, 2, 3)
while (x[2] <= 7) {
  print(x)
  x <- x + 1
}
print(x + 1)

# Sequences and simple aggregates
print(seq(22, 33))
print(c(1:4))
print(c(seq(10, 12), c(1:4)))
print(mean(20:23))
print(sum(10:15))
# Print the squares of 1, 2, ..., a, one per line.
#
# a: how many squares to print.
# seq_len() is used instead of 1:a so that a = 0 prints nothing; 1:0 would
# count down through 1 and 0.
new.function <- function(a) {
  for (i in seq_len(a)) {
    b <- i^2
    print(b)
  }
}
new.function(10)
# Next part -- functions
# Print a * b + c; the printed value is also returned invisibly.
new.function <- function(a, b, c) {
  print(a * b + c)
}
new.function(5, 3, 11)
# Print a * b + a + b; the printed value is also returned invisibly.
function2 <- function(a, b) {
  print(a * b + a + b)
}
function2(2, 2)
# Missing arguments
# `a` is printed without trouble; the error is raised only at the line that
# first uses `b`.
new.function2 <- function(a, b) {
  print(a^2)
  print(a)
  print(b)  # fails here with a missing-argument error when `b` is not given
}
# try() still reports the expected error but lets the rest of the script run
# when the file is sourced as a whole.
try(new.function2(6))

# String manipulations
a <- "Hi"
b <- ",my name is"
c <- "Theodor"
print(paste(a, b, c))

# Formatting
result <- format(23.123451234, digits = 5)
print(result)
result2 <- format(23.3212, digits = 3)
print(result2)
result <- format("Hello", digits = 4)
print(result)
# result<-format("Hello"23.1222,digits=4)  # unexpected numeric constant in result<-...
print(result)
result <- format("hello,23.1222", digits = 4)
print(result)  # prints the whole string because the number is inside the quotes
# to clear the console press CTRL+L or run cat("\014") (the same as cat("\f"))

# Scientific notation
result <- format(c(6, 13.1415), scientific = TRUE)
print(result)  # both values are shown with an exponent

# nsmall: the minimum number of digits to the right of the decimal point
result <- format(23.4711, nsmall = 3)
print(result)  # already has four decimals, so it is printed as it is
result <- format(23.47, nsmall = 3)
print(result)  # padded with one zero

result <- format(6)
print(result)  # the number 6 comes back as a string

result <- format(13.7, width = 6)
print(result)  # padded to 6 characters, so the text moves 2 positions right

# Justify strings inside a fixed width
result <- format("Hello", width = 8, justify = "l")
print(result)  # left-justified: 3 blank spaces on the right

result <- format("Hello", width = 8, justify = "c")
print(result)  # centred
result <- format("Hello", width = 8, justify = "r")
print(result)  # right-justified

# Number of characters
print(nchar("Hello,my name is Paul"))

# Upper case, lower case and substrings
x <- "Hello,how are you?"
print(toupper(x))
print(tolower(x))
result <- substring("Extract", 5, 7)
print(result)

# Lists
list_data <- list("RED", "GREEN", c(10, 11, 21), TRUE, 51.24)
print(list_data)

# List with names
list_data <- list(
  c("Jan", "Feb", "March"),
  matrix(c(3, 9, 5, 1, -2, -8), nrow = 2),
  list("green", 12.3)
)
names(list_data) <- c("1st Quarter", "matrix", "A_Inner_List")
print(list_data)
print(list_data[1])
print(list_data[1:2])  # the first two components
print(list_data$matrix)
print(list_data$`1st Quarter`)

list_data[4] <- "New element"  # added as a fourth component
print(list_data)
list_data[4] <- NULL  # assigning NULL removes that component again
print(list_data)

# Exercise 2
# a) Create a list containing two matrices, defined by ncol and nrow
#    respectively
list_data <- list(
  matrix(c(1, 2, 3, 4, 5, 6), ncol = 3),
  matrix(c(1, 2, 3, 4, 5, 6), nrow = 3)
)
print(list_data)

# b) Create two lists, transform them to vectors and add them after that
list1 <- list(1:5)
print(list1)
list2 <- list(10:14)
print(list2)
v1 <- unlist(list1)
v2 <- unlist(list2)
result <- v1 + v2
print(result)

# print(list1 + list2)  # error: non-numeric argument to binary operator

# Create pies
x <- c(10, 21, 31, 19)
labels <- c("Athens", "London", "Tokyo", "Moscow")
png(file = "cities.png")
pie(x, labels)
dev.off()
# jpeg() rather than png(): the device has to match the .jpg extension,
# otherwise PNG data ends up in a file with a JPEG name.
jpeg(file = "city_title_colours.jpg")
pie(x, labels, main = "Cities", col = rainbow(length(x)))
dev.off()

# Slices labelled with their percentage, city names in the legend
x <- c(21, 62, 10, 53)
labels <- c("Athens", "New York", "Bucharest", "Tokyo")
piepercent <- round(100 * x / sum(x), 1)
jpeg(file = "city_pie_percent.jpg")
pie(x, labels = piepercent, main = "City pie chart", col = rainbow(length(x)))
legend("topright", labels, cex = 0.8, fill = rainbow(length(x)))
dev.off()

# pie3D() is not part of base R; it is a function of the plotrix package.
# To install the package from a local archive you must do it like this:
# install.packages("C:\\Program Files\\R\\R-3.3.2\\plotrix_3.6-4.zip", repos = NULL, type = "source")
# not like this: install.packages("plotrix")
library(plotrix)
pie3D(x, labels = piepercent)

# BARCHARTS
H <- c(10, 21, 22, 13, 14)
png(file = "barchart.png")
barplot(H)
dev.off()

H <- c(10, 10, 11, 11, 12)
barplot(H)

# How to create a sequence of repeated elements
c <- rep(2, 10)  # the number 2, ten times
print(c)
c <- rep(10, 2)  # the number 10, twice
print(c)
H <- c(rep(10, 2), rep(11, 4), rep(12, 6))
barplot(H)  # one bar for each occurrence of 10, of 11 and of 12

# Bar chart labels, title and colour
H <- c(7, 12, 28, 3, 41)
M <- c("Mar", "Apr", "May", "June", "July")
png(file = "barchart_months_revenue.png")
barplot(
  H,
  names.arg = M,
  xlab = "Month",
  ylab = "Revenue",
  col = "blue",  # col is short for colour
  main = "Revenue chart",
  border = "red"
)
dev.off()

# Group bar chart and Stacked Bar Chart
colors <- c("Green", "orange", "brown", "red")
# Four month labels: names.arg needs exactly one name per bar, and the 4 x 4
# matrix below produces four stacked bars (one per column).
months <- c("Mar", "Apr", "May", "Jun")
regions <- c("East", "West", "North", "South")
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11, 12),
  nrow = 4, ncol = 4, byrow = TRUE
)
png(file = "barchar_stacked.png")
barplot(
  values,
  main = "Total revenue", names.arg = months,
  xlab = "month", ylab = "revenue", col = colors
)
legend("topleft", regions, cex = 1.3, fill = colors)
dev.off()

# A similar group bar chart but with 3 colors
colors <- c("Green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "July")
regions <- c("East", "West", "North")
# Exactly 3 * 5 = 15 values are supplied; a 16th value does not fit the matrix
# and only produces a data-length warning.
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
  nrow = 3, ncol = 5, byrow = TRUE
)
png(file = "barchar_stacked2.png")
barplot(
  values,
  main = "Total revenue", names.arg = months,
  xlab = "month", ylab = "revenue", col = colors
)
legend("topleft", regions, cex = 1.3, fill = colors)
dev.off()

# Now we move the legend to the top right
colors <- c("Green", "orange", "brown")
months <- c("Mar", "Apr", "May", "Jun", "July")
regions <- c("East", "West", "North")
# Exactly 3 * 5 = 15 values are supplied; a 16th value does not fit the matrix
# and only produces a data-length warning.
values <- matrix(
  c(2, 9, 11, 3, 9, 4, 8, 7, 3, 12, 5, 2, 8, 10, 11),
  nrow = 3, ncol = 5, byrow = TRUE
)
png(file = "barchar_stacked3.png")
barplot(
  values,
  main = "Total revenue", names.arg = months,
  xlab = "month", ylab = "revenue", col = colors
)
legend("topright", regions, cex = 1.3, fill = colors)
dev.off()

# Connect to MySQL
# MySQL(), dbConnect(), dbSendQuery(), fetch() etc. come from RMySQL / DBI.
library(RMySQL)

# Every query below uses this connection. Replace the placeholder credentials
# with real ones before running.
mysqlconnection <- dbConnect(
  MySQL(),
  user = "root", password = "your_password",
  dbname = "people", host = "localhost"
)

# First five rows only. The result set is released explicitly before the next
# query because unfetched rows may still be pending on it.
result <- dbSendQuery(mysqlconnection, "select * from clients")
data.frame <- fetch(result, n = 5)
dbClearResult(result)
print(data.frame)

# All rows
result <- dbSendQuery(mysqlconnection, "select * from clients")
data.frame <- fetch(result)
dbClearResult(result)
print(data.frame)

# QUERY WITH FILTER CLAUSE
result <- dbSendQuery(
  mysqlconnection,
  "select * from clients where city='London'"
)
data.frame <- fetch(result)
dbClearResult(result)
print(data.frame)
print(is.list(data.frame))  # gives TRUE --> fetch(result) yields an R list

# Another filtering example
result <- dbSendQuery(mysqlconnection, "select * from clients where age>=30")
data.frame <- fetch(result)
dbClearResult(result)
print(data.frame)

result <- dbSendQuery(
  mysqlconnection,
  "select * from clients where age>=23 AND age<=31"
)
data.frame <- fetch(result)
dbClearResult(result)
print(data.frame)

# Update rows in tables
result <- dbSendQuery(
  mysqlconnection,
  "update clients set age=25 where Last_Name='Johnson'"
)
dbClearResult(result)

# Inserting data into tables
result <- dbSendQuery(
  mysqlconnection,
  "insert into clients(Last_Name,First_name,age,city) values('Hanfstaengl','Helena',35,'Munich')"
)
dbClearResult(result)

# Creating tables in MySQL
dbWriteTable(mysqlconnection, "mtcars", mtcars[, ], overwrite = TRUE)

# Dropping tables in MySQL
result <- dbSendQuery(mysqlconnection, "drop table if exists mtcars")
dbClearResult(result)

# Close the connection once the database examples are done.
dbDisconnect(mysqlconnection)
# Create histograms
v <- c(9, 13, 21, 8, 6, 2, 12, 13)
png(file = "histogram.png")
hist(v, xlab = "Weight", col = "yellow", border = "blue")
dev.off()

# Create histogram with axis limits and a requested number of breaks
v <- c(9, 12, 21, 13, 8, 10, 41, 33, 36, 29, 42)
png(file = "histogram_lim_breaks.png")
hist(
  v,
  xlab = "weight", col = "green", border = "blue",
  xlim = c(0, 40), ylim = c(0, 5),
  breaks = 5
)
dev.off()

# Plots, line charts
v <- c(7, 13, 28, 3, 41)
png(file = "line_chart.png")
plot(v, type = "o")
dev.off()

# With color and label
# jpeg() rather than png(): the device has to match the .jpg extension.
jpeg(file = "line_chart_with_color.jpg")
plot(
  v,
  type = "o", col = "red",
  xlab = "Month", ylab = "Rain fall", main = "Rain fall chart"
)
dev.off()

# Multiple lines chart: lines() adds series to the plot opened by plot()
v <- c(7, 12, 28, 3, 41)
t <- c(14, 7, 6, 19, 3)
jpeg(file = "line_chart_2_lines.jpg")
plot(
  v,
  type = "o", col = "red",
  xlab = "Month", ylab = "Rain Fall", main = "Rain fall chart"
)
lines(t, type = "o", col = "blue")
lines(t + 1, type = "o", col = "blue")
lines(v - 1, type = "o", col = "yellow")
dev.off()

# Scatterplots
input <- mtcars[, c("wt", "mpg")]
print(head(input))
png(file = "scatterplot.png")
plot(
  x = input$wt, y = input$mpg,
  xlab = "Weight", ylab = "Mileage",
  xlim = c(2.5, 5), ylim = c(15, 30),
  main = "Weight vs Mileage"
)
dev.off()

# Scatterplot matrices
# We give the chart file a name
# NOTE(review): both pairs() calls draw to the same single-file device, so the
# second page presumably replaces the first in the file -- confirm.
png(file = "scatterplot_matrices.png")
pairs(~ wt + mpg + disp + cyl, data = mtcars, main = "Scatterplot Matrix")
pairs(~ wt + mpg + disp, data = mtcars, main = "Scatterplot Matrix")
dev.off()

# LINEAR REGRESSIONS
# Example 1
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
relation <- lm(y ~ x)
print(relation)

# Example 2
x <- c(1, 2, 3)
y <- c(3, 5, 8)
relation <- lm(y ~ x)
# Least-squares estimates of the two coefficients of f(x) = a*x + b:
# the intercept is b and the value reported for x is a.
print(relation)

# Summary of the relationship
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
relation <- lm(y ~ x)
print(summary(relation))

# predict function
x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
relation <- lm(y ~ x)

# A single new value
a <- data.frame(x = 179)
result <- predict(relation, a)
print(result)

# All of the original x values
b <- data.frame(x)
result2 <- predict(relation, b)
print(result2)

# Two new values. The column has to be named `x`, like the predictor in the
# formula; otherwise the new values are not the ones used for the prediction.
d <- data.frame(x = c(128, 136))
result3 <- predict(relation, d)
print(result3)

# Multiple regression model: mpg explained by disp, hp and wt
input <- mtcars[, c("mpg", "disp", "hp", "wt")]
print(head(input))
model <- lm(mpg ~ disp + hp + wt, data = input)
print(model)

cat("# # # # The coefficient values # # #", "\n")
a <- coef(model)[1]  # the intercept comes first
print(a)
Xdisp <- coef(model)[2]
Xhp <- coef(model)[3]
Xwt <- coef(model)[4]
print(Xdisp)
print(Xhp)
print(Xwt)

print(typeof(model))  # the fitted model is stored as a list
print(model[1])  # first component: the coefficients
print(model[2])  # second component: the residuals
print(model[3])  # third component: the effects
print(model)
print(coef(model))

You might also like