r-cheatsheet-ABC (1)

This cheat sheet provides a comprehensive overview of key concepts and commands in Statistics and Data Science, including basic statistics, data manipulation, model fitting, simulation, and visualizations. It includes syntax for R programming, such as calculating means, creating frequency tables, and generating plots. The document serves as a quick reference for performing statistical analyses and visualizations using R.

Uploaded by

Erik Johnson

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

15 views3 pages

r-cheatsheet-ABC (1)

Uploaded by

Erik Johnson

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 3

Statistics and Data Science I (ABC) CHEAT SHEET

Word Equations Summary Tables Simple Statistics

outcome = explanatory + other stuff # compute five-number summary mean(data_set$Y)
Y = X + other stuff favstats(~ Y, data = data_set) var(data_set$Y)
sd(data_set$Y)
# create frequency table
tally(data_set$Y) cohensD(Y ~ X, data = data_set)
Basics tally(~ Y, data = data_set) cor(Y ~ X, data = data_set)
print("Hello world!")
# tally by condition b1(Y ~ X, data = data_set)
# assign value to object tally(~ Y < 1900, data = data_set) b1(one_model)
myNumber <- 5
# two-way frequency table pre(Y ~ X, data = data_set)
# combine values into vector tally(Y ~ X, data = data_set, margin = TRUE, f(Y ~ X, data = data_set)
myVector <- c(1, 2, 3) format = "proportion")

# first element in vector Data Frame

myVector[1]
# structure of data frame # arrange rows by variable
# orders values or cases str(data_set) arrange(data_set, Y)
sort(myVector)
# view first/last six rows # creates data frame from csv file
# arithmetic operations head(data_set) data_set <- read.csv("file_name", header = TRUE)
sum(1, 2, 100), +, -, *, / tail(data_set)
sqrt(157) # convert quantitative variable
abs(data_set$Y) # select multiple variables # to categorical
select(data_set, Y1, Y2) factor(data_set$Y)
# logical operations factor(data_set$Y, levels = c(1,2), labels =
>, <, >=, <=, ==, !=, |, & # first six rows of selected variables c("A", "B"))
head(select(data_set, Y1, Y2))
# results in a variable with values # transform values
# of TRUE or FALSE recode(data_set$Y, "0" = 0, "1" = 50, "2" = 100)
data_set$C <- data_set$A > data_set$B # select variable (a column)
data_set$Y # creates two equal sized groups
Probability Distribution ntile(data_set$Y, 2)
# calculate the probability area # find rows that meet condition
# convert categorical variable
xpnorm(65.1, data_set$mean, data_set$sd) data_set[data_set$Y > 40, ] # to quantitative
filter(data_set, Y > 300) as.numeric(data_set$Y)
zscore(data_set$Y) filter(data_set, Y != "NA")

# returns t at this probability

qt(.975, df = 999)
# returns F at this probability
qf(.95, df1 = 1, df2 = 100)

# CI using t distribution
confint(empty_model)

# calculate p-value using F-distribution

xpf(sample_F, df1 = 2 , df2 = 10)

Page: 1 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Statistics and Data Science I (ABC) CHEAT SHEET
Simulation Fitting and Evaluating Models
# sample without replacement # bootstrap sampling distribution of b1s, # empty model
sample(data_set, 6) # centered on sample b1 empty_model <- lm(Y ~ NULL,
sdob1_boot <- do(1000) * data = data_set)
# sample with replacement b1(Y ~ X, data = resample(data_set))
resample(data_set, 10) # use one explanatory variable
# count the number of b1s at the upper one_model <- lm(Y ~ X, data = data_set)
do(3) * resample(data_set, 10) # and lower extreme
tally(sdob1$b1 > sample_b1 | # create a function from a formula
# mixes up values in a variable sdob1$b1 < -sample_b1) one_model_fun <- makeFun(one_model)
shuffle(data_set$Y)
one_model_fun(x_level_1)
# simulate sampling 10000 Ys # return TRUE for middle 95% of distribution
# from normal distribution middle(sdob1$b1, .95) # model predictions and residuals
sim_Y <- rnorm(10000, Y_stats$mean, data_set$empty_predict <- predict(empty_model)
Y_stats$sd) # randomize sampling distribution of PREs data_set$empty_resid <- resid(empty_model)
sdoPRE <- do(1000) * PRE(shuffle(Y) ~ X,
# put simulated Ys into dataframe data = data_set) # produce ANOVA table
data_set <- data.frame(sim_Y) anova(empty_model)
# randomize sampling distribution of Fs supernova(one_model)
# simulate sampling distribution of sdoF <- do(1000) *
means fVal(shuffle(Y) ~ X, data = data_set) # t-test, using pooled variance
sim_SDoM <- do(10000) * mean(rnorm(157, t.test(Tip ~ Condition, data = data_set,
Y_stats$mean, Y_stats$sd)) # counts extreme Fs var.equal=TRUE)
tally(~fVal > sample_F, data = sdoF)
# bootstrap sampling distribution of # pairwise comparison
means # corrections: "Bonferroni" or "none"
bootSDoM <- do(10000) * pairwise(one_model, correction = "none")
mean(resample(data_set$Y, 157))

# randomize sampling distribution

# of b1s, centered on 0
sdob1 <- do(1000) *
b1(shuffle(Y) ~ X, data = data_set)

Page: 2 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Statistics and Data Science I (ABC) CHEAT SHEET
Visualizations
gf_boxplot(Y ~ X, data = data_set) # sampling distribution of b1
gf_histogram(~ Y, data = data_set) %>% gf_histogram(~b1, data = sdob1,
# change labels fill = ~middle(b1, .95)) %>%
gf_labs(title = "Graph Title", x = "Y_Name", # modify the limits on x- and y-axes
y = "Frequency") gf_lims(x = c(-12, 12), y = c(0, 70))

gf_point(Y ~ X, data = data_set)

# faceted grid of histograms

gf_histogram(~ Y, data = data_set) %>%
gf_facet_grid(X ~ .)

gf_point(Y ~ X, data = data_set) %>%

# add model predictions as red points
gf_point(Y ~ X , shape = 1, size = 3,
color = "firebrick") %>%
gf_jitter(Y ~ X, data = data_set) # add best fitting model as a red line
gf_model(one_model, color = "red")

gf_dhistogram(~ Y, data = data_set, fill =

"orange") %>%
gf_density()

gf_boxplot(Y ~ X, data = data_set, fill =

"orange") %>%
gf_jitter(height = 0, alpha = .2, size = 3)
pairwise(one_model, plot = TRUE)

gf_bar( ~ Y, data = data_set)

Page: 3 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

R Cheat Sheet Merged
100% (2)
R Cheat Sheet Merged
35 pages
R Cheat Sheet
No ratings yet
R Cheat Sheet
4 pages
Sas Clinical Interview Questions and Answers
67% (3)
Sas Clinical Interview Questions and Answers
12 pages
r-cheatsheet-ABC
No ratings yet
r-cheatsheet-ABC
3 pages
r-cheatsheet-ABCD (1)
No ratings yet
r-cheatsheet-ABCD (1)
3 pages
r-cheatsheet-ABCD
No ratings yet
r-cheatsheet-ABCD
3 pages
CourseKata r Cheatsheet ABC (1)
No ratings yet
CourseKata r Cheatsheet ABC (1)
5 pages
r-cheatsheet-ABCD (3)
No ratings yet
r-cheatsheet-ABCD (3)
4 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
R Course
No ratings yet
R Course
7 pages
R Intro 2011
No ratings yet
R Intro 2011
115 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
BAN5
No ratings yet
BAN5
2 pages
R code
No ratings yet
R code
9 pages
IBS Sample I
No ratings yet
IBS Sample I
10 pages
STAT-2450 Assignment 1: Name:, Student ID: B00
No ratings yet
STAT-2450 Assignment 1: Name:, Student ID: B00
9 pages
STTN 225 R Summary
No ratings yet
STTN 225 R Summary
18 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
r file code
No ratings yet
r file code
16 pages
UL2
No ratings yet
UL2
2 pages
R Commands
No ratings yet
R Commands
5 pages
r Program Corrections
No ratings yet
r Program Corrections
20 pages
Chapter 5
No ratings yet
Chapter 5
22 pages
R Cheat Sheet: 1. Basics 4. Input and Export of Data
100% (1)
R Cheat Sheet: 1. Basics 4. Input and Export of Data
4 pages
R_Tutorial
No ratings yet
R_Tutorial
32 pages
R Commands: Appendix B
No ratings yet
R Commands: Appendix B
5 pages
Big Data Slip Solution
No ratings yet
Big Data Slip Solution
18 pages
Ali
No ratings yet
Ali
31 pages
Final Cost Practical
No ratings yet
Final Cost Practical
29 pages
Basics: TH TH TH TH TH TH TH
No ratings yet
Basics: TH TH TH TH TH TH TH
3 pages
R Complete
No ratings yet
R Complete
24 pages
R CODES
No ratings yet
R CODES
5 pages
Commands for Data Analysis using R
No ratings yet
Commands for Data Analysis using R
11 pages
Session Set Working Directory Choose Directlry
No ratings yet
Session Set Working Directory Choose Directlry
17 pages
Huraira
No ratings yet
Huraira
26 pages
Merge
No ratings yet
Merge
28 pages
BCA V SEM Advanced R Programming Lab manual final-1(2)
No ratings yet
BCA V SEM Advanced R Programming Lab manual final-1(2)
5 pages
Statistic And R Programming lab Exercise
No ratings yet
Statistic And R Programming lab Exercise
8 pages
r program
No ratings yet
r program
22 pages
R-Programming-Cheat-Sheet
No ratings yet
R-Programming-Cheat-Sheet
7 pages
Econometrics 2019 PDF
No ratings yet
Econometrics 2019 PDF
143 pages
Mid Sem-1 - Faids
No ratings yet
Mid Sem-1 - Faids
2 pages
Essential R
No ratings yet
Essential R
261 pages
R Intro STAT5000
No ratings yet
R Intro STAT5000
17 pages
R Code
No ratings yet
R Code
13 pages
R Console
No ratings yet
R Console
6 pages
R Tutorial #1: Applied Econometrics (Econ3005)
No ratings yet
R Tutorial #1: Applied Econometrics (Econ3005)
21 pages
R Programming Practical File
No ratings yet
R Programming Practical File
38 pages
probs line of code
No ratings yet
probs line of code
2 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
SML Practical 1to11
No ratings yet
SML Practical 1to11
23 pages
Introduction To R PDF
No ratings yet
Introduction To R PDF
56 pages
4 III BTech Minor DS courses syllabus
No ratings yet
4 III BTech Minor DS courses syllabus
5 pages
COST - JournalPracticals (1-7)
No ratings yet
COST - JournalPracticals (1-7)
22 pages
R语言学习笔记
No ratings yet
R语言学习笔记
78 pages
Practical 1 EDA
No ratings yet
Practical 1 EDA
14 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Advanced C Concepts and Programming: First Edition
From Everand
Advanced C Concepts and Programming: First Edition
Gayatri
3/5 (1)
Profound Python Data Science
From Everand
Profound Python Data Science
Onder Teker
No ratings yet
R Syntax Examples 1
No ratings yet
R Syntax Examples 1
6 pages
The Effect of Advertising On Consumer Behaviour in Finland 10-2020
No ratings yet
The Effect of Advertising On Consumer Behaviour in Finland 10-2020
16 pages
Orange Visual Programming
No ratings yet
Orange Visual Programming
222 pages
SIMCA-P+ 12 Tutorial
No ratings yet
SIMCA-P+ 12 Tutorial
144 pages
Kabita 708 Work File
No ratings yet
Kabita 708 Work File
3 pages
Functions and Packages
No ratings yet
Functions and Packages
7 pages
Data Distortion Based Privacy Preserving Method For Data Mining System
No ratings yet
Data Distortion Based Privacy Preserving Method For Data Mining System
6 pages
Client Data Set in Detail9
No ratings yet
Client Data Set in Detail9
13 pages
Stat Match
No ratings yet
Stat Match
44 pages
Very Good Examples SAS Before Interview
No ratings yet
Very Good Examples SAS Before Interview
22 pages
Sas Exercise
No ratings yet
Sas Exercise
20 pages
SM Binning
No ratings yet
SM Binning
12 pages
A Robust Dynamic Data Masking Transformation Approach To Safeguard Sensitive Data
No ratings yet
A Robust Dynamic Data Masking Transformation Approach To Safeguard Sensitive Data
5 pages
Module 2 Introduction To SPSS - Word
No ratings yet
Module 2 Introduction To SPSS - Word
17 pages
Npar Tests: Npar Tests /K-W Hasil by Perlakuan (1 2) /missing Analysis
No ratings yet
Npar Tests: Npar Tests /K-W Hasil by Perlakuan (1 2) /missing Analysis
59 pages
SAS Programming Basics
No ratings yet
SAS Programming Basics
26 pages
CEDA Basic Training - 20180425
No ratings yet
CEDA Basic Training - 20180425
114 pages
PROC CONTENTS
No ratings yet
PROC CONTENTS
5 pages
Getting Started With Your Data: Using Stata
No ratings yet
Getting Started With Your Data: Using Stata
32 pages
7 Time Series Datasets For Machine Learning
No ratings yet
7 Time Series Datasets For Machine Learning
8 pages
Pie Charts: NCSS Statistical Software
No ratings yet
Pie Charts: NCSS Statistical Software
8 pages
Biodiversity R
No ratings yet
Biodiversity R
85 pages
CIS Security Metrics-Quick Start Guide v1.0.0 PDF
No ratings yet
CIS Security Metrics-Quick Start Guide v1.0.0 PDF
18 pages
Zhr1000 BDC
No ratings yet
Zhr1000 BDC
3 pages
ADaM IG V1.1draft
No ratings yet
ADaM IG V1.1draft
92 pages
Implementation of CDISC Standards: Presented by Sandeep (Raj) Juneja, ASG Inc., Cary, NC
No ratings yet
Implementation of CDISC Standards: Presented by Sandeep (Raj) Juneja, ASG Inc., Cary, NC
18 pages
CHAPITRE 02 Statistical Series With One Variable
100% (1)
CHAPITRE 02 Statistical Series With One Variable
19 pages
Spss Fisher Exact
No ratings yet
Spss Fisher Exact
24 pages
Sas 9.0 Manual PDF
No ratings yet
Sas 9.0 Manual PDF
1,861 pages