SAS Virtual Learning Environment Sessions: www.sas.
com
Lesson 1 Essentials libname '/people/m277325/consult/EPG1V2/data’
/ /
********************************************* *********************************************
**/ */
/* In the line of code below, replace /* You must run SETUP.SAS before you run
FILEPATH */ */
/* with the path to your EPG1V2/data folder /* this code to create your data for EPG1V2
*/ */
/ /*
********************************************* */
**/ /* DO NOT EDIT THE CODE BELOW
*/
%let /
path=/people/m277325/consult/EPG1V2/data;
data myclass; /* Reads sashelp.class table and creates a copy called my class*/ Data Step vs Proc Step
/ set sashelp.class; - Data: creates/edits info for
run; a table
- Proc: processes a table in a
proc print data=myclass; specific, pre-defined way
run; libname statement
libname libref engine “path”;
data work.shoes; libname: creates library
set sashelp.shoes; - Must be <= 8 characters
NetSales=Sales-Returns; /*This creates a new variable*/ - Start w/ letter or _
run; libref: library name
engine: set of instructions
proc means data=work.shoes mean sum maxdec=2; /*This rounds the statistics to 2 decimal path: location
places*/ ex: libname mylib
var NetSales; *Gives us info. about NetSales by region; “s:/workshop/data”;
class region;
Lesson
run; 2 Accessing Data libname '/people/m277325/consult/EPG1V2/data
***********************************************************; /*MACRO Practice*/
* Lesson 2 - Accessing Data *; libname pg1 "/people/m277325/consult/EPG1V2/data";
***********************************************************; proc freq data=pg1.np_species;
data class_copy1; tables Abundance Conservation_Status;
set sashelp.class; where Species_ID like "YOSE%" and
Category="Mammal"; run;
run;
proc print data=pg1.np_species;
var Species_ID Category Scientific_Name
/*Example 1 csv file*/ Common_Names;
proc import where Species_ID like "YOSE%" and double quotes.
Remember
datafile="/people/m277325/consult/EPG1V2/data/class_birthdat Category="Mammal"; run;
e.csv" If you need to remove
dbms=csv /*Now using Macros instead*/ warning, use single quotes
out=work.class_birthdate_import %let ParkCode=YOSE;
replace; run; %let SpeciesCat=Mammal;
proc contents data=class_birthdate_import; run; proc freq data=pg1.np_species;
tables Abundance Conservation_Status;
where Species_ID like "&ParkCode%" and
/*Example 2* Excel file*/
Category="&SpeciesCat"; run;
proc import
datafile="/people/m277325/consult/EPG1V2/data/eu_sport_trade proc print data=pg1.np_species;
.xlsx" var Species_ID Category Scientific_Name
dbms=xlsx Common_Names;
out=eu_sport_trade where Species_ID like "&ParkCode%" and
replace; run; Category="&SpeciesCat"; run;
proc contents data=eu_sport_trade; run; /*And changing the Macros to explore others*/
/*Example 3* delimiter file*/ %let ParkCode=ZION;
proc import %let SpeciesCat=Bird;
datafile="/people/m277325/consult/EPG1V2/data/np_traffic.dat
proc freq data=pg1.np_species;
"
tables Abundance Conservation_Status;
dbms=dlm
where Species_ID like "&ParkCode%" and
out=traffic2 Category="&SpeciesCat"; run;
replace;
guessingrows=3000; proc print data=pg1.np_species;
delimiter="|"; run; var Species_ID Category Scientific_Name
Lesson 3 Exploring and Validating Data libname '/people/m277325/consult/EPG1V2/data
/*SAS Procedures*/
/*SAS Procedures PROC PRINT*/
proc PRINT data=pg1.storm_summary (obs=10); /*Prints only the first 10 rows*/
var Season Name Basin MaxWindMPH; /*limits the columns included in the report */
/*Ctrl 'select' allows you to choose them from the library, then drag them
over */ run;
/*SAS Procedures PROC MEANS*/ /*Just basic summary statistics*/
proc MEANS data=pg1.storm_summary;
var MaxWindMPH MinWindPressure; run;
/*SAS Procedures PROC MEANS*/
proc UNIVARIATE data=pg1.storm_summary; /*More in-depth summary statistics, including extreme
values*/
var MaxWindMPH MinWindPressure; run;
/*Filtering using 'where'*/
libname pg1 "/people/m277325/consult/EPG1V2/data" (OBS=10); /*Prints only the first 10
observations*/
proc print data=pg1.storm_summary;
where Type="SUV" and MSRP >= 3000;
where Type="SUV" or Type="Truck" or Type="Wagon";
where Type in ("SUV", "Truck", "Wagon"); /*Easier way of using the OR */
where MaxWindMPH >= 156; /*Numeric*/
where Basin='WP'; /*Character*/
where Basin in ('SI', "NI"); /*contain SI -or- NI*/
Punctuation matters here
where StartDate >= "01jan2010"d; /*Dates end in d*/
where MaxWindMPH = . ; /*Missing Numeric values*/
where MaxWindMPH = “ “; /*Missing Charac values*/
where MaxWindMPH is missing; /*You could also use is missing*/
where MaxWindMPH is not missing; /*You could also use is not missing*/
where City like "New%" ; /*% is a wild card for any number of characters*/
/*Formatting Practice*/ /* This includes New York, New Delhi, Newport, Newcastle, New, etc*/
where City like "Sant_ %"; /*_
libname pg1 "/people/m277325/consult/EPG1V2/data"; is a wild card for a single character*/REMEMBER - Procedures
/* This includes Santa Clara, Santa Cruz, Santo Domigo, Santo Tomas,
proc print data=pg1.class_birthdate; - Print: creates listing of all rows
format Height Weight 3. Birthdate date9.; and columns
format Weight 5.; /* 45972 - 5 whole digits*/ - Means: calculates simple
format Weight 5.2; /* 45972.78 - 5 whole digits & 2 decimal summary stats (n, mean, std
places*/
dev, min, max)
format Weight 2.4; /* 45.9728 - 2 whole digits & 4 decimal
- Univariate: generates sum stats
places*/
format Weight COMMA8.1; /* 45,927.8 - 8 total values & 1 decimal and includes 5 lowest and 5
place*/ highest extreme values
format Weight DOLLAR10.2; /* $12,345.67 - 10 total values & 2 decimal - Freq: creates frequency table
places*/ for each column, can help find
format Weight DOLLAR10.; /* $12,346 - just rounds to the nearest problems in the data
dollar*/ - Contents: shows types,
format Weight Z8. ; /* 00001350 - 8 whole digits with zeros in formats, etc.
front*/
format Date date7.; /* 15JAN18 - 7 values with date*/
formatPractice*/
/*Sorting Date date9.; /* 15JAN2018 - 9 values with date*/
format
libname pg1Date date11.; /* 26-OCT-2018 - 11 values with date*/
"/people/m277325/consult/EPG1V2/data";
procformat Date MMDDYY10.;
sort data=pg1.class_test2 /* 01/15/2018 - month, date, year*/
out=test_sort;
by Subject; /*sorts autuomatically by ascending Subject*/
by descending Subject; /*Sorts by descending Subject*/
by Subject descending TestScore; /*Sorts by subject then descending TestScore*/ run;
All this code can go on one line. Formatted currently to include notes
proc sort data=pg1.class_test2
out=test_sort_noduplicates
nodupkey /* nodupkey removes duplicate rows*/
dupout=test_dups; /* dupout contains the duplicate rows that were removed by nodupkey*/
by _all_; /* _all_ This removes entirely duplicated rows*/
by student_id; /*this keeps only the first occurrence for each unique value (here
student_id)*/ run;
proc sort data=pg1.np_summary out=np_sort;
by Reg descending DayVisits;
where Type="NP"; run;
Lesson 4 Preparing Data libname '/people/m277325/consult/EPG1V2/data
proc sort data=pg1.eu_occ(keep=geo country) out=countryList /* Keep allows you to keep only these pieces */
nodupkey;
libname pg1 "/people/m277325/consult/EPG1V2/data";
by Geo Country; run;
data myclass;
set sashelp.class;
/**Using WHERE**/
where Age >=15;
where Course = 'Algebra';
where substr(Basin,2,1)="p"; /*where the 2nd letter of Basin is P; i.e. SP, EP, WP are included*/
/*Using Basin, start with the second position, and go for 1 position*/
where upcase(Common_Names) like'%FOX%' /*includes Red Fox, Common Squirrel Fox, Fox Bear*/
and upcase(Common_Names) not like '%SQUIRREL'; /*includes all Fox but not Squirrel - so NO Common
Squirrel Fox*/
where YearMon like "2016%";
keep Name Age Height; /*you can keep portions*/
drop Sex Weight; /*you can delete portions */
format Height 4.1 Weight 3.; /*rounding to nearest tenth & nearest whole #*/
/** Using IF - THEN **/
if MinPressure = . then PressureGroup=.;
else if MinPressure <=920 then PressureGroup=0;
else PressureGroup=1;
length CarType $12; /*length statement needs to be coded BEFORE if-then statements*/
if MSRP<6000 then CarType="Basic";
else CarType="Luxury"; Pay attention to the +1 here when calculating # of days
/** Making new Variables **/
MaxWindKM = MaxWindMPH * 1.60935; /*New Column now called MaxWindKM*/
/*Creating Multiple Output-Tables in one DATA /*Creating Permanant Tables our Tables in the SAS
step*/ Libraries*/
data indian atlantic pacific; /*you can create a permanent table in the
set pg1.storm_summary; EPG1V2/output folder*/
length Ocean $8; libname out "path-to-EPG1V2/output";
keep Basin Season Name MaxWindMPH Ocean; data out.storm_cat5;
Basin=upcase(Basin); . . .
OceanCode=substr(Basin,2,1); run;
if OceanCode="I" then do;
Ocean="Indian"; /*This creates a new storm_new table in the OUT
output indian; library!*/
end; data out.storm_new;
if OceanCode="A" then do; set pg1.storm_summary;
Ocean="Atlantic"; run;
output atlantic;
end;
if OceanCode="P" then do;
Ocean="Pacific";
output pacific;
end;
run;
Lesson 5: Analyzing and Reporting on Data libname '/people/m277325/consult/EPG1V2/data
/*One-Way Frequency Report*/
pro freq. data=pg1.storm_final order=freq nlevels; /*order=freq puts observations in descending order by
frequency*/
tables BasinName Season; /*nlevels adds table at the top to show number of obs*/
run;
ods graphics on;
ods noproctitle; /*gets ride of PROC FREQ title on table*/
proc rfeq data=pg1.storm_final order=freq nlevels;
tables BaisnName StartDate /
nocum plots=freqplot(orient=horitzonal scale=percent); /*plot=freqplot will produce a barchart*/
format StartDate monname.;
run;
title; /*turns off title*/
ods proctitle; /*turns PROC FREQ automatic title back on*/
/*Two-Way Frequency Report*/
proc freq data=og1.storm_final;
tables BasinName*StartDate /*BasinName will label rows and StartDate will label
columns*/
/ norow nocol nopercent; other no
/*no row and column percentage, options in this statement:
overall crosslist, list
percentages*/
format StartDate monname.;
label BasinName= “Basin”
StartDate= “Storm Month”;
run;
/*Example: Two-Way Frequency Report*/
title "Selected Park Types by Region";
ods graphics on;
proc freq data=pg1.np_codelookup order=freq;
tables Type*Region / nocol crosslist
plots=freqplot (groupby=row orient=horizontal scale=grouppercent);
where Type in ("National Historic Site", "National Monument", "National Park");
run;
title;
/*Creating Summary Statistic Reports w/ PROC MEANS*/
proc means data=pg1.storm_final mean median min max maxdec=0; /*maxdec=0 round to whole number*/
var MaxWindMPH;
class BasinName; /*Groups data, calculates stats for each unique value of
BasinName*/
run;
OUTPUT OUT = output-table <statistic=col-name>;
proc means data=pg1.storm_final mean median min max maxdec=0;
var MaxWindMPH;
class BainName StormType;
ways 0 1 2; /*ways=0 -> 0 classification values, calculates stats for whole
Lesson
table 6: Exporting Results libname '/people/m277325/consult/EPG1V2/data
run; ways=1 -> 2 separate tables, one for StormType & one for
BasinName ways=2 -> combo of BainName and StormType to calculate
/*Example of exporting an Excel workbook (data)*/ /*Example of exporting a csv (results)*/
stats*/
libname xlout xlsx “&outpath/southpacific.xlsx”; ods csvall file= “&outpath/cars.csv”;
data xlout.south_pacific; proc print data=sashelp.cars noobs;
set pg1.storm_final; var make model type msrp mpg_city
/*Creating an Output Summary Table*/
where Basin=“SP”; mpg_highway;
proc means data=sashelp.heart noprint;
run; format msrp dollar8.;
var Weight;
run;
class Chol_Status;
proc means data=pg1.storm_final noprint maxdec=1; ods csvall close;
where Basin=“SP”;
var MaxWindKM;
class Season;
ways 1;
output out=xlout.Season_Stats n=Count
mean=AvgMaxWindKM max=StrongestWindKM;
run;
libname xlout clear;
/*Example of exporting results to Excel*/
ods excel file="&outpath/StormStats.xlsx"
style=snow options(sheet_name='South Pacific Summary'); /*Named the first sheet in Excel
workbook*/
ods noproctitle;
proc means data=pg1.storm_detail maxdec=0 median max; /*First sheet is named “South Pacific
Summary*/
class Season;
var Wind;
where Basin='SP' and Season in (2014,2015,2016);
run;
ods excel options(sheet_name='Detail'); /*Second sheet for 2014 is named
“Detail”, third
proc print data=pg1.storm_detail noobs; sheet named “Detail 2” for 2015, and so
on*/
where Basin='SP' and Season in (2014,2015,2016);
Lessonby7:Season;
Using SQL in SAS libname '/people/m277325/consult/EPG1V2/data
run;
/*Filtering Data with SQL*/ /*Joining tables using SQL inner join*/
title ‘International Storms Since proc sql;
2000’; select class_update.name, grade, age, teacher /*Need this
proc sql; qualifier*/
select propcase(Name) as Name, from pg1.class_update inner join pg1.class_teachers
Age, Height, Birthday format=date9. on class_update.Name=class_teachers.Name;
from pg.class_birthdate quit;
where age > 14 and Height > 156
order by Heigh desc, Name;
quit; /*Assigning an alias*/
title; proc sql;
select u.name, grade, age, teacher
/*Creating a table from a query*/ from pg1.class_updated as u
proc sql; inner join pg1.class_teachers as t
create table work.myclass as on u.name=t.name;
select name, age, height run;
from pg1.class_birthdate
where age > 14
order by height desc;
quit;
Common Formats in SAS