/* Data Step: copy clinic.admit into a new data set, clinic.admit2 */
data clinic.admit2;
    set clinic.admit;
run;
/* Procedure Step: list the observations of clinic.admit2 */
proc print data=clinic.admit2;
run;
/* Mean of height and weight for each value of sex */
proc tabulate data=clinic.admit;
    class sex;                          /* Row */
    var height weight;                  /* Column */
    table sex*(height weight),mean;     /* Cell value */
run;
/* One PROC PRINT step; the original repeated the same statement four times
   with no RUN statement between them. */
proc print data=clinic.admit2;
run;
/* Copy admit dataset from clinic to work directory */
proc copy in=clinic out=work;
    select admit;
run;
/* SAS dataset has descriptive and data portion */
/* Attribute: Name, type(char-32767 byte, num-8 byte), format(write),
informat(read), label(256 char)*/
To display 1234 as $1234.00 in a report, you can use the DOLLAR8.2 format.
To display 5678 as 5,678.00 in a report, you can use the COMMA8.2 format.
Formats write data values out.
Informats read data values in.
*-----;
SAS Help: Sample datasets are stored
/* Assign the libref clinic (a libref name is 1-8 characters long) */
libname clinic 'd:\users\qtr\reports';

/* List the members of a library; NODS suppresses the per-data-set details */
proc contents data=mylib._all_ nods;
run;

/* Same listing through PROC DATASETS, which ends with QUIT rather than RUN */
proc datasets;
    contents data=sasuser._all_ nods;
quit;
/* System options: each OPTIONS statement changes only the options it names. */
options nodate nonumber;            /* suppress date and page numbers        */
options date;                       /* turn the date back on                 */
options pageno=3 nodate;            /* no date; numbering restarts at page 3 */
options pagesize=15 pageno=1;       /* 15 lines per page, restart at page 1  */
options linesize=64 pageno=1;       /* 64-character lines, restart at page 1 */
options yearcutoff=1925;            /* two-digit years map to 1925-2024      */
options obs=15 firstobs=10;         /* process observations 10 through 15    */

proc print data=sasuser.heart;
run;

/* To list all SAS system options and their settings */
proc options;
run;
Data error and syntax error
----------
/* SUM statement: to get a column sum (total of fee) */
proc print data=clinic.admit;
    sum fee;
run;
/* LABEL option: column headings use variable labels instead of names */
proc print data=admit label;
    label fee='Admission Fee';
    var age height weight fee;
run;

/* NOOBS drops the Obs column; WHERE keeps only observations with age above 30 */
proc print data=clinic.admit noobs;
    where age gt 30;
    var age height weight fee;
run;
= ^= > < >= <=
eq ne gt lt ge le
where firstname CONTAINS 'Jon';
where firstname ? 'Jon';
/* Sort clinic.admit by weight, then age; OUT= leaves the input data set untouched */
proc sort data=clinic.admit out=work.wgtadmit;
    by weight age;
run;

/* BY-group report.
   NOTE(review): BY processing expects work.activity to be sorted (or indexed)
   by actlevel; no such sort appears in this file, so confirm before running. */
proc print data=work.activity;
    var age height weight fee;
    where age>30;
    sum fee;
    by actlevel;        /* Sub total of fee for each BY group */
    pageby actlevel;    /* start a new page for each BY group */
run;
/* DOUBLE option: Double Spacing of the listing output */
proc print data=clinic.stress double;
    var resthr maxhr rechr;
    where tolerance='I';
run;
/* TITLE1 and TITLE3 are set here; no TITLE2 is defined between them */
title1 'Heart Rates for Patients with';
title3 'Increased Stress Tolerance Levels';
proc print data=clinic.stress;
    where tolerance eq 'I';
    var resthr maxhr rechr;
run;

/* FOOTNOTE1 and FOOTNOTE3 are set the same way, with no FOOTNOTE2 */
footnote1 'Data from Treadmill Tests';
footnote3 '1st Quarter Admissions';
proc print data=clinic.stress;
    where tolerance eq 'I';
    var resthr maxhr rechr;
run;
title1;     /* Remove title1 (a null TITLE statement cancels it) */
proc print data=clinic.therapy label;
    label walkjogrun='Walk/Jog/Run';    /* Change label */
run;
/* Fees for HIGH activity level, shown with the DOLLAR4. format */
proc print data=clinic.admit;
    where actlevel eq 'HIGH';
    format fee dollar4.;
    var actlevel fee;
run;
-------
/* FILENAME: reference an external file through a fileref */
filename tests 'c:\users\tmill.dat';
/* INFILE names the fileref to read; this fragment belongs inside a DATA step */
infile tests;

filename exer 'c:\users\exer.dat';
data exercise;
    infile exer;
    /* column input: variable name, $ for character, start-end columns */
    input ID $ 1-4 Age 6-7 ActLevel $ 9-12 Sex $ 14;
run;
Unlike syntax errors, invalid data errors do not cause SAS to stop processing a
program.
- ** * / + -
& |
and or
/* Date, time and datetime constants (assignment fragments for a DATA step) */
TestDate='01jan2000'd;
Time='9:25't;
DateTime='18jan2005:9:27:05'dt;
/* Read in-stream treadmill data with column input, keep only tolerance 'D',
   and compute the total test time in seconds.
   The data lines are laid out on the column positions named in the INPUT
   statement (ID 1-4, Name 6-25, RestHr 27-29, MaxHR 31-33, RecHR 35-37,
   TimeMin 39-40, TimeSec 42-43, Tolerance 45); column input reads by
   position, so single-blank-separated lines would put several fields into Name. */
data clinic.stress;
    input ID 1-4 Name $ 6-25 RestHr 27-29 MaxHR 31-33
          RecHR 35-37 TimeMin 39-40 TimeSec 42-43
          Tolerance $ 45;
    if tolerance='D';                   /* subsetting IF */
    TotalTime=(timemin*60)+timesec;
    datalines;
2458 Murray, W             72 185 128 12 38 D
2462 Almers, C             68 171 133 10  5 I
;
run;
---------
Data step: Compilation phase then execution phase
Program data vector, area of memory where SAS builds a data set, one observation
at a time.
The automatic variables _n_ and _error_ are created.
end of file marker is reached where process ends
/* PUT statements write messages and variable values to the SAS log
   (DATA step fragments) */
put 'MY NOTE: The condition was met.';
put code= _n_= _error_=;                  /* name=value pairs */
put 'MY NOTE: invalid value: ' _all_ ;    /* _ALL_ writes every variable */
if rate>0 then
    Interest=amount*(rate/12);
else put 'DATA ERROR ' rate= _n_=;
Missing numeric values are represented by periods, and missing character values are
represented by blanks.
-------------
/* Numeric format: job codes to job titles, stored through the LIBRARY libref */
proc format library=library;
    value jobfmt
        103 = 'manager'
        105 = 'text processor'
        111 = 'assoc. technical writer'
        112 = 'technical writer'
        113 = 'senior technical writer';
run;

/* Character format (name starts with $): single values, a range, and a list */
proc format library=library;
    value $grade
        'A'     = 'Good'
        'B'-'D' = 'Fair'
        'F'     = 'Poor'
        'I','U' = 'See Instructor';
run;

/* Numeric ranges: -< excludes the upper endpoint; LOW, HIGH and OTHER are keywords */
proc format library=library;
    value agefmt
        low-<13 = 'child'
        13-<20  = 'teenager'
        20-<65  = 'adult'
        65-high = 'senior citizen'
        other   = 'unknown';
run;
If applied to a character format, the keyword LOW includes missing character
values.
/* Formatted input with pointer controls, then permanent formats for two variables */
data perm.empinfo;
    infile empdata;
    input @9 FirstName $5. @1 LastName $7. +7 JobTitle 3.
          @19 Salary comma9.;
    format salary comma9.2 jobtitle jobfmt.;
run;

libname library 'c:\sas\formats\lib';
/* FMTLIB: list of all formats in your catalog */
proc format library=library fmtlib;
run;
------------
/* PROC REPORT, built up step by step. NOWD runs the procedure without the
   interactive REPORT window. */

/* 1. Default report */
proc report data=flights.europe nowd;
run;

/* 2. COLUMN selects the variables and their order */
proc report data=flights.europe nowd;
    column flight orig dest mail freight revenue;
run;

/* 3. WHERE subsets the observations */
proc report data=flights.europe nowd;
    column flight orig dest mail freight revenue;
    where dest in ('LON','PAR');
run;

/* 4. DEFINE attaches a format to a column */
proc report data=flights.europe nowd;
    where dest in ('LON','PAR');
    column flight orig dest mail freight revenue;
    define revenue / format=dollar15.2;
run;

/* 5. Column WIDTH= and SPACING= */
proc report data=flights.europe nowd;
    where dest in ('LON','PAR');
    column flight orig dest mail freight revenue;
    define revenue / format=dollar15.2;
    define flight  / width=6;
    define orig    / width=4 spacing=5;
    define dest    / width=4 spacing=5;
run;

/* 6. Column headings (a blank now separates width=13 from the heading text) */
proc report data=flights.europe nowd;
    where dest in ('LON','PAR');
    column flight orig dest mail freight revenue;
    define revenue / format=dollar15.2;
    define flight  / width=13 'Flight Number';
    define orig    / width=13 spacing=5 'Flight Origin';
    define dest    / width=18 spacing=5 'Flight Destination';
run;

/* 7. GROUP variables with a MEAN statistic; '/' in a heading splits it
      across lines; HEADLINE and HEADSKIP add a rule and a blank line under
      the headings */
proc report data=flights.europe nowd headline headskip;
    where dest in ('LON','PAR');
    column flight orig dest mail freight revenue;
    define revenue / mean format=dollar15.2
                     'Average/Revenue';
    define flight  / group 'Flight/Number' width=6 center;
    define orig    / group width=6 spacing=5 'Flight/Origin'
                     center;
    define dest    / group width=11 spacing=5
                     'Flight/Destination' center;
run;
-----------
/* Default statistics for every numeric variable */
proc means data=perm.survey;
run;

/* Only the requested statistics */
proc means data=perm.survey median range;
run;

/* MAXDEC=0: no decimal places in the printed statistics */
proc means data=clinic.diabetes min max maxdec=0;
run;

/* VAR limits the analysis variables */
proc means data=clinic.diabetes min max maxdec=0;
    var age height weight;
run;

/* CLASS: statistics for each combination of survive and sex */
proc means data=clinic.heart maxdec=1;
    class survive sex;
    var arterial heart cardiac urinary;
run;

/* OUTPUT: write the means and minimums to work.sum_gender; the new names
   pair up with the VAR variables in order */
proc means data=clinic.diabetes;
    class sex;
    var age height weight;
    output out=work.sum_gender
           mean=AvgAge AvgHeight AvgWeight
           min=MinAge MinHeight MinWeight;
run;
CLASS generates a single table, whereas BY generates a separate, smaller table for each BY group.
/* PROC SUMMARY takes the same statements as PROC MEANS; here the means by
   sex are written to work.sum_gender */
proc summary data=clinic.diabetes;
    class sex;
    var age height weight;
    output out=work.sum_gender
           mean=AvgAge AvgHeight AvgWeight;
run;
/* One-way frequency tables for every variable */
proc freq data=parts.widgets;
run;

/* One-way tables for the listed variables only */
proc freq data=finance.loans;
    tables rate months;
run;

/* Two-way table; formats group the values into categories */
proc freq data=clinic.diabetes;
    tables weight*height;               /* rows*columns */
    format weight wtfmt. height htfmt.;
run;

/* Suppress cell frequencies, row percentages and column percentages */
proc freq data=clinic.diabetes;
    tables sex*weight / nofreq norow nocol;
    format weight wtfmt.;
run;
-----------
ODS: Output Delivery System
/* Send procedure output to an HTML file, then close the destination */
ods html body='c:\mydata.html';
proc print data=sasuser.mydata;
run;
ods html close;

ods listing close;      /* don't want to produce listing output */
ods html file='HTML-file-pathname';
ods pdf file='PDF-file-pathname';
proc print data=sasuser.admit;
run;
ods _all_ close;        /* close every open destination */
ods listing;            /* reopen the listing destination */
/* HTML output only: close LISTING, write the HTML body file, restore LISTING */
ods listing close;
ods html body='f:\admit.html';
proc print data=clinic.admit label;
    label actlevel='Activity Level';
    var sex age height weight actlevel;
run;
ods html close;
ods listing;

/* Body, table-of-contents and frame files; both PROC PRINT steps run while
   the HTML destination is open */
ods listing close;
ods html body='c:\records\data.html'
         contents='c:\records\toc.html'
         frame='c:\records\frame.html';
proc print data=clinic.admit label;
    label actlevel='Activity Level';
    var id sex age height weight actlevel;
run;
proc print data=clinic.stress2;
    var id resthr maxhr rechr;
run;
ods html close;
ods listing;
----------------
/* Accumulating total: RETAIN gives SumSec its initial value of 5400 and the
   sum statement adds TotalTime to it on every observation */
data clinic.stress;
    infile tests;
    input ID $ 1-4 Name $ 6-25 RestHR 27-29 MaxHR 31-33
          RecHR 35-37 TimeMin 39-40 TimeSec 42-43
          Tolerance $ 45;
    TotalTime = timemin*60 + timesec;
    retain SumSec 5400;
    sumsec + totaltime;
run;

/* Same step plus a conditional assignment */
data clinic.stress;
    infile tests;
    input ID $ 1-4 Name $ 6-25 RestHR 27-29 MaxHR 31-33
          RecHR 35-37 TimeMin 39-40 TimeSec 42-43
          Tolerance $ 45;
    TotalTime = timemin*60 + timesec;
    retain SumSec 5400;
    sumsec + totaltime;
    if totaltime gt 800 then TestLength='Long';
run;
In SAS, any numeric value other than 0 or missing is true, and a value of 0 or
missing is false.
if totaltime>800 then TestLength='Long';
else if 750<=totaltime<=800 then TestLength='Normal';
else if totaltime<750 then TestLength='Short';
/* LENGTH comes before the first assignment so TestLength can hold 'Normal'
   (6 characters) */
data clinic.stress;
    infile tests;
    input ID $ 1-4 Name $ 6-25 RestHR 27-29 MaxHR 31-33
          RecHR 35-37 TimeMin 39-40 TimeSec 42-43
          Tolerance $ 45;
    TotalTime = timemin*60 + timesec;
    retain SumSec 5400;
    sumsec + totaltime;
    length TestLength $ 6;
    if totaltime gt 800 then testlength='Long';
    else if 750 le totaltime le 800 then testlength='Normal';
    else if totaltime lt 750 then TestLength='Short';
run;
if resthr<70 then delete;
data clinic.stress(drop=timemin timesec);
data clinic.stress(keep=timemin timesec);
format sumsec comma6.;
/* Map each jobcode to a descriptive group; written as an IF/ELSE chain in
   place of the SELECT group */
data emps(keep=salary group);
    set sasuser.payrollmaster;
    length Group $ 20;
    if jobcode eq "FA1" then group="Flight Attendant I";
    else if jobcode eq "FA2" then group="Flight Attendant II";
    else if jobcode eq "FA3" then group="Flight Attendant III";
    else group="Other";
run;
if totaltime>800 then
do;
testlength='Long';
message='Run blood panel';
end;
else if 750<=totaltime<=800 then testlength='Normal';
else if totaltime<750 then TestLength='Short';
run;
/* Compute amt by pay class; unrecognized classes are reported in the log.
   Written as an IF/ELSE chain in place of the SELECT group. */
data payroll;
    set salaries;
    if payclass eq 'monthly' then amt=salary;
    else if payclass eq 'hourly' then
        do;
            amt=hrlywage*min(hrs,40);
            if hrs gt 40 then put 'CHECK TIMECARD';
        end;
    else put 'PROBLEM OBSERVATION';
run;
--------------
/* Read research.cltrials into lab23.drug1h */
data lab23.drug1h;
    set research.cltrials;
run;
/* DROP= on SET keeps triglyc out of the step entirely; DROP= on DATA removes
   placebo and uric only from the output data set */
data lab23.drug1h(drop=placebo uric);
    set research.cltrials(drop=triglyc);
    if sex eq 'M' then delete;
    if placebo eq 'YES';                    /* subsetting IF */
    TestDate='22MAY1999'd;
    retain Days 30;
    days + 1;
    length Retest $ 5;
    if cholesterol gt 190 then retest='YES';
    else if 150 le cholesterol le 190 then retest='CHECK';
    else if cholesterol lt 150 then retest='NO';
    label retest='Perform Cholesterol Test 2?';
    format enddate mmddyy10.;
run;

/* placebo is read so it can be tested, then dropped from the output */
data lab23.drug1h(drop=placebo);
    set research.cltrials(drop=triglycerides uricacid);
    if placebo eq 'YES';
run;
/* Accumulate minutes and seconds over all observations and output only the
   final observation. END=last is 1 while the last observation is processed.
   TotalTime uses the accumulated TotalSec; the original added only the last
   observation's timesec. */
data work.addtoend(drop=timemin timesec);
    set clinic.stress2(keep=timemin timesec) end=last;
    TotalMin+timemin;
    TotalSec+timesec;
    TotalTime=totalmin*60+totalsec;
    if last;
run;
--------------
One-to-one reading : Creates observations that contain all of the variables from
each contributing data set.
/* Two SET statements: one observation is read from each data set per iteration */
data clinic.one2one;
    set clinic.patients;
    if age lt 60;           /* subsetting IF, applied before the second SET */
    set clinic.measure;
run;
Concatenating : Appends the observations from one data set to another.
/* One SET statement listing two data sets: all of the first, then all of the second */
data clinic.concat;
    set clinic.therapy1999
        clinic.therapy2000;
run;
Interleaving : Intersperses observations from two or more data sets, based on one
or more common variables.
/* SET with BY: observations from both data sets come out ordered by month */
data clinic.interlv;
    set clinic.therapy1999
        clinic.therapy2000;
    by month;
run;
Match-merging : Matches observations from two or more data sets into a single
observation in a new data set according to the values of a common variable.
/* Basic match-merge on id */
data clinic.merged;
    merge clinic.demog
          clinic.visit;
    by id;
run;

/* Both inputs carry a variable named date; RENAME= keeps the two apart */
data clinic.merged;
    merge clinic.demog(rename=(date=BirthDate))
          clinic.visit(rename=(date=VisitDate));
    by id;
run;

proc print data=clinic.merged;
run;

/* IN= variables are 1 when that data set contributes to the observation;
   keep only ids present in both */
data clinic.merged;
    merge clinic.demog(in=indemog rename=(date=BirthDate))
          clinic.visit(in=invisit rename=(date=VisitDate));
    by id;
    if indemog and invisit;
run;
merge clinic.demog(in=indemog
rename=(date=BirthDate))
clinic.visit(drop=weight in=invisit
rename=(date=VisitDate));
----------
mean(of x1-x3)
The INPUT function converts character data values to numeric values.
The PUT function converts numeric data values to character values.
Test=input(saletest,comma9.);
INPUT(source,informat)
PUT(source,format)
/* PUT converts the numeric site to character with the 2. format before concatenation */
data hrd.newtemp;
    set hrd.temp;
    Assignment = put(site,2.) || '/' || dept;
run;
January 1, 1960
/* Show the two date variables with the DATE9. format */
proc print data=hrd.temp;
    format startdate enddate date9.;
run;
date=mdy(mon,day,yr);
now=today();
now=date();
curtime=time();
day=day(date);
quarter=qtr(date);
wkday=weekday(date);
month=month(date);
yr=year(date);
x=intck('day',d1,d2);
x=intck('week',d1,d2);
x=intck('month',d1,d2);
x=intck('qtr',d1,d2);
x=intck('year',d1, d2);
/* General forms: 'interval' stands for a real interval name such as 'month'.
   The start date and both dates are SAS date values, not quoted strings, and
   the basis is a quoted character value. */
x=intnx('interval',start_from,increment);
x=datdif(date1,date2,'ACT/ACT');
x=yrdif(date1,date2,'ACT/ACT');
Years=intck('year',hired,today());
TargetYear=intnx('year','05feb94'd,3);
MonthX=intnx('month','01jan95'd,5,'b');
DATDIF(start_date,end_date,basis)
YRDIF(start_date,end_date,basis)
Years=intck('year',hired,today());
TargetYear=intnx('year','05feb94'd,3);
MonthX=intnx('month','01jan95'd,5,'b');
SCAN(argument,n,delimiters)
LastName=scan(name,1);
SUBSTR(argument,position,<n>)
substr(middlename,1,1)
MiddleInitial=substr(middlename,1,1);   /* Give value: SUBSTR on the right extracts */
substr(region,1,3)='NNW';               /* Set value: SUBSTR on the left replaces */
/* Build one address string; TRIM removes trailing blanks before concatenation */
data hrd.newtemp(drop=address city state zip);
    set hrd.temp;
    NewAddress = trim(address)||', '||trim(city)||', '||zip;
run;

/* CATX variant: strips leading and trailing blanks from each item and puts
   the separator between them */
data hrd.newtemp(drop=address city state zip);
    set hrd.temp;
    NewAddress = catx(', ',address,city,zip);
run;
data hrd.datapool;
    set hrd.temp;
    /* search for 'word processing' in column job; INDEX returns 0 when not found */
    if index(job,'word processing') > 0;
run;
index(upcase(job),'WORD PROCESSING')
index(lowcase(job),'word processing')
data hrd.datapool;
    set hrd.temp;
    /* FIND with the 't' modifier trims trailing blanks from both arguments */
    if find(job,'word processing','t') > 0;
run;
upcase(job)
lowcase(title)
Contact=propcase(contact)
name=tranwrd(name,'Monroe','Manson');   /* replace Monroe with Manson */
INT(argument) returns the integer part of the argument
ROUND(argument,round-off-unit)
MiddleInitial=substr(scan(name,3),1,1);
Years=intck('year','15jun1999'd,today());
--------------
/* Iterative DO loop: compound the monthly rate over 12 months. No OUTPUT
   statement sits inside the loop, so one observation is written at the end
   of the step. */
data finance.earnings;
    Amount=1000;
    Rate=.075/12;
    do month=1 to 12;
        Earned+(amount+earned)*(rate);
    end;
run;

/* The index variable counter is dropped; Year is counted with a sum statement */
data work.earn (drop=counter);
    Value=2000;
    do counter=1 to 20;
        Interest=value*.075;
        value+interest;
        Year+1;
    end;
run;

/* One observation per year */
data work.earn;
    Value=2000;
    do Year=1 to 20;
        Interest=value*.075;
        value+interest;
        output;     /* explicit output inside the loop */
    end;
run;
DO index-variable=5 to 1 by -1;
SAS statements
END;
/* Nested loops: 2000 is added each year, interest is compounded each month */
data work.earn;
    do year=1 to 20;
        Capital + 2000;
        do month=1 to 12;
            Interest = capital*(.075/12);
            capital + interest;
        end;
    end;
run;

/* DO UNTIL tests its condition at the bottom of the loop, so the body runs
   at least once */
data work.invest;
    do until(Capital>=50000);
        capital + 2000;
        capital + capital*.10;
        Year + 1;
    end;
run;
/* DO WHILE tests its condition at the top of the loop.
   NOTE(review): Capital is a sum-statement variable and starts at 0, so
   Capital>=50000 is false on entry and the body never executes. This looks
   like a deliberate contrast with DO UNTIL; if accumulation was intended the
   condition would need to be Capital<50000. Confirm. */
data work.invest;
    do while(Capital>=50000);
        capital + 2000;
        capital + capital*.10;
        Year + 1;
    end;
run;
/* Direct access: POINT= reads the observation whose number is in sample
   (every 10th observation up to 5000). STOP is required because no
   end-of-file is detected when reading with POINT=. */
data work.subset;
    do sample=10 to 5000 by 10;
        set factory.widgets point=sample;
        output;
    end;
    stop;
run;
-------------
A SAS array is a temporary grouping of SAS variables under a single name. An array
exists only for the duration of the DATA step.
/* Apply 5*(value-32)/9 to each of the seven daily variables in place */
data work.report(drop=i);
    set master.temps;
    array wkday{7} mon tue wed thr fri sat sun;
    do i=1 to 7;
        wkday{i} = 5*(wkday{i}-32)/9;
    end;
run;
array sales{4} qtr1 qtr2 qtr3 qtr4;
array sales{4} qtr1-qtr4;
array sales{96:99} totals96-totals99;
Amount Rate Term → _NUMERIC_
array sales{*} _numeric_;
FrstName LastName Address → →→ → _CHARACTER_
array sales{*} _character_;
/* Write the observation once for every element above 95 */
data work.report(drop=i);
    set master.temps;
    array wkday{7} mon tue wed thr fri sat sun;
    do i=1 to 7;
        if wkday{i} gt 95 then output;
    end;
run;

/* Multiply w1-w6 by 2.2 */
data work.weights(drop=i);
    set master.class;
    array wt{6} w1-w6;
    do i=1 to 6;
        wt{i} = wt{i}*2.2;
    end;
run;

/* Write the observation once for every non-blank also1-also9 value */
data work.new(drop=i);
    set master.synyms;
    array term{9} also1-also9;
    do i=1 to 9;
        if term{i} ne ' ' then output;
    end;
run;

/* {*} lets SAS count the elements; DIM returns that count as the loop bound */
data hrd.convert;
    set hrd.fitclass;
    array wt{*} weight1-weight6;
    do i=1 to dim(wt);
        wt{i} = wt{i}*2.2046;
    end;
run;
array firstname{5} $ 24; /* Creating array */
/* WgtDiff{5} names no elements, so SAS creates WgtDiff1-WgtDiff5; each one
   holds the difference between consecutive weights */
data hrd.diff;
    set hrd.convert;
    array wt{6} weight1-weight6;
    array WgtDiff{5};
    do i=1 to 5;
        wgtdiff{i} = wt{i+1}-wt{i};
    end;
run;
/* Assigning initial values to array elements (listed in parentheses) */
array goal{4} g1 g2 g3 g4 (9000 9300 9600 9900);
array col{3} $ color1-color3 ('red','green','blue');
array Var{4} (1 2 3 4);
/* Percent of goal achieved per quarter; here Goal1-Goal4 are created as
   data set variables */
data finance.report;
    set finance.qsales;
    array sale{4} sales1-sales4;
    array Goal{4} (9000 9300 9600 9900);
    array Achieved{4};
    do i=1 to 4;
        achieved{i} = 100*sale{i}/goal{i};
    end;
run;

/* Same calculation with _TEMPORARY_ goal elements, which are not written to
   the output data set */
data finance.report;
    set finance.qsales;
    array sale{4} sales1-sales4;
    array goal{4} _temporary_ (9000 9300 9600 9900);
    array Achieved{4};
    do i=1 to 4;
        achieved{i} = 100*sale{i}/goal{i};
    end;
run;
array new{3,4} x1-x12;      /* 2-D Array: 3 rows by 4 columns */
new{2,3}=0;
/* Sum the 12 monthly values into 4 quarterly totals using a 4x3 array */
data finance.quarters(drop=i j);
    set finance.monthly;
    array m{4,3} month1-month12;
    array Qtr{4};
    do i=1 to 4;
        qtr{i}=0;
        do j=1 to 3;
            qtr{i}+m{i,j};
        end;
    end;
run;

/* Rotating the dataset: one output observation per quarter */
data work.rotate(drop=qtr1-qtr4);
    set finance.funddrive;
    array contrib{4} qtr1-qtr4;
    do Qtr=1 to 4;
        Amount=contrib{qtr};
        output;
    end;
run;
-------------
SAS provides three primary input styles—column input, formatted input, and list
input.
input Item $ 1-13 IDnum $ 15-19 InStock 21-22
BackOrd 24-25;
The @n moves the input pointer to a specific column number.
The +n moves the input pointer forward to a column number that is relative to the
current position
input @1 LastName $7.;
input LastName $7. @9 FirstName $5.;
input @9 FirstName $5. @1 LastName $7. @15 JobTitle 3.;
input LastName $7. +1 FirstName $5. +5 Salary comma9.;
input LastName $7. +1 FirstName $5. +5 Salary comma9. @15 JobTitle 3.;
You can use the notation +(-n) to move the +n pointer control backward.
input @9 FirstName $5.;
input @9 FirstName $5. @1 LastName $7. +7 JobTitle 3.;  /* 3. informat: read with length 3 */
/* Without the period, 3 is not an informat; SAS treats it as a column number
   (column input), so the period must not be left out. */
input @9 FirstName $5. @1 LastName $7. +7 JobTitle 3;
/* Formatted input: @n moves to column n, +n moves n columns forward */
data perm.empinfo;
    infile empdata;
    input @9  FirstName $5.
          @1  LastName  $7.
          +7  JobTitle  3.
          @19 Salary    comma9.;
run;
Two common record formats are fixed-length records and variable-length records.
input Dept $ 1-11 @13 Receipts comma8.;
infile receipts pad;
The PAD option pads each record with blanks so that all data lines have the same
length.
---------------
/* List input: values separated by blanks */
data perm.survey;
    infile credit;
    input Gender $ Age Bankcard FreqBank Deptcard
          FreqDept;
run;

/* DLM= names a different delimiter */
data perm.survey;
    infile credit dlm=',';
    input Gender $ Age Bankcard FreqBank
          Deptcard FreqDept;
run;

/* A numbered range of variables */
data survey.phone;
    infile survey;
    input IDnum $ Ques1-Ques5;
run;

data survey.stores;
    infile stordata;
    input Age (Store1-Store3) ($);      /* Range of character variables */
run;

/* The informat in the second parentheses applies to every variable in the range */
data test.scores;
    infile group3;
    input Age (Score1-Score4) (6.);
run;

data perm.survey;
    infile credit missover;             /* Missing values at the end of a record */
    input Gender $ Age Bankcard FreqBank
          Deptcard FreqDept;
run;

/* DSD: comma-delimited data; two consecutive delimiters mean a missing value */
data perm.survey;
    infile credit dsd;
    input Gender $ Age Bankcard FreqBank
          Deptcard FreqDept;
run;

data perm.growth;
    infile citydata;
    length City $ 12;                   /* Specify length before INPUT */
    input city $ Pop70 Pop80;
run;
The ampersand (&) modifier is used to read character values that contain embedded
blanks.
The colon (:) modifier is used to read nonstandard data values and character values
that are longer than eight characters, but which contain no embedded blanks.
input Rank City &;      /* with &, a value ends at 2 consecutive blanks */
The colon (:) indicates that values are read until a blank (or other delimiter)
is encountered, and then an informat is applied
/* Modified list input: & lets City contain single embedded blanks, and :
   applies the COMMA. informat to Pop86 */
data perm.cityrank;
    infile topten;
    input Rank City & $12.
          Pop86 : comma.;
run;
/* DATA _NULL_ runs the step without creating a data set; FILE names the
   output file and PUT writes the records (free format) */
data _null_;
    set perm.finance;
    file 'c:\data\findat';
    put ssn name salary date : date9.;
run;

/* Comma-delimited output */
data _null_;
    set perm.finance;
    file 'c:\data\findat2' dlm=',';
    put ssn name salary date : date9.;
run;

/* COMMA6. writes commas inside salary, which clash with the comma delimiter */
data _null_;
    set perm.finance;
    file 'c:\data\findat2' dlm=',';
    put ssn name salary : comma6. date date9.;
run;

data _null_;
    set perm.finance;
    file 'c:\data\findat2' dsd;         /* DSD quotes a value containing commas */
    put ssn name salary : comma. date : date9.;
run;

/* Read the DSD file back. The colon on Date makes it modified list input,
   matching the way the file was written. */
data work.finance2;
    infile findat2 dsd;
    length SSN $ 11 Name $ 9;
    input ssn name Salary : comma. Date : date9.;
run;

/* Character strings and variable values in the same record */
data _null_;
    set work.totals;
    file totaldat;
    put 'Sales for salesrep ' salesrep
        'totaled ' sales : dollar9.;
run;
--------------------
A SAS date value is the number of days from January 1, 1960, to the given date.