11/12/2010
ManipulatingDatainSAS
Merging,Stacking,Transposingdata
1.PuttingDatasetsTogether:Stacking
LSB 6.2
Data:A
Data:Stacked
Obs xyzw
1A1011.
2B0264.
Data:B
3C1734.
Obs xyzw
4D10111
1 D10111
5E02642
2 E02642
3 F17343
6F17343
Obs xyz
1
1A1011
A 10 11
2B0264
3C1734
libname sastrain "c:\sas\";
Datastacked;
setsastrain.A sastrain.B ;
run;
11/12/2010
1..PuttingDatasetsTogether:Stacking keep
trackofwhereobservationscamefrom(LBS6.12)
Data:Stacked
Data: A
Obs
1
2
3
y
A
B
C
z
11
64
34
10
02
17
Data: B
Obs
1
2
3
x
D
E
F
y
10
02
17
z
11
64
34
w
1
2
3
Obs
1
2
3
4
5
6
A
B
C
D
E
F
10
2
17
10
2
17
11
64
34
11
64
34
.
.
.
1
2
3
data_set
A
A
A
B
B
B
libname sastrain "c:\sas\";
Datastacked;
setsastrain.A (IN=In_A)sastrain.B ;
ifIn_a thendata_set="A";
elsedata_set="B";
run;
2.PuttingDatasetsTogether:Interleaving ifthe
dataarealreadysorted(LSB6.3)
DATA:
Obs
1
2
3
DATA:
Obs
1
2
3
A_1
x
A
C
E
B_1
x
B
D
F
Data:Stacked
y
10
17
2
y
2
10
17
z
11
34
64
z
64
11
34
w
2
1
3
1
2
3
4
5
6
A
B
C
D
E
F
10
2
17
10
2
17
11
64
34
11
64
34
.
2
.
1
.
3
libname sastrain "c:\sas\";
Datastacked;
setsastrain.A_1sastrain.B_1;
byX;
run;
11/12/2010
3.Combinedatausingonetoone
merge(LSB6.4)
Data:Sastrain.A
Obs
1
2
3
x
A
B
C
y
10
2
17
Data: Merged
z
11
64
34
Data:Sastrain.C
Obs
1
2
3
4
x
A
C
B
D
w
1
2
3
4
Obs
1
2
3
4
x
A
B
C
D
y
10
2
17
.
z
11
64
34
.
w
1
3
2
4
Proc Sort data=sastrain.A;
byx;
Proc Sort data=sastrain.C;
byx;
data merged;
mergesastrain.D sastrain.E;
byx;
run;
4.Mergedatausingonetomany
merge(LSB6.5)
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
Data:Sastrain.E
Obs
1
2
3
x
A
C
B
z
11
64
34
22
Obs
1
2
3
4
5
x
A
A
B
C
C
y
10
2
.
17
7
z
11
64
.
34
22
w
1
1
3
2
2
w
1
2
3
ProcSortdata=sastrain.D;
byx;
ProcSortdata=sastrain.E;
byx;
datamerged;
mergesastrain.D sastrain.E;
byx;
run;
11/12/2010
5.MergingSummaryStatisticswith
OriginalData(LSB6.6)
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
Data: Merged
Obs
x
y
1
2
3
4
A
A
C
C
10
2
17
7
Data:sum_data
z
11
64
34
22
Obs
1
2
sum_y
11
64
34
22
x
A
C
_TYPE_
0
0
mean_Z
pct_y
37.5
37.5
28.0
28.0
83.3333
16.6667
70.8333
29.1667
12
12
24
24
_FREQ_
2
2
sum_y mean_Z
12
37.5
24
28.0
Mean_diff_z
-26.5
26.5
6.0
-6.0
procmeansdata=sastrain.D;
d t
t i D
byX;
outputout=sum_data SUM(y)=sum_y mean(Z)=mean_Z;
datamerged;
mergesastrain.d sum_data;
byx;
pct_y=y/sum_y*100;
mean_diff_z=zmean_z;
drop_type__freq_;
Run;
6.Combinegrandtotalwiththe
originaldata(LSB6.7)
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
Data: Merged
Obs
y
1
10
2
2
3
17
4
7
y
10
2
17
7
Data:sum_data
z
11
64
34
22
pct_y
27.7778
5.5556
47.2222
19.4444
Obs
1
_TYPE_
0
_FREQ_
4
sum_y
36
sum_y
36
36
36
36
procmeansdata=sastrain.D
d t
t i D noprint;
i t
outputout=sum_data SUM(y)=sum_y;
datadata_gt;
if_N_=1thensetsum_data;
Setsastrain.D;
pct_y=y/sum_y*100;
drop_type__freq_;
run;
procprintdata=data_gt;
var ypct_y sum_y;
run;
11/12/2010
7.Writingmultipledatasetsusing
OUTPUTstatement(LSB6.9)
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
z
11
64
34
22
Data:Data_A
Obs
1
2
x
A
A
Data:Data_C
y
10
2
z
11
64
Obs
1
2
x
C
C
y
17
7
z
34
22
dataData_A Data_C;
setsastrain.d;
ifX="C"thenoutputData_C;
ifX="A"thenoutputData_A;
run;
8.SimpleDatastepLoops(LSB6.10)
Data:Generate
Obs
1
2
3
4
5
6
1
2
3
4
5
6
y
1
4
9
16
25
36
datagenerate;
dox=1to6;
y=x**2;
output;
end;
run;
11/12/2010
9.UsingSASautomaticvariable_N_
(LSB6.15)
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
z
11
64
34
22
y
10
2
17
7
z
11
64
34
22
Data:Data_A
Obs
1
2
3
4
x
A
A
C
C
Observ
1
2
3
4
dataData_A;
setsastrain.d;
Observ =_n_;
run;
10a.Subsetting Data
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
z
11
64
34
22
Data:Sastrain.D
Obs
2
3
x
A
C
y
2
17
z
64
34
dataData_A;
setsastrain.d (firstobs =2obs =3);
run;
11/12/2010
10b.Subsetting Data
Data:Sastrain.D
Obs
1
2
3
4
x
A
A
C
C
y
10
2
17
7
z
11
64
34
22
Data:Data_a
Data:Data_big_y
Obs
1
2
A
A
10
2
11
64
Obs
1
2
A
C
10
17
11
34
dataData_A (where=(Xcontains('A')))data_Big_y (where=(y>8));
setsastrain.d ;
run;
11a.TransposingData(LSB6.14)
Data:trans
Stateyear1year2year3
ky
33 10 11
tn
44 2 64
ga
55 17 34
proc transpose
data=sastrain.trans
out=flipped;
fli
d
id state;
var year1-year3;
run;
11/12/2010
11b.TransposingData(LSB6.14)
Data:trans2
State Varn year1 Year2 Year3
ky
y
GDP
6000 7000 8000
ky
Tax
0.25 0.26 0.27
tn
GDP
8000 9000 9000
tn
Tax
0.2 0.21 0.22
procsortdata=trans2;
byvarn;
run;
proctransposedata=trans2
proc
transpose data=trans2
out=trans2_1;
byvarn;
var year1year3;
Run;
Data:trans2
Obs Varn
_NAME_
1
2
3
4
5
6
Year1
Year2
Year3
Year1
Year2
Year3
GDP
GDP
GDP
Tax
Tax
Tax
COL1
6000.00
7000.00
8000.00
0.25
0.26
0.27
COL2
8000.00
9000.00
9000.00
0.20
0.21
0.22
11b.TransposingData(LSB6.14)
Data:trans2
State Varn year1 Year2 Year3
y
GDP
6000 7000 8000
ky
ky
Tax
0.25 0.26 0.27
tn
GDP
8000 9000 9000
tn
Tax
0.2 0.21 0.22
procsortdata=trans2;
byvarn;
run;
p
proctransposedata=trans2
p
out=trans2_1;
byvarn;
Idstate
var year1year3;
Run;
Data:trans2
Obs Varn
_NAME_
1
2
3
4
5
6
Year1
Year2
Year3
Year1
Year2
Year3
GDP
GDP
GDP
Tax
Tax
Tax
ky
6000.00
7000.00
8000.00
0.25
0.26
0.27
tn
8000.00
9000.00
9000.00
0.20
0.21
0.22
11/12/2010
11b.TransposingData(LSB6.14)
Data:trans2
State Varn year1 Year2 Year3
ky
y
GDP
6000 7000 8000
ky
Tax
0.25 0.26 0.27
tn
GDP
8000 9000 9000
tn
Tax
0.2 0.21 0.22
procsortdata=trans2;
byvarn;
run;
p
proctransposedata=trans2
p
out=trans2_1name=year;
byvarn;
Idstate
var year1year3;
Run;
Data:trans2
Obs
State
1
2
3
4
5
6
ky
ky
ky
tn
tn
tn
year
GDP
Tax
Year1
Year2
Year3
Year1
Year2
Year3
6000
7000
8000
8000
9000
9000
0.25
0.26
0.27
0.20
0.21
0.22
11b.TransposingData(LSB6.14)
procsortdata=trans2;
bystate;
Data:trans2
State Varn year1 Year2 Year3
y
GDP
6000 7000 8000
ky
ky
Tax
0.25 0.26 0.27
tn
GDP
8000 9000 9000
tn
Tax
0.2 0.21 0.22
proctransposedata=trans2
out=trans2 2 name=year;
out=trans2_2name=year;
bySTATE;
idvarn;
var year1year3;
run;
Data:trans2
Obs Varn
year
ky
1
2
3
4
5
6
Year1
Year2
Year3
Year1
Year2
Year3
6000.00
7000.00
8000.00
0.25
0.26
0.27
GDP
GDP
GDP
Tax
Tax
Tax
tn
8000.00
9000.00
9000.00
0.20
0.21
0.22
11/12/2010
11b.TransposingData(LSB6.14)
Data:trans2
Data:trans2
State Varn year1 Year2 Year3
ky
GDP
6000 7000 8000
ky
Tax
0
0.25
25 0
0.26
26 0
0.27
27
tn
GDP
8000 9000 9000
tn
Tax
0.2 0.21 0.22
Obs StateVarn yearCOL1
procsortdata=trans2;
by state varn;
bystatevarn;
proctransposedata=trans2out=trans2_3
name=year;
bySTATEvarn;
var year1year3;
run;
1ky
2ky
3ky
4ky
5ky
6ky
7tn
8
8tn
9tn
10tn
11tn
12tn
GDPYear16000.00
GDPYear27000.00
GDPYear38000.00
TaxYear10.25
TaxYear20.26
TaxYear30.27
GDPYear18000.00
GDP Y 2 9000 00
GDPYear29000.00
GDPYear39000.00
TaxYear10.20
TaxYear20.21
TaxYear30.22
12.RetainStatement(LSB3.10)
Data:Running_sum
Day yzmax_zsum_y
z
Data:sastrain.g
dayy
1 10
2 02
3 17
11
64
110 111110
34
2 2 6464 12
317 3464 29
Datarunning_sum;
setsastrain.g;
Retainymax_z;
Run_sum_y +y;
Max_z=Max(max_z,z);
run;
10
11/12/2010
13.Dateformats(LSB3.8)
Data:library.txt
A.Jones11609150629Feb08
M.Rincon05/10/194929feb200823MAY2008
Z G d
Z.Grandage
03181988 31 10 2007 31
031819883110200731may08
08
K.Kaminaka 052903200802412JUN08
DATAlibrarycards;
INFILE'c:\MyRawData\Library.txt'TRUNCOVER;
INPUT
INPUT
Name $11 + 1
Name$11.+1
BirthDate MMDDYY10.+1
MMDDYY10 +1
IssueDate ANYDTDTE10.
DueDate DATE11.;
DaysOverDue =TODAY() DueDate;
Age=INT(YRDIF(BirthDate,TODAY(),'ACTUAL'));
IFIssueDate >'01JAN2008'DTHENNewCard ='yes';
RUN;
13.Dateformats(LSB3.8)cont.
Data:library.txt
A.Jones11609150629Feb08
M.Rincon05/10/194929feb200823MAY2008
Z G d
Z.Grandage
03181988 31 10 200731
031819883110200731may08
08
K.Kaminaka 052903200802412JUN08
Procprintdata=librarycards;
PROCPRINTDATA=librarycards;
FORMATIssuedate MMDDYY8.DueDate WEEKDATE17.;
TITLE'SASDateswithoutandwithFormats';RUN;
11