Note: the LENGTH statement (as in "length ID $4;") and the NOOBS option haven't been taught yet
in this course; however, I know them from prior self-learning by watching YouTube playlists. I am
also currently watching playlists on C++ and Python.
Code: Part 1
/* Part 1: build the five-person Individuals dataset from inline records. */
data Individuals;
    length Gender $ 1;                 /* one-character gender code */
    input Gender Age Height Weight;    /* Gender is already character via LENGTH */
    datalines;
M 45 70 180
F 30 65 135
M 60 72 200
F 28 62 120
M 50 68 175
;
run;

/* Listing of every record: LABEL shows the unit-bearing headers, NOOBS hides the Obs column. */
title "Individuals Dataset";
proc print data=Individuals noobs label;
    label Height = "Height (inches)" Weight = "Weight (pounds)";
run;

/* Whole-sample statistics; these supply the overall averages for the paragraph. */
title "Overall Summary Statistics";
proc means data=Individuals n mean std min max;
    var Age Height Weight;
run;

/* Same statistics split by Gender; these supply the per-gender averages for the paragraph. */
title "Summary Statistics by Gender";
proc means data=Individuals n mean std min max;
    class Gender;
    var Age Height Weight;
run;

/* Clear the title so it does not carry over into later output. */
title;
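The dataset has 5 people (3 male, 2 female). The average age is 42.6 years, the average height
is 67.4 inches, and the average weight is 162 pounds. On average, the males are older (51.7 years),
taller (70 in), and heavier (185 lb) than the females (29 years, 63.5 in, 127.5 lb). From these
summaries there is a clear difference between the males and females in this sample, although with
only five observations the comparison is descriptive rather than statistically tested.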
Part 2
/* Part 2: the same five people, with some Age and Height values entered as missing (.). */
data Individuals_Missing;
    length Gender $ 1;
    input Gender Age Height Weight;    /* Gender is already character via LENGTH */
    datalines;
M 45 70 180
F . 65 135
M 60 . 200
F 28 62 120
M . 68 175
;
run;

/* Count non-missing (N) and missing (NMISS) values for each numeric variable. */
title "Missing Value Counts";
proc means data=Individuals_Missing n nmiss;
    var Age Height Weight;
run;

/* Save each variable's mean into a one-row dataset; NOPRINT suppresses the report. */
proc means data=Individuals_Missing noprint;
    var Age Height Weight;
    output out=_means_
           mean(Age)=mean_Age
           mean(Height)=mean_Height
           mean(Weight)=mean_Weight;
run;

/* Mean imputation: every missing value is replaced by that variable's mean.
   The helper columns from _means_ are dropped from the output dataset. */
data Individuals_Filled (drop=mean_: _TYPE_ _FREQ_);
    /* Read the single row of means once; variables read by SET are retained,
       so the means stay available for every record that follows. */
    if _n_ = 1 then set _means_;
    set Individuals_Missing;
    /* COALESCE keeps the original value when present, otherwise takes the mean. */
    Age    = coalesce(Age, mean_Age);
    Height = coalesce(Height, mean_Height);
    Weight = coalesce(Weight, mean_Weight);
run;

title "Dataset After Mean Quote Fixing End Quote";
proc print data=Individuals_Filled noobs label;
    label Height="Height (inches)"
          Weight="Weight (pounds)";
run;

/* Confirm that no missing values remain (NMISS should be 0) and show the resulting means. */
title "Post-Government Fixing";
proc means data=Individuals_Filled n nmiss mean;
    var Age Height Weight;
run;

/* Clear the title so it does not carry over into later output. */
title;
Part 3
/* Part 3: read comma-separated employee records from inline data. */
data Employees;
    length ID $ 4 Name $ 30;
    /* DSD reads comma-delimited fields, so the space inside Name is kept;
       TRUNCOVER stops INPUT from moving to the next line when a record is short. */
    infile datalines dsd truncover;
    input ID Name Age Salary;          /* ID and Name are already character via LENGTH */
    datalines;
E001,John Doe,45,75000
E002,Jane Smith,30,55000
E003,Bob Brown,50,82000
E004,Sue Davis,28,46000
E005,Tom White,38,68000
;
run;

/* Listing of all employees with Salary shown as whole dollars. */
title "Employees Dataset";
proc print data=Employees noobs label;
    format Salary dollar12.;
    label Salary="Salary ($)";
run;

/* Average salary as a PROC MEANS report, limited to two decimal places. */
title "Average Salary (PROC MEANS)";
proc means data=Employees maxdec=2 mean;
    var Salary;
run;

/* Same average written to a dataset (no report) so it can be printed with a dollar format. */
proc means data=Employees noprint;
    var Salary;
    output out=AvgSalary mean(Salary)=Average_Salary;
run;

title "Average Salary (Formatted)";
proc print data=AvgSalary noobs;
    format Average_Salary dollar12.2;
run;

/* Clear the title so it does not carry over into later output. */
title;
Part 4
/* Part 4: read bank customer records using column input. */
data Bank_Customers;
    length ID $4;
    /* Column input: each field is taken from fixed column positions,
       so the data lines below must stay aligned starting at column 1. */
    input ID $ 1-4
          Balance 6-10
          InterestRate 12-14;
    datalines;
C001 12345 1.5
C002 54321 2.0
C003 67890 1.8
C004 98765 1.9
C005 13579 2.1
;
run;

/* Listing of all customers.
   The LABEL option is required for PROC PRINT to use the "Interest Rate (%)"
   header; without it the LABEL statement below has no visible effect and the
   raw variable name is printed. NOOBS hides the Obs column, matching the other
   listings in this file. */
proc print data=Bank_Customers label noobs;
    title "Bank Customers";
    label InterestRate="Interest Rate (%)";
    format Balance comma12.0 InterestRate 4.1;
run;

/* Write the total balance and the average interest rate to a dataset (no report). */
proc means data=Bank_Customers noprint;
    var Balance InterestRate;
    output out=Bank_Summary sum(Balance)=Total_Balance
                            mean(InterestRate)=Average_InterestRate;
run;

/* Print the summary row; NOOBS matches the other summary listings in this file. */
proc print data=Bank_Summary noobs;
    title "Total Balance and Average Interest Rate";
    format Total_Balance comma12.0 Average_InterestRate 4.2;
run;

/* Clear the title so it does not carry over into later output. */
title;
Part 5
/* Part 5: read transactions whose date and amount need informats. */
data Transactions;
    length ID $ 4;
    /* The colon modifier reads each field up to the next blank using the given informat:
       MMDDYY10. turns 12/01/2023 into a SAS date, DOLLAR8. strips the $ sign. */
    input ID
          Date   : mmddyy10.
          Amount : dollar8.;
    /* Display formats stored with the dataset, so PROC PRINT needs no FORMAT statement. */
    format Date mmddyy10.;
    format Amount dollar10.2;
    datalines;
T001 12/01/2023 $100.50
T002 11/15/2023 $250.00
T003 10/22/2023 $75.75
T004 09/18/2023 $500.25
T005 08/30/2023 $120.60
;
run;

title "Transactions (Formatted Date and Amount)";
proc print data=Transactions noobs;
run;

/* Write the total and average transaction amount to a dataset (no report). */
proc means data=Transactions noprint;
    var Amount;
    output out=Trans_Summary
           sum(Amount)=Total_Amount
           mean(Amount)=Average_Amount;
run;

title "Total and Average Transaction Amount";
proc print data=Trans_Summary noobs;
    format Total_Amount Average_Amount dollar12.2;
run;

/* Clear the title so it does not carry over into later output. */
title;
Part 6
/* Part 6: one-row summary of the Employees dataset.
   Age and Salary each appear twice in COLUMN under different aliases so that
   a different statistic can be requested for each alias. */
title "Employees Summary Report";
proc report data=Employees nowindows headline headskip;
    column Age=Count_Employees
           Age=Avg_Age
           Salary=Total_Salary
           Salary=Avg_Salary;
    define Count_Employees / analysis n    "Employee Count";
    define Avg_Age         / analysis mean "Average Age"    format=8.1;
    define Total_Salary    / analysis sum  "Total Salary"   format=dollar12.;
    define Avg_Salary      / analysis mean "Average Salary" format=dollar12.;
run;

/* Clear the title so it does not carry over into later output. */
title;