Name: Shahbaz Iftikhar
Reg No: BSCS-22S-0034
Semester: 07
Course: Parallel and Distributed Computing
Assignment: 03
Task 1:
Parallel Loop Optimization (CLO-1, CLO-3)
● Write a sequential C program that computes the sum of a large array (e.g., 1 million elements).
● Parallelize it using OpenMP (parallel for with reduction).
● Experiment with different scheduling policies (static, dynamic, guided) and compare execution times.
● Deliverables:
  o Source code (.c file).
  o A brief report (1 page) explaining the performance differences.
CODE:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>

#define LENGTH 1000000

/* Wall-clock time in milliseconds. */
long long get_time_ms() {
    struct timeval t;
    gettimeofday(&t, NULL);
    return (t.tv_sec * 1000LL) + (t.tv_usec / 1000);
}

/* Fill the array with random values in the range [0, 99]. */
void fill_array(int *data, int count) {
    srand(time(NULL));
    for (int i = 0; i < count; i++) {
        data[i] = rand() % 100;
    }
}

/* Baseline: plain sequential sum. */
long long sum_sequential(int *data, int count) {
    long long total = 0;
    for (int i = 0; i < count; i++) {
        total += data[i];
    }
    return total;
}

/* Parallel sum with the default schedule; reduction(+:total) gives each
   thread a private copy of total and combines the copies at the end. */
long long sum_parallel(int *data, int count) {
    long long total = 0;
    #pragma omp parallel for reduction(+:total)
    for (int i = 0; i < count; i++) {
        total += data[i];
    }
    return total;
}

/* Static schedule: iterations are split into fixed chunks of 100 and
   assigned to threads round-robin before the loop runs. */
long long sum_static(int *data, int count) {
    long long total = 0;
    #pragma omp parallel for reduction(+:total) schedule(static, 100)
    for (int i = 0; i < count; i++) {
        total += data[i];
    }
    return total;
}

/* Dynamic schedule: each thread requests the next chunk of 100 iterations
   as soon as it finishes the previous one. */
long long sum_dynamic(int *data, int count) {
    long long total = 0;
    #pragma omp parallel for reduction(+:total) schedule(dynamic, 100)
    for (int i = 0; i < count; i++) {
        total += data[i];
    }
    return total;
}

/* Guided schedule: chunks start large and shrink, never below 100
   (except possibly the last chunk). */
long long sum_guided(int *data, int count) {
    long long total = 0;
    #pragma omp parallel for reduction(+:total) schedule(guided, 100)
    for (int i = 0; i < count; i++) {
        total += data[i];
    }
    return total;
}

int main() {
    int *arr = malloc(LENGTH * sizeof(int));
    if (!arr) {
        printf("No memory\n");
        return 1;
    }
    fill_array(arr, LENGTH);

    long long t0, t1;

    t0 = get_time_ms();
    long long seq = sum_sequential(arr, LENGTH);
    t1 = get_time_ms();
    printf("Sequential: %lld ms | Sum = %lld\n", t1 - t0, seq);

    t0 = get_time_ms();
    long long par = sum_parallel(arr, LENGTH);
    t1 = get_time_ms();
    printf("Parallel Default: %lld ms | Sum = %lld\n", t1 - t0, par);

    t0 = get_time_ms();
    par = sum_static(arr, LENGTH);
    t1 = get_time_ms();
    printf("Static: %lld ms | Sum = %lld\n", t1 - t0, par);

    t0 = get_time_ms();
    par = sum_dynamic(arr, LENGTH);
    t1 = get_time_ms();
    printf("Dynamic: %lld ms | Sum = %lld\n", t1 - t0, par);

    t0 = get_time_ms();
    par = sum_guided(arr, LENGTH);
    t1 = get_time_ms();
    printf("Guided: %lld ms | Sum = %lld\n", t1 - t0, par);

    free(arr);
    return 0;
}
OUTPUT:
Explanation:
This task focuses on computing the sum of a large array (1 million elements). Initially, the
program computes the sum sequentially using a simple loop. Then, OpenMP is used to
parallelize the same task with the #pragma omp parallel for directive and the
reduction clause to safely aggregate partial results from multiple threads.
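For context, the reduction clause is roughly equivalent to the manual pattern sketched below (sum_manual is a hypothetical helper, not part of the submitted code): each thread accumulates into its own private partial sum, and the partial sums are combined one thread at a time, so no two threads ever update total concurrently.

long long sum_manual(int *data, int count) {
    long long total = 0;
    #pragma omp parallel
    {
        long long local = 0;                 /* thread-private partial sum */
        #pragma omp for
        for (int i = 0; i < count; i++)
            local += data[i];
        #pragma omp critical                 /* combine partial sums one thread at a time */
        total += local;
    }
    return total;
}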
We implemented and compared three OpenMP scheduling strategies (a runtime-selectable variant is sketched after this list):
● Static scheduling: Divides the loop iterations equally among threads before execution.
● Dynamic scheduling: Threads grab chunks of iterations as they finish their previous
work. This helps with load balancing.
● Guided scheduling: Starts with large chunks that decrease over time, balancing load and
reducing overhead.
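To try the policies without recompiling, the loop could also use schedule(runtime) and select the policy through omp_set_schedule() or the OMP_SCHEDULE environment variable. A minimal sketch (sum_runtime is a hypothetical helper, not part of the submitted code):

long long sum_runtime(int *data, int count, omp_sched_t kind, int chunk) {
    omp_set_schedule(kind, chunk);           /* e.g. omp_sched_dynamic with chunk 100 */
    long long total = 0;
    #pragma omp parallel for reduction(+:total) schedule(runtime)
    for (int i = 0; i < count; i++)
        total += data[i];
    return total;
}

Called as sum_runtime(arr, LENGTH, omp_sched_static, 100), sum_runtime(arr, LENGTH, omp_sched_dynamic, 100), and so on, one timing loop can cover all three policies.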
Performance Results:
● Static scheduling performed best here because the workload is uniform (every array element takes the same time to add), so dividing the iterations up front carries almost no overhead.
● Dynamic and guided scheduling add run-time chunk-assignment overhead; they are better suited to uneven workloads, which this summation is not.
● Parallel reduction showed a significant speedup compared to sequential summation.
Task 2:
Task Parallelism with OpenMP (CLO-3)
● Implement a program that performs two independent computations (e.g., matrix multiplication and Fibonacci series) using OpenMP sections or task constructs.
● Measure speedup compared to sequential execution.
CODE:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>

#define DIM 200
#define FIB_N 40

/* Wall-clock time in milliseconds. */
long long current_ms() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (tv.tv_sec * 1000LL) + (tv.tv_usec / 1000);
}

/* Task A: multiply two DIM x DIM matrices. */
void matrix_op() {
    int mat1[DIM][DIM], mat2[DIM][DIM], result[DIM][DIM];
    for (int i = 0; i < DIM; i++)
        for (int j = 0; j < DIM; j++) {
            mat1[i][j] = i + j;
            mat2[i][j] = i - j;
        }
    for (int i = 0; i < DIM; i++)
        for (int j = 0; j < DIM; j++) {
            result[i][j] = 0;
            for (int k = 0; k < DIM; k++)
                result[i][j] += mat1[i][k] * mat2[k][j];
        }
    /* Touch the result so the multiplication cannot be optimized away. */
    volatile int sink = result[DIM - 1][DIM - 1];
    (void)sink;
}

/* Naive exponential-time recursive Fibonacci. */
int calc_fib(int n) {
    if (n <= 1) return n;
    return calc_fib(n - 1) + calc_fib(n - 2);
}

/* Task B: compute the FIB_N-th Fibonacci number. */
void fib_task() {
    /* volatile keeps the call from being optimized out. */
    volatile int val = calc_fib(FIB_N);
    (void)val;
}

int main() {
    double t_serial, t_parallel;
    long long start, end;

    /* Sequential baseline: run both tasks one after the other. */
    start = current_ms();
    matrix_op();
    fib_task();
    end = current_ms();
    t_serial = (double)(end - start) / 1000.0;
    printf("Time Serial: %.2f s\n", t_serial);

    /* Parallel version: each task runs in its own section, on its own thread. */
    start = current_ms();
    #pragma omp parallel sections
    {
        #pragma omp section
        matrix_op();
        #pragma omp section
        fib_task();
    }
    end = current_ms();
    t_parallel = (double)(end - start) / 1000.0;
    printf("Time Parallel: %.2f s\n", t_parallel);

    printf("Speedup: %.2f\n", t_serial / t_parallel);
    return 0;
}
OUTPUT:
Explanation:
This task demonstrates task parallelism by executing two independent computations:
1. Matrix multiplication of two 200×200 matrices.
2. Recursive calculation of the 40th Fibonacci number.
First, both computations were executed sequentially to obtain a baseline time. Then OpenMP's #pragma omp parallel sections directive was used to run them in parallel, with each computation placed in its own section so that it executes on its own thread. An alternative formulation using task constructs is sketched below.
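The assignment also allows task constructs; a minimal sketch of the same pairing expressed with tasks (an alternative, not what the submitted code uses) is: one thread creates the two tasks and any idle thread in the team picks them up.

#pragma omp parallel
#pragma omp single
{
    #pragma omp task
    matrix_op();             /* task A: matrix multiplication */
    #pragma omp task
    fib_task();              /* task B: Fibonacci */
    #pragma omp taskwait     /* block until both tasks have finished */
}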
Observations:
● Parallel execution led to reduced total execution time.
● OpenMP sections are ideal when two or more independent tasks can be run simultaneously.
● Speedup was calculated by dividing the sequential time by the parallel time, showing a clear improvement through parallelism (see the note below).
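As a note on what to expect (a reasoning check, not a measured result): Speedup = T_serial / T_parallel. Because the two sections run concurrently but each computation is itself sequential, the parallel time can never fall below the longer of the two, so Speedup <= T_serial / max(T_matrix, T_fib) <= 2 for two independent tasks. How close the measured speedup comes to 2 depends on how evenly the matrix multiplication and the Fibonacci computation are balanced.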