function Example161
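% Generates N=500 two-dimensional points from a three-component gaussian
% mixture (means m1, m2, m3, covariances 0.1*I, 0.2*I, 0.3*I, mixing
% probabilities 0.4, 0.4, 0.2), plots them, and then estimates the mixture
% parameters with the EM algorithm, starting from the initial guesses
% m_ini, s_ini and Pa_ini.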
randn('seed',0);
m1=[1, 1]'; m2=[3, 3]';m3=[2, 6]';
m=[m1 m2 m3];
S(:,:,1)=0.1*eye(2);
S(:,:,2)=0.2*eye(2);
S(:,:,3)=0.3*eye(2);
P=[0.4 0.4 0.2];
N=500;
sed=0;
[X,y]=mixt_model(m,S,P,N,sed);
plot_data(X,y,m,1)
m1_ini=[0; 2];m2_ini=[5; 2];m3_ini=[5; 5];
m_ini=[m1_ini m2_ini m3_ini];
s_ini=[.15 .27 .4];
Pa_ini=[1/3 1/3 1/3];
e_min=10^(-5);
[m_hat,s_hat,Pa,iter,Q_tot,e_tot]=em_alg_function(X,m_ini,s_ini,Pa_ini,e_min)
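% Optional follow-up (not part of the original example): em_alg_function
% returns the estimated means as a Jxl matrix, so they can be visualized
% against the data on a separate figure with
% plot_data(X,y,m_hat',2)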
function [X,y]=mixt_model(m,S,P,N,sed)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FUNCTION
% [X,y]=mixt_model(m,S,P,N,sed)
% Generates a set of data vectors that stem from a mixture of normal
% distributions (also used in Chapter 2).
%
% INPUT ARGUMENTS:
% m: lxc matrix whose i-th column contains the
% (l-dimensional) mean of the i-th normal distribution.
% S: lxlxc matrix whose i-th lxl two-dimensional "slice" is the
% covariance matrix corresponding to the i-th normal distribution.
% P: c-dimensional vector whose i-th coordinate contains
% the a priori probability for the i-th normal distribution.
% N: the total number of points to be generated by the mixture
% distribution.
% sed: the seed used for the initialization of the built-in MATLAB
% random generator function "rand".
%
% OUTPUT ARGUMENTS:
% X: lxN matrix whose columns are the produced vectors.
% y: N-dimensional vector whose i-th element indicates the
% distribution that generated the i-th vector.
%
% (c) 2010 S. Theodoridis, A. Pikrakis, K. Koutroumbas, D. Cavouras
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rand('seed',sed);
[l,c]=size(m);
%Constructing the P_acc vector. This is necessary for picking randomly
%one of the c normal distributions in order to generate a point.
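%For example, with P=[0.4 0.4 0.2] (as in the script above), P_acc becomes
%[0.4 0.8 1.0] and a uniform random number t in [0,1] selects distribution
%sum(t>P_acc)+1.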
P_acc=P(1);
for i=2:c
t=P_acc(i-1)+P(i);
P_acc=[P_acc t];
end
% Generation of the data set
X=[];
y=[];
for i=1:N
t=rand;
ind=sum(t>P_acc)+1; % Index of the normal distribution that will generate the i-th vector
X=[X; mvnrnd(m(:,ind)',S(:,:,ind),1)];
y=[y ind];
end
X=X';
function plot_data(X,y,m,h)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FUNCTION
% plot_data(X,y,m,h)
% Plotting utility, capable of visualizing 2-dimensional datasets that
% consist of, at most, 7 classes. It plots with different colors: (a) the
% vectors of a data set that belong to different classes and (b) the mean
% vectors of the classes, provided that the data are 2-dimensional and the
% total number of classes is at most 7.
%
% INPUT ARGUMENTS:
% X: lxN matrix, whose columns are the data vectors to be plotted.
% y: N-dimensional vector whose i-th component is the class label
% of the i-th data vector.
% m: lxc matrix, whose j-th column corresponds to the
% mean vector of the j-th class.
% h: the handle of the figure on which the data will be plotted.
%
% (c) 2010 S. Theodoridis, A. Pikrakis, K. Koutroumbas, D. Cavouras
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[l,N]=size(X); % N=no. of data vectors, l=dimensionality
[l,c]=size(m); % c=no. of classes
if(l~=2) || (c>7)
fprintf('NO PLOT CAN BE GENERATED\n')
return
else
pale=['r.'; 'g.'; 'b.'; 'y.'; 'm.'; 'c.';'co'];
figure(h)
% Plot of the data vectors
hold on
for i=1:N
plot(X(1,i),X(2,i),pale(y(i),:))
hold on
end
% Plot of the class centroids
for j=1:c
plot(m(1,j),m(2,j),'k+')
hold on
end
end
function [m,s,Pa,iter,Q_tot,e_tot]=em_alg_function(x,m,s,Pa,e_min)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FUNCTION
% [m,s,Pa,iter,Q_tot,e_tot]=em_alg_function(x,m,s,Pa,e_min)
% EM algorithm for estimating the parameters of a mixture of normal
% distributions, with diagonal covariance matrices.
% WARNING: IT ONLY RUNS FOR THE CASE WHERE THE COVARIANCE MATRICES
% ARE OF THE FORM sigma^2*I. IN ADDITION, IF sigma_i^2=0 FOR SOME
% DISTRIBUTION AT AN ITERATION, IT IS ARBITRARILY SET EQUAL TO 0.001.
%
% INPUT ARGUMENTS:
% x: lxN matrix, each column of which is a feature vector.
% m: lxJ matrix, whose j-th column is the initial
% estimate for the mean of the j-th distribution.
% s: 1xJ vector, whose j-th element is the initial estimate
% of the variance for the j-th distribution.
% Pa: J-dimensional vector, whose j-th element is the initial
% estimate of the a priori probability of the j-th distribution.
% e_min: threshold used in the termination condition of the EM
% algorithm.
%
% OUTPUT ARGUMENTS:
% m: it has the same structure as the input argument m and contains
% the final estimates of the means of the normal distributions.
% s: it has the same structure as the input argument s and contains
% the final estimates of the variances of the normal
% distributions.
% Pa: J-dimensional vector, whose j-th element is the final estimate
% of the a priori probability of the j-th distribution.
% iter: the number of iterations required for the convergence of the
% EM algorithm.
% Q_tot: vector containing the value of the EM Q-function (expected
% complete-data log-likelihood) at each iteration.
% e_tot: vector containing the error value at each iteration.
%
% (c) 2010 S. Theodoridis, A. Pikrakis, K. Koutroumbas, D. Cavouras
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
x=x';
m=m';
[p,n]=size(x);
[J,n]=size(m);
e=e_min+1;
Q_tot=[];
e_tot=[];
iter=0;
while (e>e_min)
iter=iter+1;
e;
P_old=Pa;
m_old=m;
s_old=s;
% Determine P(j|x_k; theta(t))
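% P(j|x_k) = Pa(j)*N(x_k; m_j, s_j*I) / sum_i Pa(i)*N(x_k; m_i, s_i*I)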
for k=1:p
temp=gauss(x(k,:),m,s);
P_tot=temp*Pa';
for j=1:J
P(j,k)=temp(j)*Pa(j)/P_tot;
end
end
% Determine the Q-function (expected complete-data log-likelihood)
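% Q = sum_k sum_j P(j|x_k)*( log N(x_k; m_j, s_j*I) + log Pa(j) ), where
% log N(x_k; m_j, s_j*I) = -(n/2)*log(2*pi*s_j) - ||x_k - m_j||^2/(2*s_j)
% and n denotes the dimensionality of the feature vectors.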
Q=0;
for k=1:p
for j=1:J
Q=Q+P(j,k)*(-(n/2)*log(2*pi*s(j)) - sum((x(k,:)-m(j,:)).^2)/(2*s(j)) + log(Pa(j)));
end
end
Q_tot=[Q_tot Q];
% Determine the means
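% m_j = sum_k P(j|x_k)*x_k / sum_k P(j|x_k)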
for j=1:J
a=zeros(1,n);
for k=1:p
a=a+P(j,k)*x(k,:);
end
m(j,:)=a/sum(P(j,:));
end
% Determine the variances
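% s_j = sum_k P(j|x_k)*||x_k - m_j||^2 / ( n*sum_k P(j|x_k) ); the weighted
% squared distances are averaged over the n dimensions because the
% covariance matrices are constrained to the isotropic form s_j*I.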
for j=1:J
b=0;
for k=1:p
b=b+ P(j,k)*((x(k,:)-m(j,:))*(x(k,:)-m(j,:))');
end
s(j)=b/(n*sum(P(j,:)));
if(s(j)<10^(-10))
s(j)=0.001;
end
end
% Determine the a priori probabilities
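% Pa(j) = (1/N)*sum_k P(j|x_k), where N (denoted p here) is the number of
% data vectors.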
for j=1:J
a=0;
for k=1:p
a=a+P(j,k);
end
Pa(j)=a/p;
end
e=sum(abs(Pa-P_old))+sum(sum(abs(m-m_old)))+sum(abs(s-s_old));
e_tot=[e_tot e];
end
function [z]=gauss(x,m,s)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FUNCTION (auxiliary)
% [z]=gauss(x,m,s)
% Takes as input the mean values and the variances of a number of Gaussian
% distributions and a vector x and computes the value of each
% Gaussian at x.
%
% NOTE: It is assumed that the covariance matrix of each gaussian
% distribution is diagonal with equal diagonal elements, i.e., of the
% form sigma^2*I, where I is the identity matrix.
%
% INPUT ARGUMENTS:
% x: l-dimensional row vector, on which the values of the J
% gaussian distributions will be calculated
% m: Jxl matrix, whose j-th row corresponds to the
% mean of the j-th gaussian distribution
% s: J-dimensional row vector whose j-th component corresponds to
% the variance for the j-th gaussian distribution (it is assumed
% that the covariance matrices of the distributions are of the
% form sigma^2*I, where I is the lxl identity matrix)
%
% OUTPUT ARGUMENTS:
% z: J-dimensional vector whose j-th component is the value of the
% j-th gaussian distribution at x.
%
% (c) 2010 S. Theodoridis, A. Pikrakis, K. Koutroumbas, D. Cavouras
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
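% The value of the j-th gaussian at x is z(j) = (2*pi*s(j))^(-l/2) *
% exp( -||x-m(j,:)||^2 / (2*s(j)) ).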
[J,l]=size(m);
[p,l]=size(x);
z=[];
for j=1:J
t=(x-m(j,:))*(x-m(j,:))';
c=1/(2*pi*s(j))^(l/2);
z=[z c*exp(-t/(2*s(j)))];
end
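function test_gauss_against_mvnpdf(x,m,s)
% Auxiliary consistency check (a minimal sketch, not part of the original
% code): compares the output of the gauss subfunction above with MATLAB's
% mvnpdf, assuming the Statistics Toolbox is available (it is already
% required for mvnrnd). The j-th gaussian uses the isotropic covariance
% matrix s(j)*I.
[J,l]=size(m);
z=gauss(x,m,s);
z_ref=zeros(1,J);
for j=1:J
z_ref(j)=mvnpdf(x,m(j,:),s(j)*eye(l));
end
disp(max(abs(z-z_ref))) % should be numerically zero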