%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% HMM-EEG analysis - data preparation %
% %
% This file belongs to the following paper: %
% The Discovery of Processing Stages: %
% Analyzing EEG data with Hidden Markov Models. %
% Borst, J.P. & Anderson, J.R. %
% %
% %
% Jelmer Borst (j.p.borst@rug.nl) %
% John Anderson %
% 140723 %
% %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Instructions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This file contains MATLAB code to prepare the data for the HSMM analysis.
% The preparation consists of 2 steps:
% - snapshot creation
% - normalization + PCA + normalization
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% General
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Before running, set the working directory to the location of this file.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load only the variable this script uses, so that any other variables that
% may be stored in the MAT-file cannot silently overwrite workspace variables.
load('hmm_data_for_preparation.mat', 'data_base');
% This loads:
% data_base: 4894022x36 matrix that contains the preprocessed EEG data.
% The first 4 columns contain subject number, trial number,
% sample-in-trial, and condition information, respectively.
% The other 32 columns contain the data. Each row contains one
% data sample. The columns contain the electrodes in the
% following order: FP1, FPZ, FP2, F7, F3, FZ, F4, F8, FT7, FC3,
% FCZ, FC4, FT8, T7, C3, CZ, C4, T8, TP7, CP3, CPZ, CP4, TP8,
% P7, P3, PZ, P4, P8, POZ, O1, OZ, and O2.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Snapshot creation (takes some time)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Snapshot creation: within every trial, each run of multfac consecutive
% samples is placed side by side in a single row (sample 1 channels, sample 2
% channels, ...). The result, data_base_multi, has the columns
% [subject, trial, snapshot-in-trial, condition, snapshot data].
rate = 250; %original rate is 250 hz
ssample = 80; %snapshot length is 80 ms
multfac = ssample/(1000/rate); %number of samples per snapshot
if multfac < 1 || multfac ~= round(multfac)
    error('hmm_prep:badSnapshotLength', ...
        'Snapshot length (%g ms) must be a positive integer multiple of the sample period (%g ms).', ...
        ssample, 1000/rate);
end

n_info = 4; %leading columns: subject, trial, sample-in-trial, condition
n_chan = size(data_base,2) - n_info; %number of data columns per sample

subjects = unique(data_base(:,1))';
subj_blocks = cell(numel(subjects),1); %one finished snapshot matrix per subject
for s_idx = 1:numel(subjects)
    subj = subjects(s_idx);
    disp(subj);

    %select the rows of this subject once, instead of masking the full data set for every trial
    subj_data = data_base(data_base(:,1) == subj,:);
    trials_subj = unique(subj_data(:,2))'; %trials for this subject

    %snapshot creation trial by trial; blocks are collected in a cell array
    %and concatenated once, to avoid re-copying a growing matrix
    trial_blocks = cell(numel(trials_subj),1);
    for t_idx = 1:numel(trials_subj)
        tr = trials_subj(t_idx);
        trial_rows = subj_data(subj_data(:,2) == tr,:);
        cur_condition = trial_rows(1,n_info); %condition is taken from the first sample of the trial

        %only complete snapshots are kept; trailing samples are dropped
        n_snap = floor(size(trial_rows,1)/multfac);
        trial_data = trial_rows(1:(n_snap*multfac),(n_info+1):end);

        %after transposing, each group of multfac samples is contiguous in
        %memory, so every column of the reshaped matrix is one snapshot
        snapshots = reshape(trial_data.', n_chan*multfac, n_snap).';

        trial_blocks{t_idx} = [repmat([subj tr],n_snap,1), (1:n_snap)', ...
            repmat(cur_condition,n_snap,1), snapshots];
    end
    subj_blocks{s_idx} = vertcat(trial_blocks{:});
end
data_base_multi = vertcat(subj_blocks{:}); %data_base_multi contains the snapshots
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Normalization, PCA, Normalization
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Normalize data: z-score every snapshot column. Columns 1-4 (subject, trial,
% sample, condition) are left untouched. The column means and standard
% deviations are kept in mu_base_multi_1 and sigma_base_multi_1.
[score, mu_base_multi_1, sigma_base_multi_1] = zscore(data_base_multi(:,5:end));
data_base_multi(:,5:end) = score;

% PCA
data_base_multi_pca = data_base_multi; %create new matrix for pca'd data by copying the original one
if exist('princomp', 'file')
    % Same call as before wherever princomp is still available.
    [coeff_base_multi, score, lat_base_multi] = princomp(score);
else
    % princomp is not available in recent toolbox releases; this is the
    % pca call that corresponds to princomp(x) (SVD, no economy-size output).
    [coeff_base_multi, score, lat_base_multi] = pca(score, 'Algorithm', 'svd', 'Economy', false);
end

% Normalize PCA results
[score, mu, sigma] = zscore(score);
data_base_multi_pca(:,5:end) = score;

% Also rescale the coefficients, so that the z-scored data is still obtained
% as score * coeff'. Since raw_score = score .* sigma + mu and the mean of
% each PCA score column is zero up to rounding error, each coefficient column
% only has to be multiplied by the corresponding sigma. mu is deliberately
% not added to the coefficients: it is not part of this identity.
coeff_base_multi = bsxfun(@times, coeff_base_multi, sigma);
% Save the prepared data sets together with the transformation parameters.
save('hmm_data.mat', ...
    'data_base_multi_pca', ... normalized PCA scores (columns 5:end) plus the 4 info columns
    'data_base_multi', ...     z-scored snapshots (columns 5:end) plus the 4 info columns
    'coeff_base_multi', ...    PCA coefficients, rescaled to match the normalized scores
    'lat_base_multi', ...      third output of the PCA call
    'mu_base_multi_1', ...     column means returned by the first zscore call
    'sigma_base_multi_1');   % column standard deviations returned by the first zscore call
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%