% [nFailedSDCV, nFailedSICV] = KolSmitest(Data, wavname, database);
% 
% This function takes a data set and applies the two-sample
% Kolmogorov-Smirnov test in the ten-fold CV (SDCV) and SI-CV scenarios.
% For the SI-CV scenario, the user has to provide the wavname array, which
% contains the name of each sample, so that the data can be separated by
% speaker.
% 
% [nFailedSDCV, nFailedSICV] = KolSmitest(Data, wavname, database);
% 
% Data = input data set
% wavname = character array containing the name of each data sample, used
%           to identify the speaker of each sample for SI-CV
% database = name of the database to which the data belongs. Options are
%         'des'
%         'berlin'
%         'serbian'
%         'aibomont'
%         'aiboohm'
% 
% 
% 
% To run the K-S test directly on two datasets, use the following code:
% 
% clear
% 
% AiboM = load('~/Data/aiboMontlarge5C.mat');
% AiboO = load('~/Data/aiboOhmlarge5C.mat');
% 
% % interdatabase
% k = size(AiboM.Data,2);
% for i=1:k
%  test(i) = kstest2(AiboO.Data(:,i),AiboM.Data(:,i));
% end
% 
% Written by Ali Hassan 18 Mar 2011


function [nFailedSDCV, nFailedSICV] = KolSmitest(Data, wavname, database)
% Counts, per cross-validation split, how many feature columns of Data are
% rejected by the two-sample Kolmogorov-Smirnov test (kstest2) when the
% training portion is compared against the held-out portion.
%
% Inputs
%   Data     - samples-by-features matrix; each column is tested separately.
%   wavname  - character matrix with one row per sample; the speaker label
%              is read from fixed character columns that depend on database.
%   database - 'des', 'berlin', 'serbian', 'aibomont' or 'aiboohm'
%              (case-insensitive).
%
% Outputs
%   nFailedSDCV - 1-by-10 row vector; entry i is the number of columns for
%                 which kstest2 rejected the null hypothesis in fold i of a
%                 random 10-fold partition.
%   nFailedSICV - 1-by-(number of speakers found) row vector; entry i is
%                 the same count when speaker i is held out.
%
% Errors
%   'Unidentified database name' for an unknown database, and explicit
%   errors when a numeric speaker field cannot be parsed or when fewer than
%   two speakers are found (the training set would otherwise be empty).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SDCV

nFolds = 10;

CV = cvpartition(size(Data,1),'kfold',nFolds);
nFailedSDCV = zeros(1,nFolds);
for i=1:nFolds
    fprintf('Working on fold %i .... \n',i);

    nFailedSDCV(i) = countRejections(Data(CV.training(i),:), ...
                                     Data(CV.test(i),:));
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SICV

% Build a logical samples-by-speakers membership matrix.
switch lower(database)
    case 'des'
%         dw - Female
%         hw - Male
%         jw - Male
%         kw - Female
        % The first character of the name identifies the speaker.
        ids = wavname(:,1);
        speakeridx = membershipMatrix(ids, unique(ids));
    case 'berlin'
        % The first two characters hold a numeric speaker id.
        ids = parseSpeakerIds(wavname(:,1:2));
        speakeridx = membershipMatrix(ids, unique(ids));
    case 'serbian'
%       SK
%       MV
%       MM
%       SZ
%       OK
%       BM
        codes = ['sk';'mv';'mm';'sz';'ok';'bm'];
        % Use the row count explicitly: length() would return the number
        % of characters per name whenever that exceeds the sample count.
        nNames = size(wavname,1);
        prefix = lower(wavname(:,1:2));
        speakeridx = false(nNames, size(codes,1));
        for i=1:size(codes,1)
            % A sample belongs to the speaker when both characters match.
            speakeridx(:,i) = all(prefix == repmat(codes(i,:),[nNames 1]), 2);
        end
    case 'aibomont'
        % 1,2,3,4, ...... 25
        speakeridx = membershipMatrix(parseSpeakerIds(wavname(:,6:7)), 1:25);
    case 'aiboohm'
        % 1,2,3,4, ...... 32
        speakeridx = membershipMatrix(parseSpeakerIds(wavname(:,5:6)), 1:32);
    otherwise
        error('Unidentified database name');
end

% Drop speakers that have no samples. The explicit dimension keeps this
% column-wise even when there is only a single sample row.
speakeridx(:, ~any(speakeridx,1)) = [];

nSpeakers = size(speakeridx,2); % for speaker independent
if nSpeakers < 2
    % With fewer than two speakers the training or test set is empty and
    % kstest2 cannot be evaluated.
    error('KolSmitest:notEnoughSpeakers', ...
          'At least two speakers must be identified for SI-CV (found %d).', ...
          nSpeakers);
end

nFailedSICV = zeros(1,nSpeakers);
for i=1:nSpeakers
    fprintf('Working on speaker %i .... \n',i);

    heldOut = speakeridx(:,i);
    nFailedSICV(i) = countRejections(Data(~heldOut,:), Data(heldOut,:));
end


function nRejected = countRejections(trData, teData)
% Number of columns for which kstest2 rejects the null hypothesis that the
% training and test samples come from the same distribution.
nFeatures = size(trData,2);
rejected = false(1,nFeatures);
for j=1:nFeatures
    rejected(j) = kstest2(trData(:,j), teData(:,j));
end
nRejected = sum(rejected);


function idx = membershipMatrix(ids, speakers)
% Logical matrix with idx(r,s) true when sample r carries label speakers(s).
ids = ids(:);
idx = false(numel(ids), numel(speakers));
for s=1:numel(speakers)
    idx(:,s) = ids == speakers(s);
end


function ids = parseSpeakerIds(field)
% Converts a character matrix (one row per sample) into a numeric column.
% str2double is used instead of str2num so that file-name text is never
% evaluated as MATLAB code; unparsable rows are reported explicitly.
ids = str2double(cellstr(field));
if any(isnan(ids))
    error('KolSmitest:badSpeakerId', ...
          'Could not parse a numeric speaker id from %d sample name(s).', ...
          sum(isnan(ids)));
end
