function [S, comp] = compound_identification(results,library,library_name,d_cluster,t_cluster,t_peak,t_corr)
%compound_identification - Dataset peak matching to reference metabolite spectra
%This function matches the input dataset peaks to a set of reference metabolites
%characterized by their spectral peaks (ppm and intensity).
%
% Syntax:  [S, comp] = compound_identification(results,library,library_name,d_cluster,t_cluster,t_peak,t_corr)
%
% Inputs:
%  - results:      focus output. Fields read here:
%                    results.name        output folder (report files are
%                                        written under <name>/library/<library_name>)
%                    results.peaksR      cell array of peak structs with fields
%                                        incs, ppm and index
%                    results.input.data  only its number of columns is used
%                                        (must equal the length of each incs)
%  - library:      cell variable containing metabolite reference spectra
%  - library_name: library name (string), used as output sub-folder name
%  - d_cluster:    minimum ppm-distance between peaks to apply cluster splitting
%                  (default 0.05 when omitted, empty or not positive)
%  - t_cluster:    cluster search interval expansion (default 0.01)
%  - t_peak:       intra-cluster peak ppm tolerance (default 0.002)
%  - t_corr:       minimum correlation for matching peaks (default 0.8)
%
% Outputs:
%  - S:            Matrix containing information about all metabolite
%                  matches. One row per retained peak; column 1 is the index
%                  of the metabolite in library and columns 2-9 are the
%                  columns returned by resume_compound (labelled in the
%                  report files as id_cluster, id_peak, ppm, intensity,
%                  intracluster-match, intracluster-corr, intercluster-match,
%                  intercluster-corr). Only rows whose intra- or
%                  inter-cluster correlation exceeds t_corr are kept.
%                  Empty (0x9) when nothing was matched.
%  - comp:         Cell array containing information about each metabolite
%                  match (comp{i}.peaks holds the per-peak statistics).
%
% Files written under <results.name>/library/<library_name>:
%   compoundID.txt, compoundID_details.txt, compoundID_peaklist.txt
%   and whatever resume_compound stores using the figures/id_<i>_<name> prefix.
%
% Author:  Arnald Alonso - Rheumatology Research Group (VHIR)
% email:   arnald.alonso@vhir.org
% Website: http://www.urr.cat
% June 2013; Last revision: 01-June-2013

%------------- BEGIN CODE --------------
%% PARAMETER VALIDATION
if nargin < 3
    error('compound_identification:notEnoughInputs', ...
          'results, library and library_name are required.');
end
% Omitted, empty, NaN or non-positive tuning parameters fall back to defaults.
if nargin < 4 || isempty(d_cluster) || ~all(d_cluster(:) > 0), d_cluster = 0.05;  end
if nargin < 5 || isempty(t_cluster) || ~all(t_cluster(:) > 0), t_cluster = 0.01;  end
if nargin < 6 || isempty(t_peak)    || ~all(t_peak(:)    > 0), t_peak    = 0.002; end
if nargin < 7 || isempty(t_corr)    || ~all(t_corr(:)    > 0), t_corr    = 0.8;   end
%% DIRECTORY STRUCTURE
% Calling mkdir with output arguments neither warns nor fails when the folder
% already exists, so no directory listing is needed. Each level is created
% explicitly, parent first.
root_dir = sprintf('%s/library',results.name);
lib_dir  = sprintf('%s/library/%s',results.name,library_name);
fig_dir  = sprintf('%s/library/%s/figures',results.name,library_name);
needed   = {root_dir, lib_dir, fig_dir};
for k = 1:numel(needed)
    [ok,msg] = mkdir(needed{k});
    if ~ok
        error('compound_identification:mkdirFailed', ...
              'Cannot create folder "%s": %s',needed{k},msg);
    end
end
%% VARIABLES
npeaks      = length(results.peaksR);
intensities = zeros(npeaks,size(results.input.data,2));
ppms        = zeros(npeaks,1);
indexes     = zeros(npeaks,1);
for i=1:npeaks
    pk = results.peaksR{i};
    % Stacking a row of zeros and taking the column-wise max clips negative
    % values to zero.
    intensities(i,:) = max([pk.incs;zeros(size(pk.incs))]);
    ppms(i)          = pk.ppm;
    indexes(i)       = pk.index;
end
C  = corrcoef(intensities');      % peak-to-peak correlation matrix
I  = median(intensities,2);       % per-peak median intensity
% Explicit dimension: with a single peak, median(intensities) would collapse
% the row vector to a scalar and break the concatenation below.
M  = median(intensities,1);
ct = corrcoef([intensities;M]');
ct = ct(end,1:(end-1));           % correlation of every peak with the median profile
%% COMPOUND IDENTIFICATION WORKFLOW
summary_path = sprintf('%s/library/%s/compoundID.txt',results.name,library_name);
details_path = sprintf('%s/library/%s/compoundID_details.txt',results.name,library_name);
fileID = fopen(summary_path,'w');
if fileID < 0
    error('compound_identification:fopenFailed','Cannot open "%s" for writing.',summary_path);
end
fileID2 = fopen(details_path,'w');
if fileID2 < 0
    fclose(fileID);
    error('compound_identification:fopenFailed','Cannot open "%s" for writing.',details_path);
end
comp = cell(1,length(library));
S    = [];
try
    fprintf(fileID,'METABOLITE\tNclusters\tN-intra\tN-intra-matched\tN-inter-matched\tINTRA-MATCHING\tINTER-MATCHING\n');
    for i=1:length(library)
        % COMPOUND IDENTIFICATION
        comp{i} = clusterize(library{i},d_cluster,t_cluster,t_peak);
        fprintf('- Metabolite %d/%d (%s)...\n',i,length(library),comp{i}.name);
        comp{i} = intracluster(comp{i},ppms,C,t_peak,t_corr);
        comp{i} = intercluster(comp{i},ppms,C,t_corr);
        s       = sprintf('%s/library/%s/figures/id_%d_%s',results.name,library_name,i,comp{i}.name);
        stats   = resume_compound(comp{i},ppms,I,C,indexes,ct,1,s);
        % COMPOUND MATCHING STATISTICS
        % kintra : clusters holding more than one reference peak
        % kintraM: of those, clusters flagged id==1 with more than one matched peak
        kintra  = 0;
        kintraM = 0;
        for j=1:comp{i}.nclusters
            if nnz(comp{i}.spect(:,3)==j) > 1
                kintra = kintra + 1;
                if comp{i}.cluster{j}.id == 1
                    kintraM = kintraM + (nnz(comp{i}.cluster{j}.indM>0) > 1);
                end
            end
        end
        ninter = size(comp{i}.intercluster,2);
        if comp{i}.nclusters > 1
            t2 = sprintf('%1.2f',ninter/comp{i}.nclusters);
        else
            t2 = 'NA';
        end
        if kintra > 0
            t1 = sprintf('%1.2f',kintraM/kintra);
        else
            t1 = 'NA';
        end
        fprintf(fileID,'%s\t%d\t%d\t%d\t%d\t%s\t%s\n',comp{i}.name,comp{i}.nclusters,kintra,kintraM,ninter,t1,t2);
        % DETAILS
        fprintf(fileID2,'# COMPOUND\n%s\n# REFERENCE\ncluster\tppm\tintensity\n',comp{i}.name);
        for j = 1:size(comp{i}.spect,1)
            fprintf(fileID2,'%d\t%1.4f\t%1.2f\n',comp{i}.spect(j,3),comp{i}.spect(j,1),comp{i}.spect(j,2));
        end
        fprintf(fileID2,'# MATCHING STATISTICS\nNclusters\tN-intra\tN-intra-matched\tN-inter-matched\tINTRA-MATCHING\tINTER-MATCHING\n');
        fprintf(fileID2,'%d\t%d\t%d\t%d\t%s\t%s\n',comp{i}.nclusters,kintra,kintraM,ninter,t1,t2);
        fprintf(fileID2,'# PEAK DETAILS\nid_cluster\tid_peak\tppm\tintensity\tintracluster-match\tintracluster-corr\tintercluster-match\tintercluster-corr\n');
        for j = 1:size(stats,1)
            fprintf(fileID2,'%d\t%d\t%1.3f\t%1.2e\t%d\t%1.2f\t%d\t%1.2f\n',stats(j,:));
        end
        comp{i}.peaks = stats;
        S = [S; [i*ones(size(stats,1),1) stats]]; %#ok<AGROW>
    end
catch err
    % Do not leak the report handles when a helper fails mid-run.
    fclose(fileID);
    fclose(fileID2);
    rethrow(err);
end
fclose(fileID);
fclose(fileID2);
% Keep only peaks whose intra- or inter-cluster correlation exceeds t_corr.
% With an empty library (or no stats rows) S is still [], which cannot be
% column-indexed; return a well-formed empty matrix instead.
if isempty(S)
    S = zeros(0,9);
else
    S = S( (S(:,7) > t_corr) | (S(:,9) > t_corr) , : );
end
%% PEAKLIST FILE
peaklist_path = sprintf('%s/library/%s/compoundID_peaklist.txt',results.name,library_name);
fileID = fopen(peaklist_path,'w');
if fileID < 0
    error('compound_identification:fopenFailed','Cannot open "%s" for writing.',peaklist_path);
end
try
    fprintf(fileID,'id_peak\tppm\tintensity\tid_metabolite\tid_cluster\tintracluster-match\tintracluster-corr\tintercluster-match\tintercluster-corr\n');
    for i=1:size(S,1)
        % NOTE(review): every numeric field uses %d; per the MATLAB fprintf
        % documentation non-integer values are then printed in exponential
        % notation. The format is kept unchanged to preserve the file layout.
        fprintf(fileID,'%d\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n',S(i,3),S(i,4),S(i,5),library{S(i,1)}.name,S(i,2),S(i,6),S(i,7),S(i,8),S(i,9));
    end
catch err
    fclose(fileID);
    rethrow(err);
end
fclose(fileID);