%% Examine localization of Katz scores
% For alpha = 1/(lambda_max + 1), where lambda_max is the largest eigenvalue
% of the adjacency matrix, we check localization of the Katz scores for
% a vertex over a wide range of graphs.
% Localization is defined as the number of vertices accounting for 90%
% of the mass of the Katz score vector.  To be completely precise, let
% K = (I-aA) be the Katz matrix.
% A Katz vector is K\ei, where ei is a unit vector with a 1 in the ith entry.
% Let x = K\ei, sort the non-zeros of x by decreasing magnitude, and
% check how many account for 90% of the mass of the vector, without the
% diagonal entry (the ith entry of x) of course.

%% Basic setup
% First add information to the paths
addpath('../../matlab'); % add the directory with the katz_push algorithm
addpath('~/dev/gaimc');

%%
% Graphs to sweep over; each name is the base name of a .smat file that the
% sweep loads from datadir.
graphs = { ...
    'as20000102', 'ca-HepTh', 'flickr2', 'soc-Slashdot0811', ...
    'cit-HepPh', 'soc-Slashdot0902', 'cit-HepTh', 'soc-Epinions1', ...
    'ca-AstroPh', 'ca-CondMat', 'wiki-Talk', ...
    'ca-GrQc', 'email-Enron', 'wiki-Vote', 'ca-HepPh', 'email-EuAll' ...
    };

% Directory holding the graph files
datadir = '../../data';

%%
% Trial counts: the sweep uses at most ntrials start vertices per graph.
% nsamp is defined here but is not referenced in the visible part of the
% script.
ntrials = 5e3;
nsamp   = 25;

%%
% Seed used to reset the random number generator before sampling vertices
seed = 0;

%%
% Tolerance and step limit handed to the Katz push solver
tol      = 1e-7;
maxsteps = 5e6;

%%
% Fractions of the total mass at which localization is measured
locpts = [0.1, 0.25, 0.5, 0.75, 0.9, 0.99];

%% Run the experimental sweep
% For every graph: load it, symmetrize its nonzero pattern, reduce it with
% largest_component, and set alpha = 1/(lambda_max + 1) from the eigs output.
% Then run katz_push_mex from up to ntrials randomly ordered start vertices,
% recording the solve time, the work counters returned by the solver, and the
% localization counts at every entry of locpts.
%
% Outputs left in the workspace:
%   results      - struct array, one entry per (graph, start vertex) trial
%   results_eigs - struct array, one entry per graph (eigs time, values, vectors)

results = [];
results_eigs = [];
for gi = 1:length(graphs)
    graph = graphs{gi};
    graphfile = fullfile(datadir, [graph '.smat']);
    A = readSMAT(graphfile);
    A = A|A';                    % symmetrize the nonzero pattern
    A = largest_component(A);    % presumably keeps the largest connected component
    A = spones(A);               % unit weight on every stored edge
    n = size(A,1);
    % Legacy seeding syntax, kept so the sampled vertices do not change
    % relative to earlier runs of this script.
    rand('state',seed);
    %alpha = 2/max(sum(A));

    % Time the eigenvalue computation and keep its output for later analysis.
    eigs_st = tic;
    [V,D] = eigs(A);
    results_eigs(end+1).dt = toc(eigs_st);
    results_eigs(end).D = diag(D);
    results_eigs(end).V = V;
    alpha = 1/(max(diag(D))+1);

    verts = randperm(n);
    for ti=1:min(ntrials,n)
        % pick a vertex
        vertex = verts(ti);
        result = [];
        result.graph = graph;
        result.alpha = alpha;
        result.trial = ti;
        result.vertex = vertex;
        result.tol = tol;

        % Time only the solver call. The third output is deliberately not
        % named hist, so the hist function is not shadowed in the workspace.
        st=tic;
        [x,r,pushhist,nverts]=katz_push_mex(A,alpha,vertex,tol,maxsteps);
        dt=toc(st);
        x(vertex) = 0;           % leave the start vertex's own score out of the mass
        result.time = dt;
        result.nverts = nverts;
        result.nsteps = length(pushhist);

        % loc(li) is the number of largest entries of x whose cumulative sum
        % first strictly exceeds locpts(li) times the total mass. It stays 0
        % when that threshold is never exceeded (zero total mass, or a
        % localization point >= 1), instead of failing on an empty find.
        xs = cumsum(sort(x,'descend'));
        total = xs(end);
        loc = zeros(size(locpts));
        if total > 0
            for li=1:length(locpts)
                k = find(xs>locpts(li)*total,1,'first');
                if ~isempty(k)
                    loc(li) = k;
                end
            end
        end
        result.loc = loc;

        % NOTE(review): loc(end-3) is the count for locpts(end-3), not for the
        % 90% point described in the file header -- confirm which is intended.
        fprintf('%10s  %6f  %5i  %6.1f (s)  %5i  %8.2e  %i\n',...
            graph, alpha, vertex, dt, nverts, norm(r), loc(end-3));

        if isempty(results), results = result;
        else  results(end+1) = result;
        end
    end
end

%%
% Show a histogram of number of vertices explored
% Drop any workspace variable named hist so that the calls below reach the
% histogram function rather than indexing into a variable.
clear hist
hist([results.nverts])

%% 
% Show a histogram of the number of steps
% The step count of every trial is stored in the nsteps field of results;
% the result structs carry no hist field, so the counts are read from nsteps.
nsteps = [results.nsteps];
hist(nsteps);
