function [x,r,hist,nverts] = Lp_push_normalized(A,ei,omega,tol,maxsteps)
% Lp_push_normalized Implement a push strategy for solving Laplacian systems
%
% [x,r,hist,nverts] = Lp_push_normalized(A,ei,omega,tol,maxsteps)
%
% L = D-A
% Approximately solve Lx=ei by exploiting the graph nature of
% L to only access vertex out-degrees.
%
% This algorithm is based on a solution using the normalized laplacian
% so that Lx = ei <-> (I - sqrt(D)^-1*A*sqrt(D)^-1)y = 1/sqrt(d_i) e_i
% with y = sqrt(D)*x, i.e. x = sqrt(D)^-1*y.
%
% Inputs
%   A        adjacency matrix of the graph; it is handed to sparse_to_csr.
%            Only the sparsity structure is used: the degree of a vertex is
%            the number of stored entries in its row and every edge is
%            treated as having weight one.
%   ei       index (or indices) of the vertex where the right-hand side is
%            non-zero.  The seed must not be an isolated vertex.
%   omega    relaxation factor applied to each push (default 1.01)
%   tol      stop once the largest residual magnitude drops below tol
%            (default 1e-3)
%   maxsteps maximum number of pushes (default 40*N, N = number of vertices)
%
% Outputs
%   x        approximate solution (N-by-1)
%   r        residual of the normalized system at termination (N-by-1)
%   hist     one row per push:
%            [edges touched so far, max |r|, sum |r|, pushed vertex, time]
%   nverts   number of distinct vertices that were pushed at least once
%
% David F. Gleich
% University of British Columbia, 2010

% History
% :2010-03-04: Initial coding based on Lp_push

% convert graph to csr (edge weights are not needed)
[rp,ci] = sparse_to_csr(A);
N=length(rp)-1; % number of vertices

if ~exist('omega','var') || isempty(omega), omega = 1.01; end
if ~exist('tol','var') || isempty(tol), tol = 1e-3; end
if ~exist('maxsteps','var') || isempty(maxsteps), maxsteps = 40*N; end

% Algorithm: We store the residual vector in an updatable max-heap
% structure.  Then, we examine the vertex with the largest residual and
% distribute its residual to the neighbors.

d = diff(rp); d = d(:);   % vertex degrees as a column, same shape as x
idhalf = 1./sqrt(d);      % D^{-1/2}; Inf for isolated vertices

% An isolated seed has a zero row in L, so Lx=ei has no solution and the
% scaled right-hand side would be Inf.
if any(d(ei)==0)
    error('Lp_push_normalized:isolatedSeed', ...
        'the seed vertex ei must have at least one neighbor');
end

% x is the solution vector (holds y = sqrt(D)*x until the final rescale)
x=zeros(N,1);
% r is the residual vector of the normalized system
r=zeros(N,1);
r(ei) = idhalf(ei);

% heap data structure
%   T(1:n) heap array of vertex ids, ordered by abs(r)
%   L(v)   position of vertex v inside T, 0 if v is not in the heap
n=0; % n size of heap
T=zeros(N,1); L=zeros(N,1);

% setup structure for the residual
nedges = 0;
hist = zeros(maxsteps,5); % nedges, max resid, sumresid, vertex, time
dt = tic; % start the timer
visited = zeros(N,1);
sumresid = 0;

% add elements from the current residual to the heap
for w=find(r)'
    sumresid = sumresid + abs(r(w));
    % add w to the heap
    n=n+1; T(n)=w; L(w)=n; k=n;
    % move the element up the heap
    j=k; tj=T(j);
    while j>1                        % j==1 => element at top of heap
        j2=floor(j/2); tj2=T(j2);    % parent element
        if abs(r(tj2))>abs(r(tj)), break;      % parent is larger, so done
        else                         % parent is smaller, so swap
            T(j2)=tj; L(tj)=j2; T(j)=tj2; L(tj2)=j; j=j2;
        end
    end
end

nsteps = 0; % number of pushes actually performed
for step=1:maxsteps
    % nothing left to push (e.g. empty seed, or tol<=0 with a drained heap);
    % popping would otherwise index T(0)
    if n==0, break; end
    nsteps = step;

    v=T(1); ntop=T(n); T(1)=ntop; L(ntop)=1; n=n-1; % pop the head off the heap
    L(v) = 0; % remove v from the heap
    k=1; kt=ntop;                   % move element T(1) down the heap
    while 1
        i=2*k;
        if i>n, break; end          % end of heap
        if i==n, it=T(i);           % only one child, so skip
        else                        % pick the largest child
            lc=T(i); rc=T(i+1); it=lc;
            if abs(r(rc))>abs(r(lc)), i=i+1; it=rc; end % right child is larger
        end
        if abs(r(kt))>abs(r(it)), break;     % at correct place, so end
        else T(k)=it; L(it)=k; T(i)=kt; L(kt)=i; k=i; % swap
        end
    end                             % end heap down

    visited(v) = 1;

    % now, v is the element with the largest residual.  Let's
    % distribute its residual to everyone else.

    % set the value to push out to others and relax our own residual
    val = r(v);
    deltaresid = 0;                 % change in sum(abs(r)) during this step
    x(v) = x(v) + omega*val;
    deltaresid = deltaresid - abs(r(v));
    r(v) = val-omega*val;           % non-zero whenever omega ~= 1
    deltaresid = deltaresid + abs(r(v));

    if r(v) ~= 0
        % re-add element v if it still has a residual
        % element not in heap, only move the element up the heap
        n=n+1; T(n)=v; L(v)=n; k=n;
        % move the element up the heap
        j=k; tj=T(j);
        while j>1                        % j==1 => element at top of heap
            j2=floor(j/2); tj2=T(j2);    % parent element
            if abs(r(tj2))>abs(r(tj)), break;      % parent is larger, so done
            else                         % parent is smaller, so swap
                T(j2)=tj; L(tj)=j2; T(j)=tj2; L(tj2)=j; j=j2;
            end
        end
    end

    val = omega*val;
    a=idhalf(v);

    % for each vertex adjacent to v, push to it!
    for nzi=rp(v):rp(v+1)-1         % nzi is the edge index
        w=ci(nzi);                  % w is the target

        deltaresid = deltaresid - abs(r(w));
        r(w)=r(w)+val*a*idhalf(w);  % entry of sqrt(D)^-1*A*sqrt(D)^-1
        deltaresid = deltaresid + abs(r(w));
        % check if w is in the heap
        k=L(w); onlyup=0;
        if k==0
            % element not in heap, only move the element up the heap
            n=n+1; T(n)=w; L(w)=n; k=n; kt=w; onlyup=1;
        else kt=T(k);
        end
        % abs(r(w)) may have shrunk (sign cancellation), so an element
        % already in the heap is first moved down, then up
        while ~onlyup
            i=2*k;
            if i>n, break; end          % end of heap
            if i==n, it=T(i);           % only one child, so skip
            else                        % pick the largest child
                lc=T(i); rc=T(i+1); it=lc;
                if abs(r(rc))>abs(r(lc)), i=i+1; it=rc; end % right child is larger
            end
            if abs(r(kt))>abs(r(it)), break;      % at correct place, so end
            else T(k)=it; L(it)=k; T(i)=kt; L(kt)=i; k=i; % swap
            end
        end
        % move the element up the heap
        j=k; tj=T(j);
        while j>1                        % j==1 => element at top of heap
            j2=floor(j/2); tj2=T(j2);    % parent element
            if abs(r(tj2))>abs(r(tj)), break;      % parent is larger, so done
            else                         % parent is smaller, so swap
                T(j2)=tj; L(tj)=j2; T(j)=tj2; L(tj2)=j; j=j2;
            end
        end
    end

    nedges = nedges + (rp(v+1) - rp(v));
    sumresid = sumresid + deltaresid;

    hist(step,1) = nedges;
    if n>0, resid = abs(r(T(1))); else resid = 0; end
    hist(step,2) = resid;
    hist(step,3) = sumresid;
    hist(step,4) = v;
    hist(step,5) = toc(dt);

    if abs(resid) < tol
        break;
    end
end

% Undo the change of variables: the iteration computed y = sqrt(D)*x, so
% x = sqrt(D)^-1*y.  Isolated vertices are skipped to avoid 0*Inf = NaN.
hasdeg = d>0;
x(hasdeg) = x(hasdeg).*idhalf(hasdeg);

hist = hist(1:nsteps,:); % truncate hist to the pushes performed
nverts = sum(visited);