In [1]:
using SparseArrays
using LinearAlgebra
function simple_pagerank(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # enough iterations that alpha^niter <= eps(T); the factor of 2 is extra safety
  for iter=1:niter
    dx = sum(x[d .== 0]) # mass currently sitting on dangling nodes (zero out-degree)
    x ./= d # scale by out-degree; dangling entries become Inf, but they have no stored out-edges so they never enter the matvec
    y = A'*x # matrix-vector product: y = (D^{-1} A)'*x
    y .*= alpha
    y .+= (1.0-alpha)/n # teleportation
    y .+= alpha*dx/n # redistribute the dangling mass uniformly
    x = y
  end
  return x
end
Out[1]:
simple_pagerank (generic function with 1 method)
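A quick sanity check on a tiny hand-built graph (a sketch, not part of the timed experiments below): a directed 3-cycle plus one dangling node should give a vector that sums to 1 and assigns the dangling node the smallest value.

B = sparse([1,2,3],[2,3,1],1.0,4,4) # directed 3-cycle; node 4 has no out-edges
xB = simple_pagerank(B, 0.85)
@show sum(xB) # should be 1 up to rounding
@show xB[4] <= minimum(xB[1:3]) # the dangling node only receives teleportation and dangling mass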
In [2]:
##
function simple_pagerank_2(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  ei,ej,ev = findnz(A)
  P = sparse(ei,ej,ev./d[ei],size(A,1),size(A,2))

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # enough iterations that alpha^niter <= eps(T); the factor of 2 is extra safety

  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  for iter=1:niter
    y = P'*x
    y .*= alpha
    lost = norm(x,1) - norm(y,1) # probability mass lost to the alpha scaling and to dangling nodes
    y .+= lost/n # add it back uniformly: teleportation plus the dangling correction
    x = y
  end
  return x
end

#
function simple_pagerank_3(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = zeros(T, n) # start from the zero vector; the iterates build up the answer term by term

  niter = ceil(Int,log(eps(T))/log(alpha)) # starting from zero, alpha^niter <= eps(T) iterations suffice
  for iter=1:niter
    dx = sum(x[d .== 0])
    x ./= d
    y = A'*x # compute a matrix vector product
    y .*= alpha
    y .+= (1.0-alpha)/n
    y .+= alpha*dx/n
    x = y
  end
  return x
end
Out[2]:
simple_pagerank_3 (generic function with 1 method)
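Starting from the zero vector, the error after k sweeps is at most alpha^k in the 1-norm (each sweep contracts the error by alpha), so ceil(log(eps(T))/log(alpha)) sweeps already reach machine precision; the doubled count in the first two versions is only extra safety. A minimal consistency check on a small random test graph (the graph below is an assumption, not the notebook's data):

ti, tj, _ = findnz(sprand(100, 100, 0.05)) # hypothetical 100-node test graph
S = sparse(ti, tj, 1.0, 100, 100)
@show norm(simple_pagerank(S, 0.85) - simple_pagerank_2(S, 0.85), 1)
@show norm(simple_pagerank(S, 0.85) - simple_pagerank_3(S, 0.85), 1)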
In [3]:
##
using NearestNeighbors
function gnk_with_dangling_and_preferential_attachment(n,k,d,p)
  # n points, k nearest neighbors, dangling probability d, p extra preferential-attachment edges
  xy = rand(2,n)
  T = BallTree(xy)
  idxs = knn(T, xy, k)[1]
  # form the edges for sparse
  ei = Int[]
  ej = Int[]
  for i=1:n
    if rand() < d # make this node dangling: give it no out-edges
      # instead, add k preferential-attachment edges from random earlier nodes
      for s = 1:k
        v = rand(1:i)
        if length(ej) > 0
          j = rand(ej) # pick a target with probability proportional to its current in-degree
        else
          j = rand(1:n)
        end
        push!(ei, v)
        push!(ej, j)
      end
      continue
    end
    for j=idxs[i]
      if i != j
        push!(ei,i)
        push!(ej,j)
      end
    end
  end
  # run p steps of preferential attachment
  for s=1:p
    v = rand(1:n)
    j = rand(ej) # pick a target with probability proportional to its current in-degree
    push!(ei, v)
    push!(ej, j)
  end
  return xy, sparse(ei,ej,1.0,n,n)
end

using Random
Random.seed!(0) # make it consistent
@time xy, A = gnk_with_dangling_and_preferential_attachment(1_000_000,3,0.5, 300000)
@show extrema(sum(A,dims=1))
@show extrema(sum(A,dims=2))
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/NearestNeighbors/Yj1kF.ji for NearestNeighbors [b8a86587-4115-5ab1-83bc-aa920d37bbce]
└ @ Base loading.jl:1240
 22.277063 seconds (8.84 M allocations: 844.550 MiB, 4.44% gc time)
extrema(sum(A, dims=1)) = (0.0, 17706.0)
extrema(sum(A, dims=2)) = (0.0, 29.0)
Out[3]:
(0.0, 29.0)
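Each node is made dangling with probability d = 0.5, though a later preferential-attachment edge can still give such a node an out-edge, and the preferential-attachment targets concentrate in-links on a few hubs, which the extrema above already hint at. A quick look (a sketch reusing the A just built):

dout = vec(sum(A, dims=2))
@show count(iszero, dout) / size(A,1) # fraction of nodes with no out-edges
@show maximum(vec(sum(A, dims=1)))    # heaviest in-degree hub, as in the extrema above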
In [4]:
##
@time x = simple_pagerank(A, 0.85)
@show extrema(x)
 35.851971 seconds (1.24 M allocations: 3.934 GiB, 1.89% gc time)
extrema(x) = (2.6338594437494965e-7, 0.003920142746450236)
Out[4]:
(2.6338594437494965e-7, 0.003920142746450236)
In [5]:
##
@time x2 = simple_pagerank_2(A,0.85)
@show extrema(x2)
 30.386858 seconds (454.74 k allocations: 3.562 GiB, 2.59% gc time)
extrema(x2) = (2.63385944373826e-7, 0.003920142746433663)
Out[5]:
(2.63385944373826e-7, 0.003920142746433663)
In [6]:
##
## This one starts from the zero vector and needs only half the iterations
@time x3 = simple_pagerank_3(A,0.85)
@show extrema(x3)
 22.164511 seconds (411.74 k allocations: 1.968 GiB, 1.28% gc time)
extrema(x3) = (2.6338594437494955e-7, 0.003920142746450235)
Out[6]:
(2.6338594437494955e-7, 0.003920142746450235)
In [7]:
##
norm(x-x2,1)
Out[7]:
4.265173895400525e-12
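The zero-start variant should agree just as closely; the same check against x3 (a sketch reusing the vectors computed above):

norm(x-x3,1)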
In [10]:
##
@show norm(x,1)
@show sum(x)
norm(x, 1) = 0.9999999999996744
sum(x) = 0.9999999999999999
Out[10]:
0.9999999999999999
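The small gap between the two numbers is a summation-order effect: norm(x,1) accumulates the million entries essentially one at a time, while sum on an array uses pairwise summation, which loses far less to rounding. A sketch of the same effect, using foldl to force strict left-to-right accumulation:

@show foldl(+, x) # sequential accumulation
@show sum(x)      # pairwise summation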
In [11]:
##
using KahanSummation
@show sum_kbn(x)
sum_kbn(x) = 1.0
Out[11]:
1.0
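sum_kbn is Kahan-Babuska compensated summation: it carries the rounding error of each partial sum forward instead of discarding it, so its error stays at the level of a rounding error or two no matter how many terms there are. A tiny illustration on made-up values (not the PageRank vector):

v = fill(0.1, 10) # 0.1 is not exactly representable in Float64
@show sum(v)      # plain accumulation leaves a small rounding error
@show sum_kbn(v)  # compensated summation recovers the digits lost above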
In [12]:
##
p = randperm(size(A,1))
@time Ap = A[p,p]
@time xp = simple_pagerank(Ap, 0.85)
x2 = similar(x)
x2[p] = xp
  2.067498 seconds (850.26 k allocations: 138.210 MiB, 2.27% gc time)
 38.848681 seconds (4.90 k allocations: 3.875 GiB, 1.93% gc time)
Out[12]:
1000000-element Array{Float64,1}:
 5.491081292420021e-7 
 6.573820349256703e-7 
 3.1971111909603536e-7
 5.301300671967482e-7 
 2.6338594437494965e-7
 7.972351951799552e-7 
 2.4833754910700754e-6
 2.6338594437494965e-7
 2.6338594437494965e-7
 6.669811441677247e-7 
 4.678790006241243e-7 
 2.6338594437494965e-7
 3.8082311090114644e-7
 ⋮
 2.6338594437494965e-7
 2.6338594437494965e-7
 1.7819987094732792e-6
 2.953685233347649e-7 
 2.6338594437494965e-7
 1.5454198561712926e-6
 3.9929054146159936e-6
 2.6338594437494965e-7
 9.770265263597898e-6 
 9.297269102367766e-7 
 3.3264882872056075e-7
 2.6338594437494965e-7
In [13]:
##
mapreduce(z -> abs(z[1]-z[2]), +, zip(x,x2))
Out[13]:
3.327991390970962e-16
In [14]:
##
using Plots
pyplot()
scatter(xy[1,:], xy[2,:], zcolor=x, markersize=0.5, alpha=0.5,
  markerstrokecolor=nothing, markerstrokewidth=0)
gui()
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/Plots/ld3vC.ji for Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80]
└ @ Base loading.jl:1240
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/PyPlot/oatAj.ji for PyPlot [d330b81b-6aea-500a-939a-2ce795aea3ee]
└ @ Base loading.jl:1240