In [1]:
using SparseArrays
using LinearAlgebra
function simple_pagerank(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # enough iterations that alpha^niter <= eps(T); the factor of 2 is extra safety
  for iter=1:niter
    dx = sum(x[d .== 0]) # mass currently sitting on dangling nodes (zero out-degree)
    x ./= d # scale by out-degree; dangling entries become Inf, but they have no stored out-edges so they never enter the matvec
    y = A'*x # matrix-vector product: y = (D^{-1} A)'*x
    y .*= alpha
    y .+= (1.0-alpha)/n # teleportation
    y .+= alpha*dx/n # redistribute the dangling mass uniformly
    x = y
  end
  return x
end
Out[1]:
simple_pagerank (generic function with 1 method)
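A quick sanity check on a tiny hand-built graph (a sketch, not part of the timed experiments below): a directed 3-cycle plus one dangling node should give a vector that sums to 1 and assigns the dangling node the smallest value.

B = sparse([1,2,3],[2,3,1],1.0,4,4) # directed 3-cycle; node 4 has no out-edges
xB = simple_pagerank(B, 0.85)
@show sum(xB) # should be 1 up to rounding
@show xB[4] <= minimum(xB[1:3]) # the dangling node only receives teleportation and dangling mass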
In [2]:
##
function simple_pagerank_2(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  ei,ej,ev = findnz(A)
  P = sparse(ei,ej,ev./d[ei],size(A,1),size(A,2))

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # enough iterations that alpha^niter <= eps(T); the factor of 2 is extra safety

  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  for iter=1:niter
    y = P'*x
    y .*= alpha
    lost = norm(x,1) - norm(y,1) # probability mass lost to the alpha scaling and to dangling nodes
    y .+= lost/n # add it back uniformly: teleportation plus the dangling correction
    x = y
  end
  return x
end

#
function simple_pagerank_3(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = zeros(T, n) # start from the zero vector; the iterates build up the answer term by term

  niter = ceil(Int,log(eps(T))/log(alpha)) # starting from zero, alpha^niter <= eps(T) iterations suffice
  for iter=1:niter
    dx = sum(x[d .== 0])
    x ./= d
    y = A'*x # compute a matrix vector product
    y .*= alpha
    y .+= (1.0-alpha)/n
    y .+= alpha*dx/n
    x = y
  end
  return x
end
Out[2]:
simple_pagerank_3 (generic function with 1 method)
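Starting from the zero vector, the error after k sweeps is at most alpha^k in the 1-norm (each sweep contracts the error by alpha), so ceil(log(eps(T))/log(alpha)) sweeps already reach machine precision; the doubled count in the first two versions is only extra safety. A minimal consistency check on a small random test graph (the graph below is an assumption, not the notebook's data):

ti, tj, _ = findnz(sprand(100, 100, 0.05)) # hypothetical 100-node test graph
S = sparse(ti, tj, 1.0, 100, 100)
@show norm(simple_pagerank(S, 0.85) - simple_pagerank_2(S, 0.85), 1)
@show norm(simple_pagerank(S, 0.85) - simple_pagerank_3(S, 0.85), 1)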
In [3]:
##
using NearestNeighbors
function gnk_with_dangling_and_preferential_attachment(n,k,d,p)
  # n points, k nearest neighbors, dangling probability d, p extra preferential-attachment edges
  xy = rand(2,n)
  T = BallTree(xy)
  idxs = knn(T, xy, k)[1]
  # form the edges for sparse
  ei = Int[]
  ej = Int[]
  for i=1:n
    if rand() < d # make this node dangling: give it no out-edges
      # instead, add k preferential-attachment edges from random earlier nodes
      for s = 1:k
        v = rand(1:i)
        if length(ej) > 0
          j = rand(ej) # pick a target with probability proportional to its current in-degree
        else
          j = rand(1:n)
        end
        push!(ei, v)
        push!(ej, j)
      end
      continue
    end
    for j=idxs[i]
      if i != j
        push!(ei,i)
        push!(ej,j)
      end
    end
  end
  # run p steps of preferential attachment
  for s=1:p
    v = rand(1:n)
    j = rand(ej) # pick a target with probability proportional to its current in-degree
    push!(ei, v)
    push!(ej, j)
  end
  return xy, sparse(ei,ej,1.0,n,n)
end

using Random
Random.seed!(0) # make it consistent
@time xy, A = gnk_with_dangling_and_preferential_attachment(1_000_000,3,0.5, 300000)
@show extrema(sum(A,dims=1))
@show extrema(sum(A,dims=2))
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/NearestNeighbors/Yj1kF.ji for NearestNeighbors [b8a86587-4115-5ab1-83bc-aa920d37bbce]
└ @ Base loading.jl:1240
 22.277063 seconds (8.84 M allocations: 844.550 MiB, 4.44% gc time)
extrema(sum(A, dims=1)) = (0.0, 17706.0)
extrema(sum(A, dims=2)) = (0.0, 29.0)
Out[3]:
(0.0, 29.0)
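Each node is made dangling with probability d = 0.5, though a later preferential-attachment edge can still give such a node an out-edge, and the preferential-attachment targets concentrate in-links on a few hubs, which the extrema above already hint at. A quick look (a sketch reusing the A just built):

dout = vec(sum(A, dims=2))
@show count(iszero, dout) / size(A,1) # fraction of nodes with no out-edges
@show maximum(vec(sum(A, dims=1)))    # heaviest in-degree hub, as in the extrema above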
In [4]:
##
@time x = simple_pagerank(A, 0.85)
@show extrema(x)
 35.851971 seconds (1.24 M allocations: 3.934 GiB, 1.89% gc time)
extrema(x) = (2.6338594437494965e-7, 0.003920142746450236)
Out[4]:
(2.6338594437494965e-7, 0.003920142746450236)
In [5]:
##
@time x2 = simple_pagerank_2(A,0.85)
@show extrema(x2)
 30.386858 seconds (454.74 k allocations: 3.562 GiB, 2.59% gc time)
extrema(x2) = (2.63385944373826e-7, 0.003920142746433663)
Out[5]:
(2.63385944373826e-7, 0.003920142746433663)
In [6]:
##
## This one starts from the zero vector and needs only half the iterations
@time x3 = simple_pagerank_3(A,0.85)
@show extrema(x3)
 22.164511 seconds (411.74 k allocations: 1.968 GiB, 1.28% gc time)
extrema(x3) = (2.6338594437494955e-7, 0.003920142746450235)
Out[6]:
(2.6338594437494955e-7, 0.003920142746450235)
In [7]:
##
norm(x-x2,1)
Out[7]:
4.265173895400525e-12
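The zero-start variant should agree just as closely; the same check against x3 (a sketch reusing the vectors computed above):

norm(x-x3,1)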
In [10]:
##
@show norm(x,1)
@show sum(x)
norm(x, 1) = 0.9999999999996744
sum(x) = 0.9999999999999999
Out[10]:
0.9999999999999999
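The small gap between the two numbers is a summation-order effect: norm(x,1) accumulates the million entries essentially one at a time, while sum on an array uses pairwise summation, which loses far less to rounding. A sketch of the same effect, using foldl to force strict left-to-right accumulation:

@show foldl(+, x) # sequential accumulation
@show sum(x)      # pairwise summation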
In [11]:
##
using KahanSummation
@show sum_kbn(x)
sum_kbn(x) = 1.0
Out[11]:
1.0
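sum_kbn is Kahan-Babuska compensated summation: it carries the rounding error of each partial sum forward instead of discarding it, so its error stays at the level of a rounding error or two no matter how many terms there are. A tiny illustration on made-up values (not the PageRank vector):

v = fill(0.1, 10) # 0.1 is not exactly representable in Float64
@show sum(v)      # plain accumulation leaves a small rounding error
@show sum_kbn(v)  # compensated summation recovers the digits lost above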
In [12]:
##
p = randperm(size(A,1))
@time Ap = A[p,p]
@time xp = simple_pagerank(Ap, 0.85)
x2 = similar(x)
x2[p] = xp
  2.067498 seconds (850.26 k allocations: 138.210 MiB, 2.27% gc time)
 38.848681 seconds (4.90 k allocations: 3.875 GiB, 1.93% gc time)
Out[12]:
1000000-element Array{Float64,1}:
 5.491081292420021e-7 
 6.573820349256703e-7 
 3.1971111909603536e-7
 5.301300671967482e-7 
 2.6338594437494965e-7
 7.972351951799552e-7 
 2.4833754910700754e-6
 2.6338594437494965e-7
 2.6338594437494965e-7
 6.669811441677247e-7 
 4.678790006241243e-7 
 2.6338594437494965e-7
 3.8082311090114644e-7
 ⋮
 2.6338594437494965e-7
 2.6338594437494965e-7
 1.7819987094732792e-6
 2.953685233347649e-7 
 2.6338594437494965e-7
 1.5454198561712926e-6
 3.9929054146159936e-6
 2.6338594437494965e-7
 9.770265263597898e-6 
 9.297269102367766e-7 
 3.3264882872056075e-7
 2.6338594437494965e-7
In [13]:
##
mapreduce(z -> abs(z[1]-z[2]), +, zip(x,x2))
Out[13]:
3.327991390970962e-16
In [14]:
##
using Plots
pyplot()
scatter(xy[1,:], xy[2,:], zcolor=x, markersize=0.5, alpha=0.5,
  markerstrokecolor=nothing, markerstrokewidth=0)
gui()
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/Plots/ld3vC.ji for Plots [91a5bcdd-55d7-5caf-9e0b-520d859cae80]
└ @ Base loading.jl:1240
┌ Info: Recompiling stale cache file /Users/dgleich/.julia/compiled/v1.2/PyPlot/oatAj.ji for PyPlot [d330b81b-6aea-500a-939a-2ce795aea3ee]
└ @ Base loading.jl:1240