In [1]:
using SparseArrays
using LinearAlgebra
function simple_pagerank(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
  for iter=1:niter
    dx = sum(x[d .== 0])
    x ./= d
    y = A'*x # compute a matrix vector product
    y .*= alpha
    y .+= (1.0-alpha)/n
    y .+= alpha*dx/n
    x = y
  end
  return x
end
Out[1]:
simple_pagerank (generic function with 1 method)
In [2]:
##
function simple_pagerank_2(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  ei,ej,ev = findnz(A)
  P = sparse(ei,ej,ev./d[ei],size(A,1),size(A,2))

  niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound

  x = Vector{T}(undef, n)
  fill!(x, one(T)/n) # initialize to 1/n

  for iter=1:niter
    y = P'*x
    y .*= alpha
    missing = norm(x,1) - norm(y,1)
    y .+= missing/n
    x = y
  end
  return x
end

#
function simple_pagerank_3(A::SparseMatrixCSC, alpha::T) where T
  n = size(A,1)
  d = vec(sum(A,dims=2)) # compute the out-degrees
  x = zeros(n)

  niter = ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
  for iter=1:niter
    dx = sum(x[d .== 0])
    x ./= d
    y = A'*x # compute a matrix vector product
    y .*= alpha
    y .+= (1.0-alpha)/n
    y .+= alpha*dx/n
    x = y
  end
  return x
end
Out[2]:
simple_pagerank_3 (generic function with 1 method)
In [3]:
##
using NearestNeighbors
function gnk_with_dangling_and_preferential_attachment(n,k,d,p)
  xy = rand(2,n)
  T = BallTree(xy)
  idxs = knn(T, xy, k)[1]
  # form the edges for sparse
  ei = Int[]
  ej = Int[]
  for i=1:n
    if rand() < d # skip this node because it's a dangling
      # continue
      # run k steps of PA instead
      for s = 1:k
        v = rand(1:i)
        if length(ej) > 0
          j = rand(ej) # find a random node based on it's in-degree
        else
          j = rand(1:n)
        end
        push!(ei, v)
        push!(ej, j)
      end
      continue
    end
    for j=idxs[i]
      if i != j
        push!(ei,i)
        push!(ej,j)
      end
    end
  end
  # run p steps of preferential attachment
  for s=1:p
    v = rand(1:n)
    j = rand(ej) # find a random node based on it's in-degree
    push!(ei, v)
    push!(ej, j)
  end
  return xy, sparse(ei,ej,1.0,n,n)
end

using Random
Random.seed!(0) # make it consistent
@time xy, A = gnk_with_dangling_and_preferential_attachment(1_000_000,3,0.5, 300000)
@show extrema(sum(A,dims=1))
@show extrema(sum(A,dims=2))
  6.767769 seconds (4.85 M allocations: 546.759 MiB, 5.72% gc time, 6.86% compilation time)
extrema(sum(A, dims = 1)) = (0.0, 9581.0)
extrema(sum(A, dims = 2)) = (0.0, 27.0)
Out[3]:
(0.0, 27.0)
In [4]:
##
@time x = simple_pagerank(A, 0.85)
@show extrema(x)
 13.944109 seconds (1.38 M allocations: 3.941 GiB, 5.65% gc time, 2.49% compilation time)
extrema(x) = (2.617334190260745e-7, 0.0022751054764767516)
Out[4]:
(2.617334190260745e-7, 0.0022751054764767516)
In [5]:
##
@time x2 = simple_pagerank_2(A,0.85)
@show extrema(x2)
 14.064803 seconds (474.90 k allocations: 3.564 GiB, 5.00% gc time, 2.46% compilation time)
extrema(x2) = (2.6173341902656334e-7, 0.0022751054764809596)
Out[5]:
(2.6173341902656334e-7, 0.0022751054764809596)
In [6]:
##
## This one skips extra iterations
@time x3 = simple_pagerank_3(A,0.85)
@show extrema(x3)
  7.459238 seconds (206.37 k allocations: 1.958 GiB, 5.54% gc time, 1.77% compilation time)
extrema(x3) = (2.6173341902607444e-7, 0.0022751054764767503)
Out[6]:
(2.6173341902607444e-7, 0.0022751054764767503)
In [7]:
##
norm(x-x2,1)
Out[7]:
1.867535545206317e-12
In [8]:
##
In [9]:
##
norm(x-x2,1)
Out[9]:
1.867535545206317e-12
In [10]:
##
@show norm(x,1)
@show sum(x)
norm(x, 1) = 0.9999999999999538
sum(x) = 1.0000000000000002
Out[10]:
1.0000000000000002
In [11]:
##
using KahanSummation
@show sum_kbn(x)
ArgumentError: Package KahanSummation not found in current path.
- Run `import Pkg; Pkg.add("KahanSummation")` to install the KahanSummation package.

Stacktrace:
 [1] macro expansion
   @ ./loading.jl:2296 [inlined]
 [2] macro expansion
   @ ./lock.jl:273 [inlined]
 [3] __require(into::Module, mod::Symbol)
   @ Base ./loading.jl:2271
 [4] #invoke_in_world#3
   @ ./essentials.jl:1089 [inlined]
 [5] invoke_in_world
   @ ./essentials.jl:1086 [inlined]
 [6] require(into::Module, mod::Symbol)
   @ Base ./loading.jl:2260
In [12]:
##
p = randperm(size(A,1))
@time Ap = A[p,p]
@time xp = simple_pagerank(Ap, 0.85)
x2 = similar(x)
x2[p] = xp
  0.970445 seconds (494.71 k allocations: 129.595 MiB, 1.50% gc time, 40.41% compilation time)
 14.184382 seconds (4.45 k allocations: 3.873 GiB, 6.19% gc time)
Out[12]:
1000000-element Vector{Float64}:
 1.8689317121525526e-6
 4.786449900439338e-7
 7.176027319087448e-7
 2.6173341902607455e-7
 1.3470631403702117e-6
 5.187482925172743e-7
 5.168779548279929e-7
 4.174648033465889e-7
 4.1000905666170306e-7
 2.6173341902607455e-7
 2.6173341902607455e-7
 3.460948404400897e-7
 3.729701221121562e-7
 ⋮
 1.1694232158464362e-6
 3.729701221121562e-7
 2.6257239994553696e-6
 4.1777379418849465e-7
 3.062281002605072e-7
 2.6173341902607455e-7
 2.6173341902607455e-7
 2.6173341902607455e-7
 2.6173341902607455e-7
 2.6173341902607455e-7
 7.385874663913488e-7
 2.6173341902607455e-7
In [13]:
##
mapreduce(z -> abs(z[1]-z[2]), +, zip(x,x2))
Out[13]:
5.698913902092186e-16
In [14]:
##
using Plots
pyplot()
scatter(xy[1,:], xy[2,:], zcolor=x, markersize=0.5, alpha=0.5,
  markerstrokecolor=nothing, markerstrokewidth=0)
gui()
ArgumentError: Package PyPlot not found in current path.
- Run `import Pkg; Pkg.add("PyPlot")` to install the PyPlot package.

Stacktrace:
  [1] macro expansion
    @ ./loading.jl:2296 [inlined]
  [2] macro expansion
    @ ./lock.jl:273 [inlined]
  [3] __require(into::Module, mod::Symbol)
    @ Base ./loading.jl:2271
  [4] #invoke_in_world#3
    @ ./essentials.jl:1089 [inlined]
  [5] invoke_in_world
    @ ./essentials.jl:1086 [inlined]
  [6] require(into::Module, mod::Symbol)
    @ Base ./loading.jl:2260
  [7] top-level scope
    @ ~/.julia/packages/Plots/FFuQi/src/backends.jl:883
  [8] eval
    @ ./boot.jl:430 [inlined]
  [9] _initialize_backend(pkg::Plots.PyPlotBackend)
    @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:882
 [10] backend(pkg::Plots.PyPlotBackend)
    @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:245
 [11] pyplot()
    @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:86
 [12] top-level scope
    @ In[14]:3