using SparseArrays
using LinearAlgebra
function simple_pagerank(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
x = Vector{T}(undef, n)
fill!(x, one(T)/n) # initialize to 1/n
niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
for iter=1:niter
dx = sum(x[d .== 0])
x ./= d
y = A'*x # compute a matrix vector product
y .*= alpha
y .+= (1.0-alpha)/n
y .+= alpha*dx/n
x = y
end
return x
end
simple_pagerank (generic function with 1 method)
##
function simple_pagerank_2(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
ei,ej,ev = findnz(A)
P = sparse(ei,ej,ev./d[ei],size(A,1),size(A,2))
niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
x = Vector{T}(undef, n)
fill!(x, one(T)/n) # initialize to 1/n
for iter=1:niter
y = P'*x
y .*= alpha
missing = norm(x,1) - norm(y,1)
y .+= missing/n
x = y
end
return x
end
#
function simple_pagerank_3(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
x = zeros(n)
niter = ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
for iter=1:niter
dx = sum(x[d .== 0])
x ./= d
y = A'*x # compute a matrix vector product
y .*= alpha
y .+= (1.0-alpha)/n
y .+= alpha*dx/n
x = y
end
return x
end
simple_pagerank_3 (generic function with 1 method)
##
using NearestNeighbors
function gnk_with_dangling_and_preferential_attachment(n,k,d,p)
xy = rand(2,n)
T = BallTree(xy)
idxs = knn(T, xy, k)[1]
# form the edges for sparse
ei = Int[]
ej = Int[]
for i=1:n
if rand() < d # skip this node because it's a dangling
# continue
# run k steps of PA instead
for s = 1:k
v = rand(1:i)
if length(ej) > 0
j = rand(ej) # find a random node based on it's in-degree
else
j = rand(1:n)
end
push!(ei, v)
push!(ej, j)
end
continue
end
for j=idxs[i]
if i != j
push!(ei,i)
push!(ej,j)
end
end
end
# run p steps of preferential attachment
for s=1:p
v = rand(1:n)
j = rand(ej) # find a random node based on it's in-degree
push!(ei, v)
push!(ej, j)
end
return xy, sparse(ei,ej,1.0,n,n)
end
using Random
Random.seed!(0) # make it consistent
@time xy, A = gnk_with_dangling_and_preferential_attachment(1_000_000,3,0.5, 300000)
@show extrema(sum(A,dims=1))
@show extrema(sum(A,dims=2))
6.767769 seconds (4.85 M allocations: 546.759 MiB, 5.72% gc time, 6.86% compilation time) extrema(sum(A, dims = 1)) = (0.0, 9581.0) extrema(sum(A, dims = 2)) = (0.0, 27.0)
(0.0, 27.0)
##
@time x = simple_pagerank(A, 0.85)
@show extrema(x)
13.944109 seconds (1.38 M allocations: 3.941 GiB, 5.65% gc time, 2.49% compilation time) extrema(x) = (2.617334190260745e-7, 0.0022751054764767516)
(2.617334190260745e-7, 0.0022751054764767516)
##
@time x2 = simple_pagerank_2(A,0.85)
@show extrema(x2)
14.064803 seconds (474.90 k allocations: 3.564 GiB, 5.00% gc time, 2.46% compilation time) extrema(x2) = (2.6173341902656334e-7, 0.0022751054764809596)
(2.6173341902656334e-7, 0.0022751054764809596)
##
## This one skips extra iterations
@time x3 = simple_pagerank_3(A,0.85)
@show extrema(x3)
7.459238 seconds (206.37 k allocations: 1.958 GiB, 5.54% gc time, 1.77% compilation time) extrema(x3) = (2.6173341902607444e-7, 0.0022751054764767503)
(2.6173341902607444e-7, 0.0022751054764767503)
##
norm(x-x2,1)
1.867535545206317e-12
##
##
norm(x-x2,1)
1.867535545206317e-12
##
@show norm(x,1)
@show sum(x)
norm(x, 1) = 0.9999999999999538 sum(x) = 1.0000000000000002
1.0000000000000002
##
using KahanSummation
@show sum_kbn(x)
ArgumentError: Package KahanSummation not found in current path. - Run `import Pkg; Pkg.add("KahanSummation")` to install the KahanSummation package. Stacktrace: [1] macro expansion @ ./loading.jl:2296 [inlined] [2] macro expansion @ ./lock.jl:273 [inlined] [3] __require(into::Module, mod::Symbol) @ Base ./loading.jl:2271 [4] #invoke_in_world#3 @ ./essentials.jl:1089 [inlined] [5] invoke_in_world @ ./essentials.jl:1086 [inlined] [6] require(into::Module, mod::Symbol) @ Base ./loading.jl:2260
##
p = randperm(size(A,1))
@time Ap = A[p,p]
@time xp = simple_pagerank(Ap, 0.85)
x2 = similar(x)
x2[p] = xp
0.970445 seconds (494.71 k allocations: 129.595 MiB, 1.50% gc time, 40.41% compilation time) 14.184382 seconds (4.45 k allocations: 3.873 GiB, 6.19% gc time)
1000000-element Vector{Float64}: 1.8689317121525526e-6 4.786449900439338e-7 7.176027319087448e-7 2.6173341902607455e-7 1.3470631403702117e-6 5.187482925172743e-7 5.168779548279929e-7 4.174648033465889e-7 4.1000905666170306e-7 2.6173341902607455e-7 2.6173341902607455e-7 3.460948404400897e-7 3.729701221121562e-7 ⋮ 1.1694232158464362e-6 3.729701221121562e-7 2.6257239994553696e-6 4.1777379418849465e-7 3.062281002605072e-7 2.6173341902607455e-7 2.6173341902607455e-7 2.6173341902607455e-7 2.6173341902607455e-7 2.6173341902607455e-7 7.385874663913488e-7 2.6173341902607455e-7
##
mapreduce(z -> abs(z[1]-z[2]), +, zip(x,x2))
5.698913902092186e-16
##
using Plots
pyplot()
scatter(xy[1,:], xy[2,:], zcolor=x, markersize=0.5, alpha=0.5,
markerstrokecolor=nothing, markerstrokewidth=0)
gui()
ArgumentError: Package PyPlot not found in current path. - Run `import Pkg; Pkg.add("PyPlot")` to install the PyPlot package. Stacktrace: [1] macro expansion @ ./loading.jl:2296 [inlined] [2] macro expansion @ ./lock.jl:273 [inlined] [3] __require(into::Module, mod::Symbol) @ Base ./loading.jl:2271 [4] #invoke_in_world#3 @ ./essentials.jl:1089 [inlined] [5] invoke_in_world @ ./essentials.jl:1086 [inlined] [6] require(into::Module, mod::Symbol) @ Base ./loading.jl:2260 [7] top-level scope @ ~/.julia/packages/Plots/FFuQi/src/backends.jl:883 [8] eval @ ./boot.jl:430 [inlined] [9] _initialize_backend(pkg::Plots.PyPlotBackend) @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:882 [10] backend(pkg::Plots.PyPlotBackend) @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:245 [11] pyplot() @ Plots ~/.julia/packages/Plots/FFuQi/src/backends.jl:86 [12] top-level scope @ In[14]:3