using SparseArrays
using LinearAlgebra
function simple_pagerank(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
x = Vector{T}(undef, n)
fill!(x, one(T)/n) # initialize to 1/n
niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
for iter=1:niter
dx = sum(x[d .== 0])
x ./= d
y = A'*x # compute a matrix vector product
y .*= alpha
y .+= (1.0-alpha)/n
y .+= alpha*dx/n
x = y
end
return x
end
##
function simple_pagerank_2(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
ei,ej,ev = findnz(A)
P = sparse(ei,ej,ev./d[ei],size(A,1),size(A,2))
niter = 2*ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
x = Vector{T}(undef, n)
fill!(x, one(T)/n) # initialize to 1/n
for iter=1:niter
y = P'*x
y .*= alpha
missing = norm(x,1) - norm(y,1)
y .+= missing/n
x = y
end
return x
end
#
function simple_pagerank_3(A::SparseMatrixCSC, alpha::T) where T
n = size(A,1)
d = vec(sum(A,dims=2)) # compute the out-degrees
x = zeros(n)
niter = ceil(Int,log(eps(T))/log(alpha)) # we can get this as an upper-bound
for iter=1:niter
dx = sum(x[d .== 0])
x ./= d
y = A'*x # compute a matrix vector product
y .*= alpha
y .+= (1.0-alpha)/n
y .+= alpha*dx/n
x = y
end
return x
end
##
using NearestNeighbors
function gnk_with_dangling_and_preferential_attachment(n,k,d,p)
xy = rand(2,n)
T = BallTree(xy)
idxs = knn(T, xy, k)[1]
# form the edges for sparse
ei = Int[]
ej = Int[]
for i=1:n
if rand() < d # skip this node because it's a dangling
# continue
# run k steps of PA instead
for s = 1:k
v = rand(1:i)
if length(ej) > 0
j = rand(ej) # find a random node based on it's in-degree
else
j = rand(1:n)
end
push!(ei, v)
push!(ej, j)
end
continue
end
for j=idxs[i]
if i != j
push!(ei,i)
push!(ej,j)
end
end
end
# run p steps of preferential attachment
for s=1:p
v = rand(1:n)
j = rand(ej) # find a random node based on it's in-degree
push!(ei, v)
push!(ej, j)
end
return xy, sparse(ei,ej,1.0,n,n)
end
using Random
Random.seed!(0) # make it consistent
@time xy, A = gnk_with_dangling_and_preferential_attachment(1_000_000,3,0.5, 300000)
@show extrema(sum(A,dims=1))
@show extrema(sum(A,dims=2))
##
@time x = simple_pagerank(A, 0.85)
@show extrema(x)
##
@time x2 = simple_pagerank_2(A,0.85)
@show extrema(x2)
##
## This one skips extra iterations
@time x3 = simple_pagerank_3(A,0.85)
@show extrema(x3)
##
norm(x-x2,1)
##
##
norm(x-x2,1)
##
@show norm(x,1)
@show sum(x)
##
using KahanSummation
@show sum_kbn(x)
##
p = randperm(size(A,1))
@time Ap = A[p,p]
@time xp = simple_pagerank(Ap, 0.85)
x2 = similar(x)
x2[p] = xp
##
mapreduce(z -> abs(z[1]-z[2]), +, zip(x,x2))
##
using Plots
pyplot()
scatter(xy[1,:], xy[2,:], zcolor=x, markersize=0.5, alpha=0.5,
markerstrokecolor=nothing, markerstrokewidth=0)
gui()