In [1]:
## Randomized matrix algorithms.
# Our overall point: A matrix of random entries is not random.
# More technically correct is:
# A matrix of random entries has "non-random" properties.
In [2]:
## Point 0. A sum of indep. random values isn't all that random.
x = rand(1000)
sum(x)
Out[2]:
496.1827742454111
In [3]:
## Point 1. A matrix of random indep. entries isn't all that random.
using LinearAlgebra
A = rand(50,50)
A = A + A'
maximum(eigvals(A))
Out[3]:
50.74706490455395
In [4]:
## Q: What is the eigenvector?
A = randn(1000,1000)
A = A + A'
lams,V = eigen!(A)
i = argmax(lams)
vi = V[:,i]
Out[4]:
1000-element Vector{Float64}:
 -0.004373569456320003
 -0.03256443550978276
 -0.03821620322357936
 -0.04457234563747056
 -0.02245151780801024
 -0.041903198848931886
 -0.02564913858244617
 -0.02772387111546329
 -0.005562440232284414
  0.043688281128365154
  0.04062569613301708
 -0.016885359897076433
 -0.03666974715437445
  ⋮
 -0.04976227861319174
  0.028770486821952962
 -0.005988579946295092
  0.007247399198496041
 -0.041628582193839136
 -0.01561372587983817
  0.0068291106836835536
  0.027457889128294832
 -0.0034395455132158748
  0.009538602684652925
 -0.01951460526873229
  0.04884506962938088
In [5]:
## A: The eigenvector appears to be converging to the normalized
# version of all ones!
In [6]:
##m
# ## This is an instance of a property called concentration.
# Concentration is just the property that a collection of random
# variable samples should be behave like the mean.

using Plots
pyplot()
n = 10_000
X = randn(n,5)
plot(cumsum(X ./ (1:n), dims=2), xscale=:log10)
gui()
ArgumentError: Package PyPlot not found in current path.
- Run `import Pkg; Pkg.add("PyPlot")` to install the PyPlot package.

Stacktrace:
  [1] macro expansion
    @ ./loading.jl:1772 [inlined]
  [2] macro expansion
    @ ./lock.jl:267 [inlined]
  [3] __require(into::Module, mod::Symbol)
    @ Base ./loading.jl:1753
  [4] #invoke_in_world#3
    @ ./essentials.jl:926 [inlined]
  [5] invoke_in_world
    @ ./essentials.jl:923 [inlined]
  [6] require(into::Module, mod::Symbol)
    @ Base ./loading.jl:1746
  [7] top-level scope
    @ ~/.julia/packages/Plots/du2dt/src/backends.jl:883
  [8] eval
    @ ./boot.jl:385 [inlined]
  [9] _initialize_backend(pkg::Plots.PyPlotBackend)
    @ Plots ~/.julia/packages/Plots/du2dt/src/backends.jl:882
 [10] backend
    @ ~/.julia/packages/Plots/du2dt/src/backends.jl:245 [inlined]
 [11] pyplot()
    @ Plots ~/.julia/packages/Plots/du2dt/src/backends.jl:86
 [12] top-level scope
    @ In[6]:7
In [7]:
## The same holds true for a collection of matrices.
using StatsBase
function eigtrial(n,nt)
  lams = Vector{Float64}()
  for t=1:nt
    A = rand(50,50)
    A = A + A'
    push!(lams, eigvals!(A)...)
  end
  h = fit(Histogram, lams; nbins = 50)
  h = normalize(h)
  return h
end
h = eigtrial(50,100)
plot(h.edges, h.weights)
Out[7]:
In [8]:
##
h = eigtrial(50,100)
plot!(h.edges, h.weights)
Out[8]:
In [9]:
##
h = eigtrial(50,100)
plot!(h.edges, h.weights)
Out[9]:
In [10]:
##
h = eigtrial(50,100)
plot!(h.edges, h.weights)
Out[10]:
In [11]:
##
# See, this is not random at all! It's basically
# the same every time!
In [12]:
## The idea of randomized-matrix algorithms is to
# use this non-randomness of random-matrices to
# make things faster!
In [13]:
## Let's look at the eigenfaces matrix.
using DelimitedFiles
A = readdlm(download("http://www.cs.purdue.edu/homes/dgleich/cs515-2019/homeworks/Yale_64.csv"),',')
RequestError: HTTP/1.1 401 Unauthorized while requesting http://www.cs.purdue.edu/homes/dgleich/cs515-2019/homeworks/Yale_64.csv

Stacktrace:
 [1] #3
   @ /Applications/Julia-1.10.app/Contents/Resources/julia/share/julia/stdlib/v1.10/Downloads/src/Downloads.jl:270 [inlined]
 [2] arg_write(f::Downloads.var"#3#4"{Nothing, Vector{Pair{String, String}}, Float64, Nothing, Bool, Nothing, Nothing, String}, arg::Nothing)
   @ ArgTools /Applications/Julia-1.10.app/Contents/Resources/julia/share/julia/stdlib/v1.10/ArgTools/src/ArgTools.jl:123
 [3] #download#2
   @ /Applications/Julia-1.10.app/Contents/Resources/julia/share/julia/stdlib/v1.10/Downloads/src/Downloads.jl:257 [inlined]
 [4] download(url::String, output::Nothing)
   @ Downloads /Applications/Julia-1.10.app/Contents/Resources/julia/share/julia/stdlib/v1.10/Downloads/src/Downloads.jl:246
 [5] #invokelatest#2
   @ ./essentials.jl:892 [inlined]
 [6] invokelatest
   @ ./essentials.jl:889 [inlined]
 [7] do_download(url::String, path::Nothing)
   @ Base ./download.jl:24
 [8] download(url::String)
   @ Base ./download.jl:20
 [9] top-level scope
   @ In[13]:3
In [14]:
##
M = A'
U,Sig,V = svd(M)
Out[14]:
SVD{Float64, Float64, Adjoint{Float64, Matrix{Float64}}, Vector{Float64}}
U factor:
1000×1000 adjoint(::Matrix{Float64}) with eltype Float64:
 -0.0116413     0.0199411    -0.00770235   …  -0.0168765    -0.0054402
 -0.05666      -0.0238119     0.0202237        0.00636878   -0.0203627
  0.0358899    -0.0386073     0.00783886      -0.00221557   -0.00117981
 -0.0559957     0.08808      -0.0286195        0.0165841    -0.0114938
  0.0114211     0.0240153    -0.0298194        0.0352125     0.0238268
  0.00202569    0.0403766     0.00765406   …   0.0238192     0.0147494
  0.0571765     0.093728      0.046312         0.0748758    -0.0256522
  0.0599306     0.002365      0.0197978       -0.000614802   0.0279423
 -0.0102735    -0.0127536    -0.00840206       0.0130782     0.00896001
 -0.000739685  -0.0681433     0.0102463        0.0238312    -0.00261282
  0.0448264     0.00222812   -0.016497     …   0.0354929    -0.00758857
  0.0168954     0.0427302     0.0360464       -0.00568042    0.00969447
 -0.0420623    -0.0575583    -0.00502543      -0.0115111    -0.0330026
  ⋮                                        ⋱                
  0.00806735   -0.0505558    -0.0366187        0.0401735    -0.0132255
  0.00157772    0.0223887    -0.000987147     -0.0514473    -0.0237043
  0.0741391    -0.0354142    -0.0511075    …  -0.0312391     0.0261625
  0.0178824    -0.0208616     0.0400842       -0.0368578     0.0532288
 -0.0519923     0.00252748    0.0287996       -0.0298738    -0.0744954
 -0.00273071    0.0226418     0.0142504       -0.0691591     0.0139842
 -0.00178246    0.0292325     0.0157836        0.0362573     0.025903
 -0.0377607     0.000230509  -0.0207213    …   0.0058383     0.0291866
 -0.0549296    -0.0484605    -0.000962952      0.00910037   -0.0220525
 -0.0196841    -0.0388082     0.0217553        0.0122588     0.0802305
 -0.0216516     0.00793657   -0.0284214        0.00486955   -0.0683406
  0.00960817   -0.0115979    -0.0393065        0.000625589   0.0334126
singular values:
1000-element Vector{Float64}:
 89.48048645725888
 88.69381698905761
 88.26736499178014
 87.82171721176475
 87.14027900054687
 87.10956206634715
 86.95620238592103
 86.56104255734273
 86.52150218305472
 86.32623137597807
 85.87119964022706
 85.50878491266148
 85.25002865742978
  ⋮
  0.6819523634978714
  0.636598249373538
  0.5714841196000741
  0.5633493995930333
  0.5243453064778097
  0.3744233490367495
  0.34411426840563736
  0.29700181630513156
  0.20910002816160833
  0.13540982059824822
  0.07820266734479002
  0.029546309420466016
Vt factor:
1000×1000 adjoint(::Matrix{Float64}) with eltype Float64:
  0.000981415   0.00256021    0.000221603  …   0.0201302     0.0139407
 -0.000330229   0.000123136  -0.00321152      -0.0223754    -0.0362377
  0.000442796  -0.00128241    0.00193787      -0.0790473    -0.00682032
 -0.000467374  -0.000243229  -0.00175014      -0.108061      0.00220043
 -0.00147453   -4.95414e-5   -0.00305217      -0.0316519     0.0103481
  0.00138592    0.000922428  -0.00119636   …   0.0495758     0.00702161
 -0.000257464  -0.000888468  -0.00154306       0.00340424   -0.00358625
  0.00102477    0.00101605    0.00129742       0.0112088    -0.0446883
 -0.0010012    -0.00284523    0.000269177      0.101652     -0.00415687
  0.0010464    -0.000602092  -0.000388655      0.0188107     0.0113128
 -0.000156764  -0.00260178   -0.00219968   …   0.086568     -0.0426994
  0.000723563   0.000405717   9.22554e-5      -0.0216484    -0.00703741
 -0.000285006  -0.000917482   0.00122904       0.00878219   -0.0145682
  ⋮                                        ⋱                
 -0.194368      0.095998     -0.00744271       0.000644832  -0.0163453
  0.244387      0.269107      0.0961445        0.000975316   0.0117105
  0.0236292    -0.153807     -0.245128     …   0.00172271   -0.0401108
 -0.0236124    -0.268696     -0.229367        -0.00088243    0.0456453
  0.260207     -0.00916007    0.303917         0.00231411   -0.00839179
 -0.31686       0.0204904     0.0782287        5.32261e-5   -0.0313769
  0.0116136     0.268386     -0.240237         0.00063968   -0.0420505
 -0.233811     -0.138834     -0.0192923    …  -0.000185451   0.0167584
  0.23307      -0.0782834    -0.587203        -0.00143221    0.0298694
 -0.0469756    -0.283486      0.217473        -0.00189813    0.0614413
  0.48021      -0.359194      0.0409543        0.000670258  -0.00560556
 -0.481431     -0.300821     -0.0721493        0.00103847    0.000220474
In [15]:
## A given matrix-times-random matrix is often called
# a sketch because it preserves certain properties due
# to what happens with random matrices being well-behaved
# and deterministic. It's a "sketch" betcause it's smaller.
# The algorithm here is from Halko Martinsson and Tropp.
# TODO, write the reference!
function simple_sketch(M::AbstractMatrix, k::Int, p::Int=10)
  m,n = size(M)
  S = M*randn(n,k+p)

  return Matrix(qr!(S).Q)
end
Q = simple_sketch(M, 5)
Out[15]:
1000×15 Matrix{Float64}:
 -0.0216086    -0.00552965   0.0333413    …  -0.00252976  -0.0275039
  0.0428449    -0.00621423  -0.0533399        0.0317142   -0.0207002
  0.0215153    -0.0137481   -0.00976263       0.014593    -0.0175601
 -0.0508218    -0.0174862   -0.0135799       -0.0612058    0.0335062
  0.0120567     0.00542803   0.0390961       -0.00441383   0.0600971
 -0.0250522     0.0422219    0.0349961    …   0.0133046   -0.031661
 -0.0162307    -0.0151139   -0.042496         0.0315419   -0.0186096
  0.0148247    -0.0500011   -0.01574          0.0154339   -0.0259733
 -0.000753879   0.0517396    0.041558        -0.00517635  -0.0390296
  0.00885776   -0.0595985    0.00631928      -0.0342964   -0.0387881
  0.0015211     0.0346923    0.0154815    …   0.0126152    0.0109995
  0.0464356    -0.0329446   -0.0171067       -0.0375565    0.00842268
 -0.00154024   -0.0136106    0.00614345      -0.0229735    0.0160771
  ⋮                                       ⋱               
 -0.0432694    -0.0504803    0.0167702        0.0374495   -0.0494475
  0.000548453  -0.00708725   0.0630704        0.00701765  -0.0678506
  0.0080606    -0.0217629   -0.0209519    …  -0.0430042    0.034677
  0.0068936     0.026786    -0.0196088       -0.0291209   -0.0514355
  0.00634373    0.0292533   -0.00798081       0.0317681   -0.026857
  0.0290163     0.0419157    0.0769951       -0.00321006   0.0240637
  0.0124189    -0.0677682    0.000440635     -0.0100413   -0.025357
  0.0366256    -0.0349447   -0.0140036    …  -0.0288014    0.0495413
  0.0119545     0.00513008   0.034554        -0.0394341   -0.053159
  0.00934685    0.0547894    0.0252767       -0.0303002   -0.0208406
 -0.0340502     0.00408336   0.0656694       -0.0366945    0.0544734
  0.00419345   -0.0174125   -0.017709        -0.0294871   -0.0586214
In [16]:
##
B = Q'*M
UB,SigB,VB = svd(B)
Uhat = Q*UB
Out[16]:
1000×15 Matrix{Float64}:
 -0.0145375   -0.0360073    -0.0220355   …  -0.0133045   -0.0338198
 -0.00684976   0.0233478    -0.00821625     -0.0115193    0.0434321
  0.00123571  -0.0830504    -0.00325817      0.0437861    0.000263058
 -0.0012105   -0.0211336     0.0187949      -0.0354911    0.00188498
  0.011415     0.0240692     0.0353083      -0.0233895   -0.0143323
  0.0106884   -0.000623833  -0.0256037   …   0.0366185   -0.042428
 -0.0044687    0.000522721  -0.033579        0.0300932    0.0106733
 -0.0114908   -0.0456632     0.0579112       0.0370625    0.00783643
 -0.0129946    0.01032       0.00151343     -0.00691709  -0.0270358
 -0.0524298   -0.0499136    -0.00350898      0.0166553   -0.00824639
  0.0277538    0.0571769     0.035507    …  -0.0176736   -0.0308826
 -0.0611717   -0.0227671    -0.00236828     -0.0465477    0.00399052
 -0.00629004   0.00470029   -0.0196702      -0.0136139    0.00637404
  ⋮                                      ⋱               
 -0.0509002    0.00367184   -0.0849476      -0.00635624  -0.0134595
 -0.0120278   -0.0323717    -0.0256997       0.0340547   -0.0186111
  0.0127692    0.0314742     0.0291774   …  -0.0149423    0.0159672
  0.00490476  -0.0247188     0.00575584      0.0610127   -0.0242218
 -0.0118881    0.0337922    -0.00992326      0.0378516   -0.00513123
  0.0338146    0.0190375    -0.0176712       0.00986794  -0.0435725
 -0.0195976   -0.0538982    -0.011111       -0.0340869    0.0547529
  0.0160174    0.00819075    0.0307067   …   0.0109287    0.0393771
 -0.0360006   -0.00466564    0.0452236       0.0186555   -0.0209252
  0.0502219    0.0251666     0.0358092       0.0580467   -0.0282163
  0.067044     0.0019366    -0.023288       -0.0556436   -0.0122259
 -0.063893    -0.00306752   -0.00290406      0.055031    -0.0155167
In [17]:
## Compare the two
[Sig[1:5] SigB[1:5]]
Out[17]:
5×2 Matrix{Float64}:
 89.4805  68.9069
 88.6938  67.9718
 88.2674  66.4939
 87.8217  66.1009
 87.1403  65.0198
In [18]:
## Look at the Vector
norm(Uhat[:,1] - U[:,1])
Out[18]:
1.3766411116017983
In [19]:
##
plot(
  heatmap(reshape(abs.(Uhat[:,1]), 64, 64), title="Random",color=:blues),
  heatmap(reshape(abs.(U[:,1]), 64, 64), title="Exact",color=:blues),
  yflip=true, )
gui()
DimensionMismatch: new dimensions (64, 64) must be consistent with array size 1000

Stacktrace:
 [1] (::Base.var"#throw_dmrsa#328")(dims::Tuple{Int64, Int64}, len::Int64)
   @ Base ./reshapedarray.jl:41
 [2] reshape
   @ ./reshapedarray.jl:45 [inlined]
 [3] reshape(::Vector{Float64}, ::Int64, ::Int64)
   @ Base ./reshapedarray.jl:117
 [4] top-level scope
   @ In[19]:2
In [20]:
##
# So the field is often dominated by analysis showing
# that these results are not a fluke! In fact, we can
# get quite precise bounds on how much work is needed
# in a variety of situations to get good accuracy.
In [21]:
##
plot(
  heatmap(reshape(abs.(Uhat[:,2]), 64, 64), title="Random",color=:blues),
  heatmap(reshape(abs.(U[:,2]), 64, 64), title="Exact",color=:blues),
  yflip=true, )
gui()
DimensionMismatch: new dimensions (64, 64) must be consistent with array size 1000

Stacktrace:
 [1] (::Base.var"#throw_dmrsa#328")(dims::Tuple{Int64, Int64}, len::Int64)
   @ Base ./reshapedarray.jl:41
 [2] reshape
   @ ./reshapedarray.jl:45 [inlined]
 [3] reshape(::Vector{Float64}, ::Int64, ::Int64)
   @ Base ./reshapedarray.jl:117
 [4] top-level scope
   @ In[21]:2