In [1]:
## Lecture 5, sparse matrix operations.
In [2]:
## Load the Candyland matrix.
using DelimitedFiles
using SparseArrays
data = readdlm("candyland-matrix.csv",',')
TI = Int.(data[:,1])
TJ = Int.(data[:,2])
TV = data[:,3]
T = sparse(TI,TJ, TV, 140,140)
coords = readdlm("candyland-coords.csv",',')
x = coords[:,1]
y = coords[:,2]
cells = Int.(readdlm("candyland-cells.csv",','))
start,dest = cells[1:2]
Out[2]:
2-element Array{Int64,1}:
 140
 134
In [3]:
## Let's see the matrix we had in class
C = sparse([0 1 0 0 1; 2 0 0 6 0; 0 0 0 0 3])
@show C.colptr
@show C.rowval
@show C.nzval
C.colptr = [1, 2, 3, 3, 4, 6]
C.rowval = [2, 1, 2, 1, 3]
C.nzval = [2, 1, 6, 1, 3]
Out[3]:
5-element Array{Int64,1}:
 2
 1
 6
 1
 3
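A side sketch of my own (not run in class): the three CSC arrays above completely determine C, and you can decode them by hand. For column j, the stored entries occupy positions colptr[j] through colptr[j+1]-1 of rowval and nzval.

# decode the CSC arrays of the 3×5 example C built above
for j in 1:size(C,2)                        # walk the columns
  for nzi in C.colptr[j]:C.colptr[j+1]-1    # stored entries in column j
    println("C[", C.rowval[nzi], ",", j, "] = ", C.nzval[nzi])
  end
end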
In [4]:
##
sum(T,dims=1)
Out[4]:
1×140 Array{Float64,2}:
 1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  …  0.0  1.0  1.0  1.0  1.0  1.0  1.0
In [5]:
##
findall(vec(sum(T,dims=1)) .< 0.9)
Out[5]:
1-element Array{Int64,1}:
 134
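A quick hedged check, using the start/dest values read in above: the one column that doesn't sum to 1 is exactly the destination cell (its sum is the 0.0 visible in the row of column sums).

findall(vec(sum(T,dims=1)) .< 0.9) == [dest]  # sanity check: only the destination column is deficient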
In [6]:
## Let's see how Julia stores things
typeof(T)
Out[6]:
SparseMatrixCSC{Float64,Int64}
In [7]:
## What can we do with T?
fieldnames(typeof(T))
Out[7]:
(:m, :n, :colptr, :rowval, :nzval)
In [8]:
##
T.colptr
Out[8]:
141-element Array{Int64,1}:
    1
   19
   37
   55
   73
   74
   92
  110
  128
  146
  164
  182
  200
    ⋮
 2210
 2220
 2229
 2237
 2244
 2244
 2245
 2246
 2249
 2252
 2255
 2273
In [9]:
##
T.nzval
Out[9]:
2272-element Array{Float64,1}:
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.0625  
 0.015625
 0.0625  
 0.0625  
 0.046875
 0.046875
 0.0625  
 ⋮       
 0.0625  
 0.0625  
 0.015625
 0.0625  
 0.0625  
 0.046875
 0.046875
 0.015625
 0.015625
 0.015625
 0.015625
 0.015625
In [10]:
##
T.rowval
Out[10]:
2272-element Array{Int64,1}:
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
   ⋮
   7
   8
   9
  10
  11
  12
  13
  20
  42
  69
  92
 102
In [11]:
## Why sparse matrices?
# The first reason is memory!
print(varinfo(r"T"))
A = Array(T)
print(varinfo(r"A")) # so it takes about 20% of the memory!
| name |       size | summary                                |
|:---- | ----------:|:-------------------------------------- |
| T    | 36.758 KiB | 140×140 SparseMatrixCSC{Float64,Int64} |
| TI   | 17.789 KiB | 2272-element Array{Int64,1}            |
| TJ   | 17.789 KiB | 2272-element Array{Int64,1}            |
| TV   | 17.789 KiB | 2272-element Array{Float64,1}          |
| name |        size | summary                  |
|:---- | -----------:|:------------------------ |
| A    | 153.164 KiB | 140×140 Array{Float64,2} |
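A back-of-the-envelope check of those numbers (a sketch, assuming 8-byte floats and 8-byte integers): CSC stores nnz values, nnz row indices, and n+1 column pointers, versus m*n values for the dense array.

csc_bytes = 8*nnz(T) + 8*nnz(T) + 8*(size(T,2)+1)  # nzval + rowval + colptr
dense_bytes = 8*size(T,1)*size(T,2)
csc_bytes/dense_bytes  # ≈ 0.24 for this T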
In [12]:
## The second reason is speed as you'll see on the homework.
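A quick hedged sketch of that gap (not the homework itself), comparing a sparse and a dense matvec using T and its dense copy A from above; xs is a test vector I made up here.

using BenchmarkTools
xs = randn(size(T,2))
@btime $T*$xs  # sparse matvec only touches the nnz stored entries
@btime $A*$xs  # dense matvec touches all m*n entries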
In [13]:
## The downside is that some operations need a lot more care.
In [14]:
## Let's write our own matvec given the raw arrays
""" My goal: compute T*x where we just use the
index arrays for coordinate/index/triplet storage
of the non-zero data in T.

TI, TJ, TV: the triplet arrays for T.
x: the vector for T*x (should have at least n entries
for the n columns of T)
m: the number of rows
"""
function mymatvec(TI,TJ,TV,x,m)
  y = zeros(m) # allocate output
  for nzi in 1:length(TI) # for each entry in T
    i = TI[nzi]
    j = TJ[nzi]
    v = TV[nzi]
    y[i] += v*x[j]
  end
  # A fancy way
  #for (i,j,v) in zip(TI,TJ,TV)
  #  y[i] += v*x[j]
  # end
  return y
end
x = randn(140)
mymatvec(TI,TJ,TV,x,140) == T*x
Out[14]:
true
In [15]:
##
y = mymatvec(TI,TJ,TV,x,140)
z = T*x
Out[15]:
140-element Array{Float64,1}:
 -0.1107282554399252   
 -0.10620188429941169  
  0.01985407332408448  
  0.01330281424314557  
  0.0009486614075788352
  0.0009486614075788352
  0.07124160935790018  
  0.055570851372365684 
 -0.20482058414286247  
  0.21166222003102153  
  0.20185663644342244  
  0.21161559322027926  
  0.13532672785020064  
  ⋮                    
 -0.17064982091282066  
 -0.18519002645597255  
 -0.12407757753817551  
 -0.024353343338121197 
 -0.1795032959184152   
  0.6105097690809262   
  1.931097032209077    
 -0.5095574644727809   
  2.2327427635498607   
  1.3651797491549558   
 -2.9385496730018152   
  0.0                  
In [16]:
##
## Let's see how Julia stores things
typeof(T)
Out[16]:
SparseMatrixCSC{Float64,Int64}
In [17]:
## What can we do with T?
fieldnames(typeof(T))
Out[17]:
(:m, :n, :colptr, :rowval, :nzval)
In [18]:
##
T.colptr
Out[18]:
141-element Array{Int64,1}:
    1
   19
   37
   55
   73
   74
   92
  110
  128
  146
  164
  182
  200
    ⋮
 2210
 2220
 2229
 2237
 2244
 2244
 2245
 2246
 2249
 2252
 2255
 2273
In [19]:
##
T.nzval
Out[19]:
2272-element Array{Float64,1}:
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.09375 
 0.0625  
 0.015625
 0.0625  
 0.0625  
 0.046875
 0.046875
 0.0625  
 ⋮       
 0.0625  
 0.0625  
 0.015625
 0.0625  
 0.0625  
 0.046875
 0.046875
 0.015625
 0.015625
 0.015625
 0.015625
 0.015625
In [20]:
##
T.rowval
Out[20]:
2272-element Array{Int64,1}:
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
   ⋮
   7
   8
   9
  10
  11
  12
  13
  20
  42
  69
  92
 102
In [21]:
##
""" This does a matvec y=T*x using CSC arrays instead.

colptr,rowval,nzval: CSC arrays for a matrix T
x: the vector for T*x (should have at least n entries for the n columns of T)
m: the number of rows so we can allocate the output.
"""
function cscmatvec(colptr,rowval,nzval,x,m)
  y = zeros(m) # allocate output
  for j=1:length(colptr)-1 # for each column ...
    for nzi=colptr[j]:colptr[j+1]-1 # for each entry in the column
      i = rowval[nzi]
      v = nzval[nzi]
      y[i] += v*x[j] # same as indexed form!
    end
  end
  return y
end
cscmatvec(T.colptr,T.rowval,T.nzval,x,140) == T*x
Out[21]:
true
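A companion sketch of my own (not from the lecture): the transposed product z = T'*x also falls straight out of the CSC arrays, because column j of T is row j of T'. The helper name is hypothetical.

function csctransmatvec(colptr,rowval,nzval,x,n)
  z = zeros(n)                      # one output entry per column of T
  for j = 1:length(colptr)-1        # column j of T is row j of T'
    for nzi = colptr[j]:colptr[j+1]-1
      z[j] += nzval[nzi]*x[rowval[nzi]]
    end
  end
  return z
end
csctransmatvec(T.colptr,T.rowval,T.nzval,x,size(T,2)) ≈ T'*x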
In [22]:
## Which one is faster? (If any) (Question from class)
using BenchmarkTools
@btime mymatvec(TI,TJ,TV,x,140)
@btime cscmatvec(T.colptr,T.rowval,T.nzval,x,140)
┌ Info: Precompiling BenchmarkTools [6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf]
└ @ Base loading.jl:1242
  3.767 μs (1 allocation: 1.22 KiB)
  4.236 μs (1 allocation: 1.22 KiB)
Out[22]:
140-element Array{Float64,1}:
 -0.1107282554399252   
 -0.10620188429941169  
  0.01985407332408448  
  0.01330281424314557  
  0.0009486614075788352
  0.0009486614075788352
  0.07124160935790018  
  0.055570851372365684 
 -0.20482058414286247  
  0.21166222003102153  
  0.20185663644342244  
  0.21161559322027926  
  0.13532672785020064  
  ⋮                    
 -0.17064982091282066  
 -0.18519002645597255  
 -0.12407757753817551  
 -0.024353343338121197 
 -0.1795032959184152   
  0.6105097690809262   
  1.931097032209077    
 -0.5095574644727809   
  2.2327427635498607   
  1.3651797491549558   
 -2.9385496730018152   
  0.0                  
In [23]:
## Note that getting a random column in T is fast
# Note: run this a few times; the first run in Julia
# includes compilation time and tends to be unreliable.
@btime T[:,(rand(1:size(T,2)))]
  209.225 ns (4 allocations: 471 bytes)
Out[23]:
140-element SparseVector{Float64,Int64} with 18 stored entries:
  [9  ]  =  0.015625
  [20 ]  =  0.015625
  [21 ]  =  0.09375
  [22 ]  =  0.09375
  [23 ]  =  0.09375
  [24 ]  =  0.09375
  [25 ]  =  0.09375
  [26 ]  =  0.09375
  [27 ]  =  0.0625
  [28 ]  =  0.0625
  [29 ]  =  0.0625
  [30 ]  =  0.0625
  [31 ]  =  0.046875
  [32 ]  =  0.046875
  [42 ]  =  0.015625
  [69 ]  =  0.015625
  [92 ]  =  0.015625
  [102]  =  0.015625
In [24]:
## But getting a random row in T is slow
@btime T[rand(1:size(T,1)),:]
  1.129 μs (8 allocations: 544 bytes)
Out[24]:
140-element SparseVector{Float64,Int64} with 12 stored entries:
  [32 ]  =  0.046875
  [33 ]  =  0.046875
  [34 ]  =  0.046875
  [36 ]  =  0.046875
  [37 ]  =  0.046875
  [38 ]  =  0.09375
  [39 ]  =  0.09375
  [40 ]  =  0.09375
  [41 ]  =  0.09375
  [42 ]  =  0.09375
  [43 ]  =  0.09375
  [44 ]  =  0.09375
In [25]:
## Okay, maybe we need a bigger matrix
S = sprandn(100000,100000,10/100000)
Out[25]:
100000×100000 SparseMatrixCSC{Float64,Int64} with 1001495 stored entries:
  [2949 ,      1]  =  -0.760081
  [3033 ,      1]  =  0.966406
  [4568 ,      1]  =  1.11003
  [17111,      1]  =  0.541208
  [20674,      1]  =  0.44155
  [36547,      1]  =  0.11603
  [57505,      1]  =  -0.541589
  [61721,      1]  =  0.117019
  [74035,      1]  =  0.578567
  [80102,      1]  =  1.11259
  [82431,      1]  =  0.424175
  [98188,      1]  =  -0.310821
  ⋮
  [67442,  99999]  =  -0.793046
  [91816,  99999]  =  -0.367732
  [4788 , 100000]  =  -1.57009
  [15843, 100000]  =  1.45471
  [20351, 100000]  =  2.33565
  [28227, 100000]  =  1.39962
  [32816, 100000]  =  -0.133632
  [38365, 100000]  =  1.40451
  [45541, 100000]  =  -1.01711
  [54577, 100000]  =  -1.3021
  [56983, 100000]  =  -0.113331
  [85536, 100000]  =  -0.0157434
  [98600, 100000]  =  -1.70448
In [26]:
## Note that getting a random column in S is fast
@btime S[:,rand(1:size(S,2))]
  206.834 ns (4 allocations: 384 bytes)
Out[26]:
100000-element SparseVector{Float64,Int64} with 11 stored entries:
  [16911 ]  =  -0.178406
  [19966 ]  =  0.165452
  [21552 ]  =  -1.21315
  [53871 ]  =  0.532565
  [54394 ]  =  -0.214918
  [57484 ]  =  0.173088
  [72968 ]  =  -0.682497
  [84889 ]  =  0.957582
  [87002 ]  =  0.174625
  [90783 ]  =  -1.55781
  [93604 ]  =  0.331715
In [27]:
## But getting a random row in S is slow
@btime S[rand(1:size(S,1)),:]
  493.338 μs (6 allocations: 320 bytes)
Out[27]:
100000-element SparseVector{Float64,Int64} with 9 stored entries:
  [6897  ]  =  0.398031
  [7303  ]  =  0.393168
  [24604 ]  =  0.869228
  [31199 ]  =  -1.27642
  [32212 ]  =  1.71992
  [50677 ]  =  -1.79201
  [60406 ]  =  0.433725
  [71688 ]  =  0.0821909
  [84468 ]  =  -0.0231793
In [28]:
## Also note that there is NO way we could store S as a dense matrix
length(S)*8/1e9 # it would take 80 GB of memory, about 10x what my Mac has.
Out[28]:
80.0
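For contrast, a rough sketch (same 8-byte assumption as before) of what the sparse S actually occupies:

(8*nnz(S) + 8*nnz(S) + 8*(size(S,2)+1))/1e9  # ≈ 0.017 GB, a few MB instead of 80 GB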
In [29]:
## If you haven't compiled an operation before, it can take a while to run
@time S[:,UInt32(rand(1:size(S,2)))]
  0.096028 seconds (67.98 k allocations: 3.424 MiB, 64.52% gc time)
Out[29]:
100000-element SparseVector{Float64,Int64} with 7 stored entries:
  [3619  ]  =  0.574508
  [13083 ]  =  -0.798962
  [47570 ]  =  -1.66306
  [52973 ]  =  1.41775
  [70964 ]  =  -1.75527
  [71310 ]  =  -0.463733
  [78736 ]  =  0.157322
In [30]:
##
@time S[:,UInt32(rand(1:size(S,2)))]
  0.000021 seconds (11 allocations: 496 bytes)
Out[30]:
100000-element SparseVector{Float64,Int64} with 4 stored entries:
  [35010 ]  =  0.422775
  [50323 ]  =  -2.35307
  [54196 ]  =  -1.35623
  [68620 ]  =  0.683581
In [31]:
## Back to UInt64
@time S[:,UInt64(rand(1:size(S,2)))]
  0.028696 seconds (51.60 k allocations: 2.523 MiB)
Out[31]:
100000-element SparseVector{Float64,Int64} with 10 stored entries:
  [2258  ]  =  -0.591302
  [17900 ]  =  1.57858
  [21620 ]  =  1.0053
  [29768 ]  =  -0.496797
  [34536 ]  =  0.109212
  [45110 ]  =  -0.800042
  [55041 ]  =  0.49368
  [58822 ]  =  -0.722277
  [64085 ]  =  0.402992
  [72928 ]  =  -0.365272
In [32]:
##
@btime S[:,Int32(rand(1:size(S,2)))]
  628.440 ns (6 allocations: 422 bytes)
Out[32]:
100000-element SparseVector{Float64,Int64} with 7 stored entries:
  [23634 ]  =  1.43912
  [34469 ]  =  -0.0486339
  [54433 ]  =  0.565023
  [68192 ]  =  -0.446825
  [83018 ]  =  1.13066
  [89824 ]  =  0.627561
  [90464 ]  =  -0.112001
In [33]:
##
@btime S[:,Int32(rand(1:size(S,2)))]
  640.055 ns (6 allocations: 424 bytes)
Out[33]:
100000-element SparseVector{Float64,Int64} with 10 stored entries:
  [24901 ]  =  0.393266
  [30138 ]  =  0.179461
  [34455 ]  =  -1.03963
  [41194 ]  =  0.86418
  [47700 ]  =  0.877746
  [83464 ]  =  0.313525
  [86276 ]  =  -1.06142
  [91528 ]  =  -2.0108
  [91707 ]  =  0.685677
  [99913 ]  =  0.602214
In [34]:
## And we can go bigger
S = sprandn(10000000,10000000,10/10000000)
Out[34]:
10000000×10000000 SparseMatrixCSC{Float64,Int64} with 100003628 stored entries:
  [1125733,        1]  =  0.0214984
  [1294615,        1]  =  -0.378385
  [1325132,        1]  =  -0.867649
  [1960225,        1]  =  0.498448
  [2994202,        1]  =  1.25384
  [6557321,        1]  =  -0.941765
  [7151327,        1]  =  -0.143008
  [7733179,        1]  =  0.92175
  [8972759,        1]  =  -0.203183
  [9802689,        1]  =  0.768445
  [9904619,        1]  =  0.311664
  [562754 ,        2]  =  0.380946
  ⋮
  [9844636,  9999999]  =  -1.04418
  [9868954,  9999999]  =  1.24702
  [637717 , 10000000]  =  -1.81746
  [937539 , 10000000]  =  1.43993
  [2105543, 10000000]  =  -0.290297
  [2712754, 10000000]  =  0.59528
  [3292182, 10000000]  =  1.2218
  [3348415, 10000000]  =  -0.0352114
  [7103559, 10000000]  =  1.71144
  [7221506, 10000000]  =  0.0140375
  [7274677, 10000000]  =  0.00365515
  [7625826, 10000000]  =  -1.84424
  [9853541, 10000000]  =  -0.62727
In [35]:
## Note that getting a random column in S is fast
@time S[:,rand(1:size(S,2))]
  0.000010 seconds (8 allocations: 512 bytes)
Out[35]:
10000000-element SparseVector{Float64,Int64} with 7 stored entries:
  [1227358 ]  =  0.346505
  [1736614 ]  =  -0.190221
  [4402053 ]  =  -0.801169
  [5051250 ]  =  0.601583
  [5825907 ]  =  0.634294
  [6013967 ]  =  -0.0739769
  [6615464 ]  =  1.21081
In [36]:
## But getting a random row in S is slow
@time S[rand(1:size(S,1)),:]
  0.100011 seconds (14 allocations: 928 bytes)
Out[36]:
10000000-element SparseVector{Float64,Int64} with 12 stored entries:
  [804299  ]  =  1.14295
  [1464882 ]  =  -0.898365
  [2942646 ]  =  -0.104351
  [3686845 ]  =  0.124023
  [4221780 ]  =  1.2454
  [4821696 ]  =  0.76197
  [5934926 ]  =  0.117143
  [5958393 ]  =  -0.448999
  [6874521 ]  =  2.21434
  [7224220 ]  =  1.10289
  [9011548 ]  =  0.42994
  [9279906 ]  =  -0.197022