In [1]:
## Backwards stable algorithms for variance computations!
# The following algorithm is a tempting idea to implement
# to make it easier to compute the variance of numbers in a
# single pass
function badvar1(x::Vector{Float64})
    ex2 = 0.0
    ex = 0.0
    n = length(x)
    for i=1:n
      ex2 = ex2 + x[i]^2
      ex = ex + x[i]
    end
    @show ex2, ex^2
    return 1.0/(n-1)*(ex2 - (ex)^2/n)
end
Out[1]:
badvar1 (generic function with 1 method)
In [2]:
## It seems to work
using Statistics
x = randn(100)
basevar = x -> (length(x)/(length(x)-1))*mean((x .- mean(x)).^2)
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (101.86620350266104, 70.39082041312703)
badvar1(x) = 1.021841366651816
basevar(x) = 1.021841366651816
Out[2]:
1.021841366651816
In [3]:
##
x = randn(10000).+1e4
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (1.0000017605788628e12, 1.0000017504979118e16)
badvar1(x) = 1.0081959245729262
basevar(x) = 1.0081965348132307
Out[3]:
1.0081965348132307
In [4]:
##
x = randn(10000).+1e6
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (9.999999910623058e15, 9.99999991061303e19)
badvar1(x) = 1.002900290029003
basevar(x) = 1.0036868990837107
Out[4]:
1.0036868990837107
In [5]:
##
x = randn(10000).+1e8
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (1.000000000047669e20, 1.000000000047664e24)
badvar1(x) = 49.15691569156916
basevar(x) = 0.9969226611273928
Out[5]:
0.9969226611273928
In [6]:
## Eek! That's the wrong value completely. This shows the routine is not
# backwards stable.
# But there is a better way to do this that keeps some extra data around.
In [7]:
## This is essentially what Julia does
function goodvar(x::Vector{Float64})
    n = length(x); mean = 0.0; m2 = 0.0; N = 0.0
    for i=1:n
        N = N + 1
        delta = x[i] - mean
        mean = mean + delta/N
        m2 = m2 + delta*(x[i]-mean)
    end
    return m2/(n-1)
end

x = randn(10000).+1e10
basevar = var
@show badvar1(x)
@show basevar(x)
@show goodvar(x)
@show var(x)
(ex2, ex ^ 2) = (1.0000000000045771e24, 1.0000000000045823e28)
badvar1(x) = -523501.4893489349
basevar(x) = 1.0036755560888162
goodvar(x) = 1.0036760185399454
var(x) = 1.0036755560888162
Out[7]:
1.0036755560888162
In [8]:
##
function norm2(x::Vector)
  sum2 = 0.0
  for i=1:size(x,1)
    sum2 += x[i].^2
  end
  return sqrt(sum2)
end
Out[8]:
norm2 (generic function with 1 method)
In [9]:
##
x = rand(10000).+1e300
@show norm2(x)
@show norm(x)
norm2(x) = Inf
UndefVarError: norm not defined

Stacktrace:
 [1] top-level scope at show.jl:576
 [2] top-level scope at In[9]:4