In [1]:
## Backward stable algorithms for variance computations!
# The following algorithm is a tempting one to implement: it computes
# the variance of a list of numbers in a single pass.
function badvar1(x::Vector{Float64})
    ex2 = 0.0   # running sum of squares
    ex = 0.0    # running sum
    n = length(x)
    for i=1:n
      ex2 = ex2 + x[i]^2
      ex = ex + x[i]
    end
    @show ex2, ex^2   # these agree in many leading digits once the mean is large
    return 1.0/(n-1)*(ex2 - (ex)^2/n)   # sample variance via sum of squares minus squared sum
end
Out[1]:
badvar1 (generic function with 1 method)
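In [ ]:
## An illustrative sketch (added for exposition, not part of the original
# run): badvar1 subtracts two nearly equal large numbers, ex2 and ex^2/n.
# Float64 carries roughly 16 significant digits, and near 1e16 adjacent
# floats are 2.0 apart, so small contributions vanish entirely:
@show eps(1.0e16)              # 2.0, the spacing between Float64s near 1e16
@show 1.0e16 + 1.0 == 1.0e16   # true: the added 1.0 is absorbed by rounding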
In [2]:
using Random
Random.seed!(1)
Out[2]:
MersenneTwister(UInt32[0x00000001], Random.DSFMT.DSFMT_state(Int32[1749029653, …]), …)
In [3]:
## It seems to work: compare against a textbook two-pass implementation
using Statistics
x = randn(100)
# two-pass sample variance: squared deviations from the mean, with Bessel's correction
basevar = x -> (length(x)/(length(x)-1))*mean((x .- mean(x)).^2)
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (101.05387543499344, 2.9071941253525297)
badvar1(x) = 1.0204525605428276
basevar(x) = 1.0204525605428274
Out[3]:
1.0204525605428274
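In [ ]:
## Sanity check (added sketch): basevar is the textbook two-pass formula
# with Bessel's correction, so it should agree with Statistics.var up to
# roundoff on this well-scaled data.
@show basevar(x) ≈ var(x)   # expected: true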
In [4]:
## Bigger numbers
x = randn(10000).+1e4
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (1.0000024378415356e12, 1.000002427689298e16)
badvar1(x) = 1.0153252996198057
basevar(x) = 1.0153260486975786
Out[4]:
1.0153260486975786
In [5]:
## Getting bigger
x = randn(10000).+1e6
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (1.000000009796535e16, 1.0000000097955167e20)
badvar1(x) = 1.0185018501850185
basevar(x) = 1.0089159874917228
Out[5]:
1.0089159874917228
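In [ ]:
## Rough digit accounting (added sketch): with mean mu ≈ 1e6, variance ≈ 1,
# and n = 10^4, both ex2 and ex^2/n are about n*mu^2 = 1e16, while their
# difference is only about n = 1e4. The subtraction therefore cancels
# roughly 12 of Float64's ~16 significant digits, leaving only ~4 correct
# digits, consistent with badvar1 and basevar agreeing to just a few
# digits above.
mu = 1.0e6; n = 10_000
@show log10(n*mu^2) - log10(n)   # ≈ 12 digits lost to cancellation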
In [6]:
## And bigger still
x = randn(10000).+1e8
@show badvar1(x)
@show basevar(x)
(ex2, ex ^ 2) = (1.0000000001194625e20, 1.0000000001194626e24)
badvar1(x) = -1.6385638563856386
basevar(x) = 1.0011340684439614
Out[6]:
1.0011340684439614
In [ ]:
## Eek! That's completely wrong: a variance can never be negative.
# This shows the routine is not backward stable.
# But there is a better way to do this that keeps a little extra data
# around as it goes: Welford's one-pass update, shown below.
In [7]:
## This is essentially what Julia does: Welford's one-pass algorithm,
# which tracks a running mean and the running sum of squared deviations.
function goodvar(x::Vector{Float64})
    n = length(x)
    mu = 0.0   # running mean (renamed from `mean` to avoid shadowing Statistics.mean)
    m2 = 0.0   # running sum of squared deviations from the current mean
    N = 0.0    # count of samples processed so far
    for i=1:n
        N = N + 1
        delta = x[i] - mu          # deviation from the old mean
        mu = mu + delta/N          # update the running mean
        m2 = m2 + delta*(x[i]-mu)  # uses old and new means; no cancellation of large terms
    end
    return m2/(n-1)   # sample variance with Bessel's correction
end

x = randn(10000).+1e10
basevar = var   # from here on, use Statistics.var itself as the baseline
@show badvar1(x)
@show basevar(x)
@show goodvar(x)
@show var(x)
(ex2, ex ^ 2) = (9.999999999953065e23, 9.999999999953037e27)
badvar1(x) = 281885.41734173417
basevar(x) = 0.9864987970412976
goodvar(x) = 0.9864985157601345
var(x) = 0.9864987970412976
Out[7]:
0.9864987970412976
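In [ ]:
## A follow-up sketch (added; the helper name `relerr` and the BigFloat
# reference are our choices, not from the original notebook): sweep the
# shift and compare each routine against a high-precision baseline.
relerr(a, b) = abs(a - b)/abs(b)
for shift in (1e0, 1e4, 1e6, 1e8, 1e10)
    y = randn(10_000) .+ shift
    truth = Float64(var(big.(y)))   # two-pass variance computed in BigFloat
    println("shift = $shift: badvar1 relerr = $(relerr(badvar1(y), truth)), ",
            "goodvar relerr = $(relerr(goodvar(y), truth))")
end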
In [ ]: