using PyPlot
using LinearAlgebra
using Random
Random.seed!(12345);
Let's look at how our logistic regression model from the previous couple of demos is affected by training in low-precision floating-point arithmetic.
First, we define a simulated low-precision numeric type in Julia: it stores a Float64 internally and quantizes the result of every operation to the target precision.
import Base.convert
import Base.length
import Base.zero
import Base.one
import Base.typemin
import Base.typemax
import Base.transpose
import Base.sqrt
import LinearAlgebra.dot
import Base.exp
import Base.expm1
import Base.isless
import Base.log
import Base.+
import Base.-
import Base.*
import Base./
# tag used in place of an integer EBIAS to disable exponent-bias tracking
abstract type NoDynamicBias end;
# NB = number of mantissa bits
# NE = number of exponent bits
# EBIAS = extra exponent bias; the default of 0 corresponds to the standard IEEE bias of (1 << (NE-1)) - 1
# RM = rounding mode
struct SimFP{NB,NE,EBIAS,RM} <: Number
v::Float64;
SimFP{NB,NE,EBIAS,RM}(x::Float64) where {NB,NE,EBIAS,RM} = new{NB,NE,EBIAS,RM}(quantize(x, SimFP{NB,NE,EBIAS,RM}));
end
function Float64(x::SimFP)
return x.v
end
function convert(::Type{Float64}, x::SimFP)
return x.v
end
function convert(::Type{SimFP{NB,NE,EBIAS,RM}}, x::SimFP) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(x.v)
end
function convert(::Type{SimFP{NB,NE,EBIAS,RM}}, x::Float64) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(x)
end
function convert(::Type{SimFP{NB,NE,EBIAS,RM}}, x) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(convert(Float64, x))
end
function (::Type{SimFP{NB,NE,EBIAS,RM}})(x) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(convert(Float64, x))
end
# function convert(::Type{T}, x::SimFP) where {T}
# return convert(T, x.v)
# end
function zero(::Type{SimFP{NB,NE,EBIAS,RM}}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(0.0)
end
function zero(::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(0.0)
end
function one(::Type{SimFP{NB,NE,EBIAS,RM}}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(1.0)
end
function one(::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(1.0)
end
function typemin(::Type{SimFP{NB,NE,EBIAS,RM}}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(-floatmax(Float64))
end
function typemax(::Type{SimFP{NB,NE,EBIAS,RM}}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(floatmax(Float64))
end
function transpose(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return x
end
function sqrt(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(sqrt(x.v))
end
function dot(x::SimFP{NB,NE,EBIAS,RM}, y::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(dot(x.v, y.v))
end
function exp(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(exp(x.v))
end
function expm1(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(expm1(x.v))
end
function log(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(log(x.v))
end
function isless(x::SimFP{NB,NE,EBIAS,RM}, y::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return isless(x.v, y.v)
end
# tag types selecting the rounding mode used by quantize
abstract type RoundingModeNearest end;
abstract type RoundingModeRandom end;
abstract type RoundingModeNearestFast end;
abstract type RoundingModeRandomFast end;
function quantize(x::Float64, ::Type{SimFP{nb,ne,NoDynamicBias,RM}}) where {nb,ne,RM}
# NoDynamicBias means no extra exponent bias, i.e. the same as ebias = 0
return quantize(x, SimFP{nb,ne,0,RM});
end
function quantize(x::Float64, ::Type{SimFP{nb,ne,ebias,RoundingModeNearest}}) where {nb,ne,ebias}
if (x == 0.0)
return x;
end
# @assert(ne <= 11);
# round to nb mantissa bits by adding half a unit in the last kept place to the
# raw bit pattern and truncating (so ties round away from zero, not to even)
xi = reinterpret(Int64, x);
xir = (xi + (1 << (51 - nb))) & (~((1 << (52 - nb)) - 1));
if (ne < 11)
xir_pos = xir & (~(1 << 63));
xir_sign = xir & (1 << 63);
# representable range of the simulated format, as Float64 bit patterns
bias = ebias + (1 << (ne-1)) - 1;
qf_max = (((bias + 1 + 1023) << nb) - 1) << (52 - nb); # largest finite value
qf_min = (1023 - bias + 1) << 52; # smallest normal value
qf_min_d2 = (1023 - bias) << 52; # half the smallest normal value
if (xir_pos > qf_max)
return reinterpret(Float64, qf_max | xir_sign); # clamp on overflow
elseif (xir_pos < qf_min_d2)
return Float64(0.0); # flush underflow to zero (no subnormals)
elseif (xir_pos < qf_min)
return reinterpret(Float64, qf_min | xir_sign); # round up to smallest normal
end
end
return reinterpret(Float64, xir);
end
function quantize(x::Float64, ::Type{SimFP{nb,ne,ebias,RoundingModeRandom}}) where {nb,ne,ebias}
if (x == 0.0)
return x;
end
# @assert(ne <= 11);
# stochastic rounding: add uniformly random low-order bits and truncate, so the
# result equals x in expectation
mask = (1 << (52 - nb)) - 1;
xi = reinterpret(Int64, x);
xir = (xi + (rand(Int64) & mask)) & (~mask);
if (ne < 11)
xir_pos = xir & (~(1 << 63));
xir_sign = xir & (1 << 63);
bias = ebias + (1 << (ne-1)) - 1;
qf_max = (((bias + 1 + 1023) << nb) - 1) << (52 - nb); # largest finite value
qf_min = (1023 - bias + 1) << 52; # smallest normal value
if (xir_pos > qf_max)
return reinterpret(Float64, qf_max | xir_sign); # clamp on overflow
elseif (xir_pos < qf_min)
# round underflow up to the smallest normal with probability proportional
# to its magnitude, so the underflow behavior is unbiased as well
xfr_pos = reinterpret(Float64, xir_pos);
ff_min = reinterpret(Float64, qf_min);
if (xfr_pos / ff_min > rand())
return reinterpret(Float64, qf_min | xir_sign);
else
return Float64(0.0)
end
end
end
return reinterpret(Float64, xir);
end
# the "fast" rounding modes quantize the mantissa only, skipping the exponent-range checks
function quantize(x::Float64, ::Type{SimFP{nb,ne,ebias,RoundingModeNearestFast}}) where {nb,ne,ebias}
xi = reinterpret(Int64, x);
xir = (xi + (1 << (51 - nb))) & (~((1 << (52 - nb)) - 1));
return reinterpret(Float64, xir);
end
function quantize(x::Float64, ::Type{SimFP{nb,ne,ebias,RoundingModeRandomFast}}) where {nb,ne,ebias}
mask = (1 << (52 - nb)) - 1;
xi = reinterpret(Int64, x);
xir = (xi + (rand(Int64) & mask)) & (~mask);
return reinterpret(Float64, xir);
end
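As a quick sanity check (a sketch using the definitions above, not part of the original demo): with 10 mantissa bits and 5 exponent bits, nearest rounding should agree with IEEE Float16 on most inputs (ties can differ, since this quantizer rounds ties away from zero rather than to even), while stochastic rounding is unbiased even for values that underflow the exponent range.
F16Near = SimFP{10,5,0,RoundingModeNearest}; # hypothetical test type, same layout as Float16
for x in [0.1, 1.0/3.0, 3.14159]
println("$(x) -> $(Float64(F16Near(x)))  (Float16 gives $(Float64(Float16(x))))")
end
# 2e-5 is below half the smallest normal (2^-15), so nearest rounding flushes it
# to zero, while the mean of many stochastic roundings stays close to 2e-5
println("nearest: ", quantize(2.0e-5, SimFP{10,5,0,RoundingModeNearest}));
println("stochastic mean: ", sum(quantize(2.0e-5, SimFP{10,5,0,RoundingModeRandom}) for _ in 1:100000) / 100000);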
function +(x::SimFP{NB,NE,EBIAS,RM}, y::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(x.v + y.v)
end
function -(x::SimFP{NB,NE,EBIAS,RM}, y::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(x.v - y.v)
end
# multiplication and division propagate the extra exponent bias through the result
# type: biases add under * and subtract under /; the ebias = 0 special cases below
# exist to resolve dispatch ambiguities
function *(x::SimFP{NB,NE,0,RM}, y::SimFP{NB,NE,EBIASY,RM}) where {NB,NE,EBIASY,RM}
return SimFP{NB,NE,EBIASY,RM}(x.v * y.v)
end
function *(x::SimFP{NB,NE,EBIASX,RM}, y::SimFP{NB,NE,0,RM}) where {NB,NE,EBIASX,RM}
return SimFP{NB,NE,EBIASX,RM}(x.v * y.v)
end
function *(x::SimFP{NB,NE,0,RM}, y::SimFP{NB,NE,0,RM}) where {NB,NE,RM}
return SimFP{NB,NE,0,RM}(x.v * y.v)
end
function *(x::SimFP{NB,NE,EBIASX,RM}, y::SimFP{NB,NE,EBIASY,RM}) where {NB,NE,EBIASX,EBIASY,RM}
return SimFP{NB,NE,EBIASX+EBIASY,RM}(x.v * y.v)
end
function /(x::SimFP{NB,NE,EBIASX,RM}, y::SimFP{NB,NE,0,RM}) where {NB,NE,EBIASX,RM}
return SimFP{NB,NE,EBIASX,RM}(x.v / y.v)
end
function /(x::SimFP{NB,NE,EBIASX,RM}, y::SimFP{NB,NE,EBIASY,RM}) where {NB,NE,EBIASX,EBIASY,RM}
return SimFP{NB,NE,EBIASX-EBIASY,RM}(x.v / y.v)
end
function /(x::SimFP{NB,NE,EBIAS,RM}, y::Int64) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(x.v / Float64(y))
end
function -(x::SimFP{NB,NE,EBIAS,RM}) where {NB,NE,EBIAS,RM}
return SimFP{NB,NE,EBIAS,RM}(-x.v)
end
function +(::Type{NoDynamicBias}, ::Type{NoDynamicBias})
return NoDynamicBias;
end
function -(::Type{NoDynamicBias}, ::Type{NoDynamicBias})
return NoDynamicBias;
end
# pick an exponent bias that re-centers the representable range near the magnitude of the hint
function rescale_bias(::Type{SimFP{NB,NE,EBIAS,RM}}, hint) where {NB,NE,EBIAS,RM}
hint64 = Float64(hint);
eb = -Int64(floor(log2(hint64)));
println("based on hint ($(hint64)), setting exponent bias to: $(eb)")
return SimFP{NB,NE,eb,RM};
end
function rescale_bias(::Type{SimFP{NB,NE,NoDynamicBias,RM}}, hint) where {NB,NE,RM}
return SimFP{NB,NE,NoDynamicBias,RM};
end
function rescale_bias(::Type{Float16}, hint)
return Float16
end
function rescale_bias(::Type{Float32}, hint)
return Float32
end
function rescale_bias(::Type{Float64}, hint)
return Float64
end
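To see the exponent-bias machinery in action (again just a sketch on top of the definitions above, with made-up bias values): multiplying values whose types carry biases 2 and 3 yields a value whose type carries bias 5, and rescale_bias derives a bias from a magnitude hint.
A = SimFP{10,5,2,RoundingModeNearest};
B = SimFP{10,5,3,RoundingModeNearest};
println(typeof(A(0.25) * B(0.5))); # the product type carries exponent bias 2 + 3 = 5
rescale_bias(SimFP{10,5,0,RoundingModeNearest}, 1e-3); # picks bias 10, since 1e-3 is about 2^-10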
# generate the data
Random.seed!(424242)
d = 50;
N = 10000;
wtrue = randn(d);
wtrue = d^2 * wtrue / norm(wtrue); # scale the true weights to have norm d^2
X = randn(N, d);
X ./= sqrt.(sum(X.^2; dims=2)); # normalize each example to unit norm
Y = (1 ./ (1 .+ exp.(-X * wtrue)) .>= rand(N)) .* 2 .- 1; # sample labels in {-1,+1} from the logistic model
sigma = 1e-3;
w0 = zeros(d);
# SGD for l2-regularized logistic regression; gamma is accepted for interface
# compatibility but unused here
function sgd_logreg(w0::Array{T,1}, alpha::T, gamma::T, X::Array{T,2}, Y::Array{T,1}, sigma::T, niters::Int64, wopt::Array{T,1}) where {T<:Number}
w = w0
(N, d) = size(X)
dist2_to_optimum = zeros(niters)
for k = 1:niters
i = rand(1:N)
xi = X[i,:];
yi = Y[i];
# stochastic gradient step on the regularized logistic loss, carried out in type T
w += -alpha * sigma * w + alpha * xi * yi / (one(alpha) + exp(yi * dot(xi, w)));
dist2_to_optimum[k] = sum((w - wopt).^2);
end
return (w, dist2_to_optimum);
end
# run SGD entirely in numeric type T, then convert the results back to Float64
function sgd_logreg(::Type{T}, w0, alpha, gamma, X, Y, sigma, niters, wopt) where {T<:Number}
(w, dist2_to_optimum) = sgd_logreg(T.(w0), T(alpha), T(gamma), T.(X), T.(Y), T(sigma), niters, T.(wopt))
return (Float64.(w), Float64.(dist2_to_optimum));
end
# find the true minimum
function newton_logreg(w0, X, Y, sigma, niters)
N = size(X, 1);
d = size(X, 2);
w = w0;
for k = 1:niters
# gradient and Hessian of the full regularized objective
g = -X' * (Y ./ (1 .+ exp.(Y .* (X * w)))) + N * sigma * w;
H = X' * ((1 ./ ((1 .+ exp.(Y .* (X * w))) .* (1 .+ exp.(-Y .* (X * w))))) .* X) + N * sigma * I;
w = w - H \ g;
# println("gradient norm: $(norm(g))")
end
return w
end
wopt = newton_logreg(wtrue, X, Y, sigma, 20);
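As a sanity check (a sketch reusing the gradient expression from newton_logreg above), the gradient of the full objective at wopt should be essentially zero.
g_opt = -X' * (Y ./ (1 .+ exp.(Y .* (X * wopt)))) + N * sigma * wopt;
println("gradient norm at wopt: $(norm(g_opt))");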
# define low-precision types: 10 mantissa bits and 5 exponent bits matches the layout of IEEE Float16
Float16Rand = SimFP{10,5,0,RoundingModeRandom};
Float16Near = SimFP{10,5,0,RoundingModeNearest};
alpha = 0.1;
Random.seed!(123456);
(w, dto) = sgd_logreg(Float32, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w2, dto2) = sgd_logreg(Float16Rand, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w3, dto3) = sgd_logreg(Float16Near, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
plot(dto; label="Float32");
plot(dto2; label="Float16 (random)");
plot(dto3; label="Float16 (nearest)");
legend();
ylim([0,10]);
alpha = 0.1;
# a 12-bit-style format: 6 mantissa bits and 5 exponent bits
Float12Rand = SimFP{6,5,0,RoundingModeRandom};
Float12Near = SimFP{6,5,0,RoundingModeNearest};
Random.seed!(123456);
(w, dto) = sgd_logreg(Float32, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w2, dto2) = sgd_logreg(Float12Rand, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w3, dto3) = sgd_logreg(Float12Near, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
plot(dto; label="Float32");
plot(dto2; label="Float12 (random)");
plot(dto3; label="Float12 (nearest)");
legend();
ylim([0,10]);
alpha = 1.0; # note the larger step size used for this format
# a "Float8" format: 4 mantissa bits and 5 exponent bits
Float8Rand = SimFP{4,5,0,RoundingModeRandom};
Float8Near = SimFP{4,5,0,RoundingModeNearest};
Random.seed!(123456);
(w, dto) = sgd_logreg(Float32, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w2, dto2) = sgd_logreg(Float8Rand, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
Random.seed!(123456);
(w3, dto3) = sgd_logreg(Float8Near, w0, alpha, sigma, X, Y, sigma, 100000, wopt);
plot(dto; label="Float32");
plot(dto2; label="Float8 (random)");
plot(dto3; label="Float8 (nearest)");
legend();
Consider a simple linear regression task. Our objective is of the form $$ f(w) = \frac{1}{2n} \sum_{i=1}^n (x_i^T w - y_i)^2. $$ Let's suppose we are working with $w \in \mathbb{R}^2$, and pick a specific set of training examples $$ x_i = \left[ \begin{array}{c} \cos(\pi i / n) \\ \sin(\pi i / n) \end{array} \right], \hspace{2em} y_i = 0. $$ Our stochastic gradient updates will be $$ w_{t+1} = w_t - \alpha x_i (x_i^T w_t - y_i). $$ Note that each $x_i$ has unit norm, so with $\alpha = 1$ and $y_i = 0$ a single update replaces $w_t$ with its orthogonal projection onto the hyperplane perpendicular to $x_i$; we check this numerically right after setting up the data. Let's pick $\alpha = 1$ and see what happens when we vary the scan order.
(This is the normalized tight frame example from here: https://people.eecs.berkeley.edu/~brecht/papers/12.Recht.Re.amgm.pdf).
n = 99;
d = 2;
xs = [[cos(pi * i / n), sin(pi * i / n)] for i = 1:n];
ys = [0.0 for i = 1:n];
w0 = rand(d);
alpha = 1.0;
# true solution is just zero
w_opt = zeros(d);
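As a quick check of the projection claim above (a sketch using the data we just generated): after one update, $w$ is orthogonal to $x_i$, so repeating the same update changes nothing.
xi = xs[1];
w1 = w0 - xi * dot(xi, w0); # one SGD update with alpha = 1, yi = 0
w2 = w1 - xi * dot(xi, w1); # repeating the same update is a no-op (up to roundoff)
println("after one update:  $(w1)");
println("after two updates: $(w2)");
println("dot(xi, w1) = $(dot(xi, w1))"); # zero up to roundoff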
# SGD with a fixed sequential (cyclic) scan over the examples
function sgd_sequential_scan(xs, ys, w0, alpha, w_opt, num_iters)
n = length(xs);
w = w0;
dists_to_opt = Float64[];
for t = 1:num_iters
i = mod1(t,n);
xi = xs[i];
yi = ys[i];
w -= alpha * xi * (dot(xi, w) - yi);
push!(dists_to_opt, norm(w - w_opt));
end
return dists_to_opt;
end
# SGD sampling an example uniformly at random at each step
function sgd_random_scan(xs, ys, w0, alpha, w_opt, num_iters)
n = length(xs);
w = w0;
dists_to_opt = Float64[];
for t = 1:num_iters
i = rand(1:n);
xi = xs[i];
yi = ys[i];
w -= alpha * xi * (dot(xi, w) - yi);
push!(dists_to_opt, norm(w - w_opt));
end
return dists_to_opt;
end
num_iters = 500;
dto_seq_scan = sgd_sequential_scan(xs, ys, w0, alpha, w_opt, num_iters);
dto_rnd_scan = sgd_random_scan(xs, ys, w0, alpha, w_opt, num_iters);
semilogy(dto_seq_scan; label="sequential scan");
semilogy(dto_rnd_scan; label="random scan");
legend(loc=3);
xlabel("iteration");
ylabel("distance to optimum");
title("Comparison of Different Scan Orders for SGD");
Let's investigate by looking at random permutations of the input data: each run below shuffles the examples once and then scans sequentially over that fixed order.
dto_seq_scan_ro = [sgd_sequential_scan(shuffle(xs), ys, w0, alpha, w_opt, num_iters) for i = 1:5];
semilogy(dto_seq_scan; label="sequential scan");
semilogy(dto_rnd_scan; label="random scan");
for k = 1:5
semilogy(dto_seq_scan_ro[k]; label="sequential scan (random order $k)");
end
legend(loc=3);
xlabel("iteration");
ylabel("distance to optimum");
title("Comparison of Different Scan Orders for SGD");