using PyPlot
using LinearAlgebra
Suppose we want to solve a simple optimization problem over $\mathbb{R}^d$ with objective function $$ f(x) = \frac{1}{2} \| x \|^2, $$ which is minimized at $x^* = 0$. Here, the gradient is $$ \nabla f(x) = x. $$
# exact gradient of the objective f
function grad_f(x)
    return x
end
Suppose that our stochastic objective samples are of the form $$\tilde f(x) = \frac{1}{2} \| x \|^2 + x^T z$$ where $z \sim N\left(0, \frac{1}{d} \cdot I\right)$ is a Gaussian random variable with expected squared norm $\mathbf{E} \| z \|^2 = 1$. Then our gradient samples will be of the form $$ \nabla \tilde f(x) = x + z. $$
# stochastic gradient sample: the exact gradient plus Gaussian noise scaled so that E||z||^2 = 1
function sample_grad_f(x)
    return x + randn(length(x)) / sqrt(length(x))
end
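Since $z$ has mean zero, these samples are unbiased: $\mathbf{E}[\nabla \tilde f(x)] = \nabla f(x)$. As a quick Monte Carlo sanity check, we can estimate $\mathbf{E} \| z \|^2$ empirically and confirm it is close to $1$:
# Monte Carlo estimate of E||z||^2 for the noise used in sample_grad_f
d = 100;
avg_norm2 = sum(norm(randn(d) / sqrt(d))^2 for _ = 1:10000) / 10000;
println(avg_norm2);  # should be close to 1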
# gradient descent with constant step size alpha, recording the distance
# to the optimum x* = 0 at every iteration
function gradient_descent(x0, alpha, num_iters)
    dist_to_optimum = zeros(num_iters)
    x = x0
    for t = 1:num_iters
        x = x - alpha * grad_f(x)
        dist_to_optimum[t] = norm(x)
    end
    return dist_to_optimum
end
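For this particular objective, we can predict exactly how gradient descent behaves: the update is $$ x_{t+1} = x_t - \alpha \nabla f(x_t) = (1 - \alpha) x_t, $$ so $$ \| x_t \| = (1 - \alpha)^t \| x_0 \|, $$ and the distance to the optimum decays geometrically whenever $0 < \alpha < 2$ (a straight line on a semilog plot).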
# stochastic gradient descent with constant step size alpha, drawing a fresh
# noisy gradient sample at every iteration
function stochastic_gradient_descent(x0, alpha, num_iters)
    dist_to_optimum = zeros(num_iters)
    x = x0
    for t = 1:num_iters
        x = x - alpha * sample_grad_f(x)
        dist_to_optimum[t] = norm(x)
    end
    return dist_to_optimum
end
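In contrast, the SGD update is $$ x_{t+1} = (1 - \alpha) x_t - \alpha z_t, $$ and since $z_t$ is independent of $x_t$ with mean zero and $\mathbf{E} \| z_t \|^2 = 1$, taking expectations gives $$ \mathbf{E} \| x_{t+1} \|^2 = (1 - \alpha)^2 \, \mathbf{E} \| x_t \|^2 + \alpha^2. $$ This recurrence converges not to $0$ but to the fixed point $$ \frac{\alpha^2}{1 - (1 - \alpha)^2} = \frac{\alpha}{2 - \alpha}, $$ so with a constant step size we should expect SGD to plateau in a "noise ball" around the optimum rather than converge to it.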
# first, a one-dimensional run
x0 = [5.0];
alpha = 0.1;
num_iters = 1000;
gd_dist = gradient_descent(x0, alpha, num_iters);
sgd_dist = stochastic_gradient_descent(x0, alpha, num_iters);
figure();  # distance to optimum on a linear scale
plot(1:num_iters, gd_dist, 1:num_iters, sgd_dist);
figure();  # same curves on a log scale
semilogy(1:num_iters, gd_dist, 1:num_iters, sgd_dist);
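Based on the analysis above, the two curves should look similar on the linear-scale plot, but the semilog plot should show gradient descent descending along a straight line while SGD levels off near the noise-ball radius $$ \sqrt{\frac{\alpha}{2 - \alpha}} = \sqrt{\frac{0.1}{1.9}} \approx 0.23. $$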
# now a higher-dimensional run with d = 100 and a random initialization
x0 = 2 * randn(100);
alpha = 0.1;
num_iters = 5000;
gd_dist = gradient_descent(x0, alpha, num_iters);
sgd_dist = stochastic_gradient_descent(x0, alpha, num_iters);
figure();  # linear scale
plot(1:num_iters, gd_dist, 1:num_iters, sgd_dist);
figure();  # linear scale, zoomed in near the noise floor
plot(1:num_iters, gd_dist, 1:num_iters, sgd_dist);
ylim([-0.1,0.6]);
figure();  # log scale
semilogy(1:num_iters, gd_dist, 1:num_iters, sgd_dist);