using LinearAlgebra;
# Problem size: A is m×n, B is n×p, so the product C is m×p.
n, m, p = 1024, 1024, 1024;

# Single-precision operands filled with standard-normal samples (A is drawn first).
A = randn(Float32, (m, n));
B = randn(Float32, (n, p));

# Preallocated output buffer for the in-place product.
C = fill(zero(Float32), m, p);

# Time the library routine writing A*B into C.
@time mul!(C, A, B);
# Output: 0.011916 seconds (4 allocations: 160 bytes)
"""
    my_mul!(C, A, B)

Overwrite `C` with the matrix product `A * B` using a plain triple loop, and return `C`.

Each entry `C[i, k]` is the sum over `j` of `A[i, j] * B[j, k]`. The running sum starts
from the `Float64` literal `0.0`, so it is accumulated in double precision and converted
to `Float32` when it is stored into `C`.

# Arguments
- `C::Matrix{Float32}`: output of size `(m, p)`; its previous contents are overwritten.
- `A::Matrix{Float32}`: left operand of size `(m, n)`.
- `B::Matrix{Float32}`: right operand of size `(n, p)`.

# Throws
- `DimensionMismatch`: if the number of columns of `A` differs from the number of rows
  of `B`, or if `C` is not of size `(m, p)`.
"""
function my_mul!(C::Matrix{Float32}, A::Matrix{Float32}, B::Matrix{Float32})
    m, n = size(A)
    nb, p = size(B)

    # Validate shapes before touching C. Without these checks a B with fewer rows than
    # A has columns would silently yield a wrong product, and other mismatches would
    # only surface as a BoundsError after C had been partially overwritten.
    nb == n || throw(DimensionMismatch(
        "A has dimensions ($m, $n) but B has dimensions ($nb, $p)"))
    size(C) == (m, p) || throw(DimensionMismatch(
        "C has dimensions $(size(C)), expected ($m, $p)"))

    for i in 1:m
        for k in 1:p
            # Float64 accumulator; narrowed to Float32 by the assignment into C below.
            acc = 0.0
            for j in 1:n
                acc += A[i, j] * B[j, k]
            end
            C[i, k] = acc
        end
    end
    return C
end
# Output: my_mul! (generic function with 2 methods)
# Time the hand-written my_mul! on the same C, A, B, for comparison with mul!.
# NOTE(review): if this is the first call after (re)defining my_mul!, the reported time
# also includes compilation — run it a second time for a steady-state figure.
@time my_mul!(C, A, B);
# Output: 2.036659 seconds (4 allocations: 160 bytes)