# Start by telling numpy to use 4 threads.
import multiprocessing
import os
import time

# Environment variables that the common numpy BLAS/OpenMP backends consult for
# their thread count.  They are typically read once, when the library is first
# loaded, so changing them after numpy has been imported has no effect on the
# running process.
THREAD_ENV_VARS = ("OMP_NUM_THREADS", "MKL_NUM_THREADS", "OPENBLAS_NUM_THREADS")

# Number of times each product is repeated inside one timed loop.
REPEATS = 10


def thread_env(num_threads):
    """Return the environment settings that request *num_threads* threads.

    Args:
        num_threads: Desired thread count.

    Returns:
        A dict mapping every name in ``THREAD_ENV_VARS`` to
        ``str(num_threads)``.
    """
    return {name: str(num_threads) for name in THREAD_ENV_VARS}


def time_products(n, p, repeats=REPEATS):
    """Time repeated matrix-matrix and matrix-vector products.

    Builds a random ``n x n`` matrix and multiplies it ``repeats`` times by a
    random ``n x p`` matrix, then ``repeats`` times by a random ``n x 1``
    column vector.

    Args:
        n: Side length of the square matrix.
        p: Number of columns of the right-hand matrix.
        repeats: Number of products in each timed loop.

    Returns:
        ``(elapsed_mm, elapsed_mv)``: total seconds spent in the
        matrix-matrix loop and in the matrix-vector loop.
    """
    # Imported here rather than at module level so that the thread-count
    # variables are already in the environment when numpy first loads in the
    # process that does the timing.
    import numpy

    matrix = numpy.random.rand(n, n)
    rhs_block = numpy.random.rand(n, p)
    rhs_vector = numpy.random.rand(n, 1)

    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    for _ in range(repeats):
        numpy.dot(matrix, rhs_block)
    elapsed_mm = time.perf_counter() - start

    start = time.perf_counter()
    for _ in range(repeats):
        numpy.dot(matrix, rhs_vector)
    elapsed_mv = time.perf_counter() - start

    return elapsed_mm, elapsed_mv


def run_with_threads(num_threads, n, p, repeats=REPEATS):
    """Run ``time_products`` in a fresh interpreter limited to *num_threads*.

    The thread-count variables are written to ``os.environ`` and the timing is
    done in a newly spawned Python process that inherits them.  A new process
    is used because re-importing numpy in the current one is a no-op, so a
    changed thread count would otherwise be ignored.

    Args:
        num_threads: Thread count to request from the numerical backend.
        n: Side length of the square matrix.
        p: Number of columns of the right-hand matrix.
        repeats: Number of products in each timed loop.

    Returns:
        The ``(elapsed_mm, elapsed_mv)`` tuple produced by ``time_products``.
    """
    os.environ.update(thread_env(num_threads))
    # "spawn" starts a brand-new interpreter (no forked numpy state).
    spawn_context = multiprocessing.get_context("spawn")
    with spawn_context.Pool(processes=1) as pool:
        return pool.apply(time_products, (n, p, repeats))


# The guard is required by the "spawn" start method: the child re-imports this
# module and must not start the benchmark again.
if __name__ == "__main__":
    # look at matrix-matrix multiply
    n = 1024 * 16
    p = 32

    # 4 threads: matrix-matrix average, then matrix-vector average.
    elapsed_4t_mm, elapsed_4t_mv = run_with_threads(4, n, p)
    print("time elapsed: %f" % (elapsed_4t_mm / REPEATS))
    print("time elapsed: %f" % (elapsed_4t_mv / REPEATS))

    # 1 thread: same measurements, stored under their own names so the
    # 4-thread results are not overwritten.
    elapsed_1t_mm, elapsed_1t_mv = run_with_threads(1, n, p)
    print("time elapsed: %f" % (elapsed_1t_mm / REPEATS))
    print("time elapsed: %f" % (elapsed_1t_mv / REPEATS))