Let's test the effect of parallelism on some dense linear-algebra operations (matrix-matrix and matrix-vector products) in Python.

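Start by telling NumPy to use 4 threads. The environment variables have to be set before NumPy is imported, which is why they come first in the cell below.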

In [1]:
import os

# Cap the thread count at 4 for each threading backend numpy's linear algebra
# may be built on (OpenMP, MKL, OpenBLAS).
# This is deliberately done before `import numpy`: presumably the backends read
# these variables when they are first loaded, so setting them afterwards would
# not change the thread count -- verify against the installed BLAS.
os.environ.update(
    {
        "OMP_NUM_THREADS": "4",
        "MKL_NUM_THREADS": "4",
        "OPENBLAS_NUM_THREADS": "4",
    }
)

import numpy
import time
In [2]:
# Operands for the timing cells: a square matrix plus two right-hand sides,
# one for the matrix-matrix product and one for the matrix-vector product.
# NOTE(review): A has n*n = 16384**2 entries; if numpy returns float64 here
# (its usual default) that is about 2 GiB -- confirm the machine has the RAM.
n = 16 * 1024
p = 32
A = numpy.random.random_sample((n, n))   # n x n left operand
X = numpy.random.random_sample((n, p))   # n x p block of columns
X1 = numpy.random.random_sample((n, 1))  # single column
In [3]:
# Time the matrix-matrix product numpy.dot(A, X) for the 4-thread run.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
n_reps = 10  # repetitions averaged over; used for both the loop and the mean
t = time.perf_counter()
for i in range(n_reps):
    numpy.dot(A, X)
elapsed_4t_mm = time.perf_counter() - t  # total seconds for all repetitions
print("time elapsed: %f" % (elapsed_4t_mm / n_reps))  # mean seconds per product
time elapsed: 0.388520
In [4]:
# Time the matrix-vector product numpy.dot(A, X1) for the 4-thread run.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
n_reps = 10  # repetitions averaged over; used for both the loop and the mean
t = time.perf_counter()
for i in range(n_reps):
    numpy.dot(A, X1)
elapsed_4t_mv = time.perf_counter() - t  # total seconds for all repetitions
print("time elapsed: %f" % (elapsed_4t_mv / n_reps))  # mean seconds per product
time elapsed: 0.111653

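RESTART THE KERNEL, then repeat the same measurements with a single thread. A fresh kernel is needed because the thread-count environment variables only take effect if they are set before NumPy is first imported.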

In [1]:
import os

# Cap the thread count at 1 for each threading backend numpy's linear algebra
# may be built on (OpenMP, MKL, OpenBLAS), giving the single-threaded baseline.
# This is deliberately done before `import numpy`: presumably the backends read
# these variables when they are first loaded, so setting them afterwards would
# not change the thread count -- verify against the installed BLAS.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1",
        "OPENBLAS_NUM_THREADS": "1",
    }
)

import numpy
import time
In [2]:
# Operands for the timing cells: a square matrix plus two right-hand sides,
# one for the matrix-matrix product and one for the matrix-vector product.
# Sizes match the 4-thread run so the timings are comparable.
# NOTE(review): A has n*n = 16384**2 entries; if numpy returns float64 here
# (its usual default) that is about 2 GiB -- confirm the machine has the RAM.
n = 16 * 1024
p = 32
A = numpy.random.random_sample((n, n))   # n x n left operand
X = numpy.random.random_sample((n, p))   # n x p block of columns
X1 = numpy.random.random_sample((n, 1))  # single column
In [3]:
# Time the matrix-matrix product numpy.dot(A, X) for the 1-thread run.
# The result is stored as elapsed_1t_mm (not *_4t_*) so the name matches the
# thread count configured at the top of this kernel session.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
n_reps = 10  # repetitions averaged over; used for both the loop and the mean
t = time.perf_counter()
for i in range(n_reps):
    numpy.dot(A, X)
elapsed_1t_mm = time.perf_counter() - t  # total seconds for all repetitions
print("time elapsed: %f" % (elapsed_1t_mm / n_reps))  # mean seconds per product
time elapsed: 0.604862
In [4]:
# Time the matrix-vector product numpy.dot(A, X1) for the 1-thread run.
# The result is stored as elapsed_1t_mv (not *_4t_*) so the name matches the
# thread count configured at the top of this kernel session.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
n_reps = 10  # repetitions averaged over; used for both the loop and the mean
t = time.perf_counter()
for i in range(n_reps):
    numpy.dot(A, X1)
elapsed_1t_mv = time.perf_counter() - t  # total seconds for all repetitions
print("time elapsed: %f" % (elapsed_1t_mv / n_reps))  # mean seconds per product
time elapsed: 0.122234
In [ ]:
 
In [ ]: