In [1]:
import pandas as pd 
In [2]:
import numpy as np
import matplotlib.pyplot as plt
In [3]:
# Smoke test for the plotting stack: draw the values produced by
# np.linspace(0, 1) as a single line.
plt.plot( np.linspace(0,1))
Out[3]:
[<matplotlib.lines.Line2D at 0x7f559b1322e8>]

NumPy sanity check

In [4]:
# Both square roots are evaluated, but only the value of the final expression
# is echoed as the cell result; the first value is discarded.
np.sqrt(2.2)
np.sqrt(22.222222222222222222222)
Out[4]:
4.714045207910317

Parallelism with Ray

In [5]:
import ray, time
In [6]:
# A regular Python function.
def regular_function():
    """Return the constant 1; the plain-Python counterpart of ``remote_function``."""
    return 1

# A Ray remote function.
@ray.remote
def remote_function():
    """Return the constant 1; same body as ``regular_function`` but wrapped by ``ray.remote``."""
    return 1
In [7]:
# Start Ray for this notebook with num_cpus=4 and the web UI switched off.
# NOTE(review): ignore_reinit_error=True is presumably there so the cell can be
# re-run in the same kernel; confirm the keyword names against the installed Ray version.
ray.init(num_cpus=4, ignore_reinit_error=True, include_webui=False)
2020-01-31 06:08:53,928	INFO node.py:497 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-01-31_06-08-53_927217_33912/logs.
2020-01-31 06:08:54,059	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:24343 to respond...
2020-01-31 06:08:54,183	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:51853 to respond...
2020-01-31 06:08:54,190	INFO services.py:806 -- Starting Redis shard with 6.59 GB max memory.
2020-01-31 06:08:54,218	INFO node.py:511 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-01-31_06-08-53_927217_33912/logs.
2020-01-31 06:08:54,224	INFO services.py:1441 -- Starting the Plasma object store with 9.89 GB memory using /dev/shm.
Out[7]:
{'node_ip_address': '192.168.37.13',
 'redis_address': '192.168.37.13:24343',
 'object_store_address': '/tmp/ray/session_2020-01-31_06-08-53_927217_33912/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-01-31_06-08-53_927217_33912/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2020-01-31_06-08-53_927217_33912'}

This is how a remote function is run and its result retrieved: `remote_function.remote()` submits the call, and `ray.get(...)` returns its result. More complex functions are invoked in exactly the same way; only the function body differs.

In [8]:
# Invoke the remote function through .remote() and fetch its result with ray.get.
ray.get(remote_function.remote())
Out[8]:
1

Just-in-time compilation with Numba

In [13]:
from numba import jit
import numba
import random
In [14]:
# Record the Numba version that the timings in this section were measured with.
print(numba.__version__)
0.48.0
In [9]:
def monte_carlo_pi_ton(nsamples):
    """Estimate pi by Monte Carlo sampling in pure (interpreted) Python.

    Draws ``nsamples`` points ``(x, y)`` with ``random.random()`` and counts
    how many satisfy ``x**2 + y**2 < 1``; the estimate is four times the
    fraction of such points.

    Algorithm taken from https://numba.pydata.org/

    Parameters
    ----------
    nsamples : int
        Number of random points to draw.

    Returns
    -------
    float
        ``4.0 * hits / nsamples``.

    Raises
    ------
    ZeroDivisionError
        If ``nsamples`` is 0.
    """
    hits = 0
    for _ in range(nsamples):
        # Draw x first, then y, so the random stream is consumed in a fixed order.
        px = random.random()
        py = random.random()
        radius_sq = px ** 2 + py ** 2
        if radius_sq < 1.0:
            hits += 1
    return 4.0 * hits / nsamples

The nopython=True option requires that the function be fully compiled (so that the Python interpreter calls are completely removed), otherwise an exception is raised. These exceptions usually indicate places in the function that need to be modified in order to achieve better-than-Python performance. We strongly recommend always using nopython=True.

In [11]:
@jit(nopython=True)
def monte_carlo_pi(nsamples):
    """Estimate pi by Monte Carlo sampling, compiled by Numba in nopython mode.

    Same algorithm as ``monte_carlo_pi_ton``: draw ``nsamples`` points
    ``(x, y)`` with ``random.random()``, count those with ``x**2 + y**2 < 1``
    and return ``4.0 * hits / nsamples``.
    """
    hits = 0
    for _ in range(nsamples):
        px = random.random()
        py = random.random()
        radius_sq = px ** 2 + py ** 2
        if radius_sq < 1.0:
            hits += 1
    return 4.0 * hits / nsamples

The first time the function is called, a new version of the function is compiled and then executed. If we call it again, the previously generated function executes without another compilation step.

In [15]:
# First timing of the jitted function after its definition (10,000 samples).
%timeit monte_carlo_pi(10000)
344 µs ± 5.32 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [16]:
# Same measurement repeated once the function has already been called.
%timeit monte_carlo_pi(10000)
121 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
In [17]:
# Third repetition, to check that the timing is stable.
%timeit monte_carlo_pi(10000)
126 µs ± 1.58 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
In [21]:
# Pure-Python baseline for the same 10,000 samples.
%timeit monte_carlo_pi_ton(10000)
3.7 ms ± 65.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [22]:
# Jitted version with 10**6 samples.
%timeit monte_carlo_pi(1000000)
12.1 ms ± 82.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [23]:
# Pure-Python baseline with 10**6 samples.
%timeit monte_carlo_pi_ton(1000000)
353 ms ± 7.53 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [24]:
# Jitted version with 10**8 samples.
%timeit monte_carlo_pi(100000000)
1.16 s ± 9.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [25]:
# Pure-Python baseline with 10**8 samples; slow: the recorded run took
# roughly 36 s per call.
%timeit monte_carlo_pi_ton(100000000)
36.3 s ± 756 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

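Compilation gives a speed-up of roughly a factor of 30 at every sample size measured above (for example, 36.3 s → 1.16 s for 10^8 samples).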

Ray and Numba

In [28]:
# A Ray remote function.
# structure from https://github.com/numba/numba/issues/4256
@ray.remote
def monte_carlo_pi_numba_ray(N=10**6):
    """Run the Numba-compiled ``monte_carlo_pi`` with ``N`` samples as a Ray remote function.

    ``N`` defaults to 10**6; the return value is whatever ``monte_carlo_pi(N)`` returns.
    """
    return monte_carlo_pi(N)
In [37]:
# Time 2 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=4.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(2)])
15.3 ms ± 334 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [38]:
# Time 4 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=4.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(4)])
16 ms ± 633 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [39]:
# Stop the current Ray runtime so that it can be re-initialised with a different num_cpus.
ray.shutdown()
In [40]:
# Restart Ray with num_cpus=20 (web UI still off) for the larger task counts timed below.
# NOTE(review): confirm the keyword names against the installed Ray version.
ray.init(num_cpus=20, ignore_reinit_error=True, include_webui=False)
2020-01-31 06:40:08,806	INFO node.py:497 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-01-31_06-40-08_805684_33912/logs.
2020-01-31 06:40:08,926	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:18293 to respond...
2020-01-31 06:40:09,052	INFO services.py:409 -- Waiting for redis server at 127.0.0.1:27498 to respond...
2020-01-31 06:40:09,059	INFO services.py:806 -- Starting Redis shard with 6.59 GB max memory.
2020-01-31 06:40:09,095	INFO node.py:511 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2020-01-31_06-40-08_805684_33912/logs.
2020-01-31 06:40:09,101	INFO services.py:1441 -- Starting the Plasma object store with 9.89 GB memory using /dev/shm.
Out[40]:
{'node_ip_address': '192.168.37.13',
 'redis_address': '192.168.37.13:18293',
 'object_store_address': '/tmp/ray/session_2020-01-31_06-40-08_805684_33912/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-01-31_06-40-08_805684_33912/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2020-01-31_06-40-08_805684_33912'}
In [41]:
# Time 2 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=20.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(2)])
14.5 ms ± 3.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [42]:
# Time 4 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=20.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(4)])
17.1 ms ± 3.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [45]:
# Time 12 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=20.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(12)])
22.9 ms ± 1.23 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [44]:
# Time 16 remote tasks (default N = 10**6 each) on the Ray runtime started with num_cpus=20.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(16)])
28 ms ± 1.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [46]:
# Time 48 remote tasks (default N = 10**6 each): more tasks than the num_cpus=20 given to Ray.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(48)])
66.7 ms ± 6.12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [48]:
# Time 96 remote tasks (default N = 10**6 each): more tasks than the num_cpus=20 given to Ray.
%timeit ray.get([monte_carlo_pi_numba_ray.remote() for i in range(96)])
110 ms ± 1.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

cpuOccupation.png

In [ ]: