import numpy as np
import matplotlib.pyplot as plt
It is possible to use symbols in Python using SymPy. It is clumsier than Mathematica, unfortunately, but it is still possible.
# Import SymPy and show which version is installed.
import sympy
sympy.__version__
some input/output handlers and beautifier
from sympy import latex
from sympy.printing.mathml import mathml
from sympy import mathematica_code as mcode
from IPython.display import display
let us introduce some symbols. A symbol is a variable that is handled by sympy
# Ready-made symbols from sympy.abc ("lamda" avoids the Python keyword "lambda").
from sympy.abc import theta, lamda
# Enable pretty-printing of symbolic expressions in the notebook.
sympy.init_printing()
display(theta)
# Build a symbolic expression in theta.
y = sympy.sin(theta)
display(y)
# Query the assumption system (None when no assumption is set on the symbol).
print(theta.is_complex)
# Declare real, positive symbols so integration/solving can simplify.
x, l, z = sympy.symbols('x l z',real=True,positive=True)
sympy.abc.lamda
# NOTE(review): this placeholder Symbol is immediately overwritten below.
cdf = sympy.symbols('cdf')
# CDF of the exponential distribution: integral of l*exp(-l*z) over [0, x].
cdf=sympy.integrate(l*sympy.exp(-l*z), (z, 0, x))
display(cdf)
# Export the expression in various markup formats.
mathml(cdf)
latex(cdf)
mcode(cdf)
we evaluate the function cdf with the method subs
# IPython magic: benchmark the cost of a symbolic substitution.
%timeit cdf.subs([(x,1.0),(l,1.0)])
# Evaluate the CDF at x = 1, lambda = 1 via substitution.
cdf.subs([(x,1.0),(l,1.0)])
# NOTE(review): this placeholder Symbol is immediately overwritten below.
sol = sympy.symbols('sol')
# Solve CDF(x) = 0.19 for real x; the solution set still contains lambda.
sol = sympy.solveset(sympy.Eq(cdf,0.19),x,domain=sympy.S.Reals)
print(sol)
# Substitute a numeric lambda into the solution set.
sol.subs([(l,2.2)])
# Five uniform random draws in [0, 1).
r_values=np.random.rand(5)
r_values
Solve for $x\in \mathbb{R}$: $$CDF(x)=\int^x_0 pdf(z) dz =\xi$$
def solver(xi, laa):
    """Solve CDF(x) = xi for real x, then substitute lambda = laa into the solution set."""
    equation = sympy.Eq(cdf, xi)
    solution_set = sympy.solveset(equation, x, domain=sympy.S.Reals)
    return solution_set.subs([(l, laa)])
Just a different definition in which I substitute $\lambda$ before solving.
def solver(xi, laa):
    """Solve CDF(x) = xi for real x after substituting lambda = laa into the CDF first."""
    specialised_cdf = cdf.subs(l, laa)
    target_equation = sympy.Eq(specialised_cdf, xi)
    return sympy.solveset(target_equation, x, domain=sympy.S.Reals)
# Draw 5 uniform random targets in [0, 1) and invert the CDF for each.
r_values=np.random.rand(5)
[ solver(xi,3.0) for xi in r_values ];
r_values=np.random.rand(5)
list(map(lambda xi: solver(xi,3.0), r_values))
print(r_values)
# Substitute numeric values, then compile to a fast numpy-backed function.
T = cdf.subs([(x,1.0),(l,1.0)])
cdf_lambda_function = sympy.lambdify((x,l), T, "numpy")
# BUG FIX: lambdify((x, l), ...) produces a function of TWO positional
# arguments; the original call passed a single tuple, raising TypeError.
cdf_lambda_function(1.0, 1.0)
from scipy import integrate
from scipy.optimize import fsolve
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
def CDF(x, lam):
    """Analytic CDF of the exponential distribution: 1 - exp(-lam * x).

    FIX: uses np.exp instead of scipy.exp — the NumPy aliases that used to
    live in the top-level scipy namespace were deprecated and removed.
    Works element-wise for array `x` as well as scalars.
    """
    return 1 - np.exp(-lam*x)
# Quick numeric check of the closed-form CDF.
CDF(1,1.2)
# NOTE(review): `del eq` assumes eq already exists (from a previous notebook
# run); in a fresh linear execution this raises NameError — confirm intent.
del eq
def eq(x, lam, xi):
    """Residual of CDF(x; lam) = xi; the root finder drives this to zero."""
    residual = CDF(x, lam) - xi
    return residual
# 20000 uniform targets xi; invert the analytic CDF numerically for each one.
r_values=np.random.rand(20000)
_exp_r_values = [ scipy.optimize.root(eq, 0.15 , args=(3.0, xii,)) for xii in r_values ]
# Keep only the converged solutions; sol['x'] is a length-1 array.
exp_r_values = [ sol['x'] for sol in _exp_r_values if sol['success'] ]
# Flatten the (n, 1) array of roots into a 1-D vector of samples.
vals=np.transpose(np.array(exp_r_values))[0]
bins = np.linspace(0, 3, 15)
# The histogram of the roots should follow the exponential distribution.
counts, binbegend , p = plt.hist(vals,bins,log=True)
def PDF(x, lam):
    """Exponential probability density lam * exp(-lam * x).

    FIX: np.exp replaces scipy.exp — the NumPy aliases in the scipy
    namespace were deprecated and removed. Vectorised over array `x`.
    """
    return lam*np.exp(-lam*x)
The integral of the PDF as a function of the end of the range (computed with classical numerical methods).
def CDFi(x, lam):
    """Numerical CDF: quadrature of PDF over [0, x].

    Returns the (value, abserr) pair produced by scipy.integrate.quad.
    """
    quad_result = integrate.quad(PDF, 0, x, args=(lam,))
    return quad_result
# Sanity check; note the result is a (value, error-estimate) tuple from quad.
CDFi(1,1.2)
the equation CDF(x) = xi
$$CDF(x)=\int^x_0 pdf(z) dz =\xi$$
def eqI(x, lam, xi):
    """Residual CDFi(x; lam) - xi for the numerical root finder.

    BUG FIX: CDFi returns the (value, abserr) tuple from quad; subtracting
    a scalar from that tuple raised TypeError. Take element [0] (the value).
    """
    return CDFi(x, lam)[0] - xi
Solve for $x\in \mathbb{R}$:
# Same inversion exercise, but against the quadrature-based CDF.
r_values=np.random.rand(20000)
_exp_r_values = [ scipy.optimize.root(eqI, 0.15 , args=(3.0, xii,)) for xii in r_values ]
# Keep only converged fits; sol['x'] is a length-1 array.
exp_r_values = [ sol['x'] for sol in _exp_r_values if sol['success'] ]
len(exp_r_values)
# Flatten into a 1-D sample vector and histogram it (log y-scale).
vals=np.transpose(np.array(exp_r_values))[0]
bins = np.linspace(0, 3, 15)
counts, binbegend , p = plt.hist(vals,bins,log=True)
def PDF(x, lam):
    """Exponential density lam * exp(-lam * x) (re-definition, identical to above).

    FIX: np.exp replaces the removed scipy.exp alias.
    """
    return lam*np.exp(-lam*x)
Is it normalized on $[0,\infty)$?
# Normalisation check over [0, inf); FIX: np.inf replaces the removed scipy.inf alias.
integrate.quad(PDF, 0, np.inf,args=(3.0,),epsrel=0.001)
def CDFi(x, lam):
    """Numerical CDF (re-definition, identical to the earlier one).

    Quadrature of PDF over [0, x]; returns quad's (value, abserr) pair.
    """
    return integrate.quad(PDF, 0, x, args=(lam,))
# Sanity check at x = 1, lam = 1.2.
CDFi(1,1.2)
del eqI
def eqI(x, lam, xi):
    """Residual CDFi(x; lam) - xi (re-definition, with the tuple bug fixed).

    BUG FIX: quad returns (value, abserr); take element [0] before
    subtracting xi, otherwise `tuple - float` raises TypeError.
    """
    return CDFi(x, lam)[0] - xi
# Inspect one full result object from the previous root-finding run.
_exp_r_values[0]
r_values=np.random.rand(20000)
_exp_r_values = [ scipy.optimize.root(eqI, 0.15 , args=(3.0, xii,)) for xii in r_values ]
exp_r_values = [ sol['x'] for sol in _exp_r_values if sol['success'] ]
len(exp_r_values)
vals=np.transpose(np.array(exp_r_values))[0]
bins = np.linspace(0, 3, 15)
# Same histogram as before but with a linear y-scale.
counts, binbegend , p = plt.hist(vals,bins,log=False)
def pdf(x, omega):
    """Unnormalised oscillating density 1 + cos(-omega * x) (= 1 + cos(omega * x)).

    FIX: np.cos replaces scipy.cos — the NumPy aliases in the scipy
    namespace were deprecated and removed. Vectorised over array `x`.
    """
    return 1+np.cos(-omega*x)
plot the PDF
# Evaluate the unnormalised pdf on a fine grid and plot it.
# NOTE(review): this rebinds x (previously a SymPy symbol) to a numpy array.
x=np.linspace(0,1,1000);
y=pdf(x,34);
plt.plot(x,y)
plt.show()
def PDF(x, o):
    """Density pdf(x, o) normalised so that it integrates to 1 on [0, 1].

    The normalisation constant is recomputed by quadrature on every call.
    """
    norm_constant, _abserr = integrate.quad(pdf, 0, 1, args=(o,))
    return pdf(x, o) / norm_constant
# Compare the raw pdf's mass with the normalised PDF (should integrate to 1).
integrate.quad(pdf, 0, 1,args=(34.0,))
integrate.quad(PDF, 0, 1,args=(34.0,))
x=np.linspace(0,1,1000);
y=PDF(x,34);
plt.plot(x,y)
plt.show()
Try to enclose it in a uniform distribution $h(x)=1$ for $0<x<1$.
# Envelope constant C for rejection sampling: the maximum of the plotted PDF.
max(y)
It is just a constant function $C\cdot h(x)$ for $C=\max_{[0,1]}(y)$.
# Acceptance-rejection sampling with a uniform proposal on [0, 1].
nP=10**5;
x_values=np.random.rand(nP)
pdf_values=PDF(x_values,34);
u_values=np.random.rand(nP)
# Accept x when u * C < PDF(x), with envelope constant C = max(y).
sub_set_x_values=x_values[pdf_values-u_values*max(y)>0]
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(sub_set_x_values,100,density=True)
plt.show()
print(len(sub_set_x_values))
# Expected number of accepted points for this envelope.
10**5/max(y)
# FIX: np.row_stack is a deprecated alias of np.vstack.
criterion_value=np.vstack((pdf_values-u_values*max(y)>0,x_values))
selected_values=np.select([criterion_value[0].astype(bool)],[criterion_value[1]])
plt.hist(selected_values[selected_values>0],100,density=True)
plt.show()
plot the PDF
# Re-plot the normalised PDF for a gentler frequency, omega = 1.2.
x=np.linspace(0,1,1000);
y=PDF(x,1.2);
plt.plot(x,y)
plt.show()
Try to enclose it in a uniform distribution $h(x)=1$ for $0<x<1$.
# Envelope constant for the omega = 1.2 case.
max(y)
It is just a constant function $C\cdot h(x)$ for $C=\max_{[0,1]}(y)$.
# Quick demo of numpy stacking functions on two small vectors.
_A=np.array([1,2])
_B=np.array([-1,-2])
# NOTE(review): np.row_stack is a deprecated alias of np.vstack (shown next).
print(np.row_stack((_A,_B)))
print(np.column_stack((_A,_B)))
print(np.vstack((_A,_B)))
print(np.hstack((_A,_B)))
# Element-wise product.
_A*_B
# Acceptance-rejection sampling for omega = 1.2 with a uniform proposal.
nP=10**5;
x_values=np.random.rand(nP)
pdf_values=PDF(x_values,1.2);
u_values=np.random.rand(nP)
# Accept x when u * C < PDF(x), with envelope constant C = max(y).
sub_set_x_values=x_values[pdf_values-u_values*max(y)>0]
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(sub_set_x_values,100,density=True)
plt.show()
print(len(sub_set_x_values))
# FIX: np.row_stack is a deprecated alias of np.vstack.
criterion_value=np.vstack((pdf_values-u_values*max(y)>0,x_values))
selected_values=np.select([criterion_value[0].astype(bool)],[criterion_value[1]])
plt.hist(selected_values[selected_values>0],100,density=True)
plt.show()
Exercise: redo this for $x \in [0,10]$.
# Uniform sampling over the bin positions (equal probabilities).
values=np.random.choice(bins, 100000)
plt.hist(values)
plt.show()
# Now with random, normalised per-bin probabilities.
probabilities=np.random.rand(len(bins))
probabilities=probabilities/np.sum(probabilities)
values=np.random.choice(bins, 100000, p=probabilities)
plt.hist(values)
plt.show()
from scipy import stats
# NOTE(review): discrete_pdf is only defined further down in linear order;
# in the original notebook these cells were executed out of order.
tuple(discrete_pdf)
# Build a custom discrete random variable from (support, probabilities).
custm = stats.rv_discrete(name='custm', values=(bins, tuple(discrete_pdf)))
custm.pmf(0.2)
fig, ax = plt.subplots(1, 1)
xk=bins
ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
plt.show()
bins=np.arange(0,1,0.001)
# Discretised density proportional to sqrt(x) * (1 - x), normalised to sum 1.
discrete_pdf=np.power(bins,0.5)*(1-bins)
discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
#print(bins);
#print(discrete_pdf);
print(np.sum(discrete_pdf))
plt.plot(bins,discrete_pdf)
plt.show()
# Sample from the discrete density and check the histogram shape.
values=np.random.choice(bins, 100000, p=discrete_pdf)
plt.hist(values,100)
plt.show()
bins=np.arange(0,1,0.001)
# Discretised Beta-like density x^0.5 * (1-x)^2.3, normalised to sum 1.
discrete_pdf=np.power(bins,0.5)*np.power((1-bins),2.3)
discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
print(np.sum(discrete_pdf))
plt.plot(bins,discrete_pdf)
plt.show()
values=np.random.choice(bins, 100000, p=discrete_pdf)
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(values,100,density=True)
plt.show()
try to see its derivative
# Finite-difference "derivative" of the discrete density.
discrete_diff_pdf=np.diff(discrete_pdf)
#print(discrete_diff_pdf)
plt.plot(discrete_diff_pdf)
plt.show()
and its integral
# Survival function 1 - CDF via the cumulative sum.
discrete_diff_pdf=1-np.cumsum(discrete_pdf)
# NOTE(review): find_nearest is defined further down in the file; in a linear
# execution this raises NameError (notebook cells were run out of order).
print(find_nearest(discrete_diff_pdf,0.5))
#print(discrete_diff_pdf)
plt.plot(discrete_diff_pdf)
plt.show()
A mirrored behaviour on the other side.
bins=np.arange(0,1,0.001)
discrete_pdf=np.power(bins,0.75)*np.power((1-bins),2.3)
discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
# Mirror the support: 1-(t-1) = 2-t maps [0, 1) onto (1, 2].
bins=1-(bins-1)
#print(bins);
#print(discrete_pdf);
#print(np.sum(discrete_pdf))
plt.plot(bins,discrete_pdf)
plt.show()
values=np.random.choice(bins, 100000, p=discrete_pdf)
plt.hist(values,100)
plt.show()
# Cumulative distribution; the index where it crosses 0.5 locates the median.
discrete_diff_pdf=np.cumsum(discrete_pdf)
# NOTE(review): find_nearest is defined just below — this works only if the
# definition cell was executed first.
print(find_nearest(discrete_diff_pdf,0.5))
#print(discrete_diff_pdf)
plt.plot(discrete_diff_pdf)
plt.show()
# Find the current median as find_nearest(np.cumsum(pdf_values), 0.5).
def find_nearest(array, value):
    """Return (index, element) of the entry of `array` closest to `value`."""
    nearest_index = np.argmin(np.abs(array - value))
    return nearest_index, array[nearest_index]
ary=np.array([0,1,2,3])
def midpoint(ary):
    """Midpoints of consecutive elements: element-wise (ary[i] + ary[i+1]) / 2."""
    left_edges = ary[:-1]
    right_edges = ary[1:]
    return (left_edges + right_edges) / 2
# Demo on the small test array above.
midpoint(ary)
bins=np.arange(0,1,0.001)
binning=np.arange(0,1,0.01)
# NOTE(review): this first density is dead code — it is overwritten below
# (kept to preserve the original execution order).
discrete_pdf=np.power(bins,1)*np.power((1-bins),0.25)
discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
# Mirror the grids onto (1, 2] (1-(t-1) = 2-t) and sort the bin edges.
binning=np.sort(1-(binning-1))
bins=1-(bins-1)
discrete_pdf=np.power(bins,11)*np.power((1-bins),1)
discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
print(np.sum(discrete_pdf))
values=np.random.choice(bins, 100000, p=discrete_pdf)
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
_counts,_bins, c = plt.hist(values,bins=binning,density=True)
plt.show()
print(np.average(values))
print(np.median(values))
# Empirical probabilities from the histogram counts.
pdf_of_counts=_counts/np.sum(_counts)
#median0=find_nearest( np.cumsum(pdf_of_counts) ,0.5)
# Resample 10 extra points from bin midpoints with the empirical probabilities.
_new_values=np.random.choice(midpoint(_bins), 10, p=pdf_of_counts)
print(np.average(_new_values))
print(np.median(_new_values))
plt.hist(_new_values)
new_values=np.append(values,_new_values)
print(np.average(new_values))
print(np.median(new_values))
# Shift of the mean caused by appending the resampled points.
np.average(new_values)-np.average(values)
_new_counts,_bins, c = plt.hist(new_values,bins=binning,density=True);
new_pdf_of_counts=_new_counts/np.sum(_new_counts)
#median1=find_nearest( np.cumsum(new_pdf_of_counts) ,0.5)
#print(median0,median1)
Find the shift of the mean or median (depending on an optional argument) after nP extractions.
def shift_of_metric(metric='average',increment=10,a=0.5,b=2.3, bins=np.arange(0,1,0.001), binning=np.arange(0,1,0.01),opposite=False, interactive=False):
    """Shift of the average (or median) after appending `increment` resampled points.

    Draws 10**4 points from a discretised x**a * (1-x)**b density on `bins`,
    histograms them on `binning`, resamples `increment` extra points from the
    bin midpoints with the empirical probabilities, and returns
    metric(original + extra) - metric(original).

    Parameters:
        metric: 'average' or 'median' — which statistic to compare
                (any other value returns None).
        increment: number of extra points resampled from the histogram.
        a, b: exponents of the density x**a * (1-x)**b.
        bins, binning: sampling grid and histogram edges (defaults on [0, 1);
                array defaults are only rebound, never mutated).
        opposite: mirror both grids onto (1, 2] before sampling.
        interactive: draw the histogram with matplotlib instead of np.histogram.
    """
    discrete_pdf=np.power(bins,a)*np.power((1-bins),b)
    discrete_pdf=discrete_pdf/np.sum(discrete_pdf)
    if opposite:
        binning=np.sort(1-(binning-1))
        bins=1-(bins-1)
    nRealData=10**4
    values=np.random.choice(bins, nRealData, p=discrete_pdf)
    if interactive:
        # FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
        _counts,_bins, c = plt.hist(values,bins=binning,density=True)
        plt.show()
    else:
        # FIX: np.histogram dropped `normed` as well; density=True is equivalent
        # for this uniform binning.
        _counts,_bins = np.histogram(values,bins=binning,density=True)
    pdf_of_counts=_counts/np.sum(_counts)
    _new_values=np.random.choice(midpoint(_bins), increment, p=pdf_of_counts)
    new_values=np.append(values,_new_values)
    if metric=="average":
        return np.average(new_values)- np.average(values)
    if metric=="median":
        return np.median(new_values)- np.median(values)
find the distribution of the mean or median shift
# One draw of the shift for each orientation of the support.
shift_of_metric(increment=10,interactive=True)
shift_of_metric(increment=10,interactive=True,opposite=True)
def shifts(increment, trials=1000, opposite=False, a=1, b=1):
    """Array of `trials` independent shift_of_metric samples for one batch size."""
    samples = [shift_of_metric(a=a, b=b, increment=increment, opposite=opposite)
               for _trial in range(trials)]
    return np.array(samples)
# log10 standard deviation of the shift vs. batch size.
[ [batch_size, np.log10(np.sqrt(np.var(shifts(batch_size))))] for batch_size in [10,30,100,300,1000,3000,10000] ]
[ [batch_size, np.log10(np.sqrt(np.var(shifts(batch_size,opposite=False))))] for batch_size in [10,30,100,300,1000,3000,10000] ]
# log10 absolute mean shift vs. batch size.
[ [batch_size, np.log10(np.abs(np.average(shifts(batch_size))))] for batch_size in [10,30,100,300,1000,3000,10000] ]
[ [batch_size, np.log10(np.abs(np.average(shifts(batch_size,opposite=False))))] for batch_size in [10,30,100,300,1000,3000,10000] ]
plt.clf()
# Common binning of +-0.001 around zero for the shift histograms.
pips_range=np.linspace(-0.001,0.001,22)
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(shifts(10),density=True,bins=pips_range)
plt.hist(shifts(30),density=True,color='cyan',alpha=0.7,bins=pips_range)
plt.hist(shifts(100),density=True,color='green',alpha=0.3,bins=pips_range)
plt.show()
def movement(increment=10, a=1, b=1):
    """Difference between a direct and a mirrored ('opposite') batch of shifts."""
    direct = shifts(increment, a=a, b=b)
    mirrored = shifts(increment, a=a, b=b, opposite=True)
    return direct - mirrored
plt.clf()
# Exponents for a strongly asymmetric density.
_A=0.4
_B=0.02
pips_range=np.linspace(-0.001,0.001,22)
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(movement(10,a=_A,b=_B),density=True,bins=pips_range)
plt.hist(movement(30,a=_A,b=_B),density=True,color='cyan',alpha=0.7,bins=pips_range)
plt.hist(movement(100,a=_A,b=_B),density=True,color='green',alpha=0.3,bins=pips_range)
del _A,_B
plt.show()
plt.clf()
# Same comparison with a steeper second exponent.
_A=0.4
_B=1.8
pips_range=np.linspace(-0.001,0.001,22)
# FIX: `normed=True` was removed from matplotlib; `density=True` replaces it.
plt.hist(movement(10,a=_A,b=_B),density=True,bins=pips_range)
plt.hist(movement(30,a=_A,b=_B),density=True,color='cyan',alpha=0.7,bins=pips_range)
plt.hist(movement(100,a=_A,b=_B),density=True,color='green',alpha=0.3,bins=pips_range)
del _A,_B
plt.show()