In [None]:
# Report the interpreter version and the accelerator visible to this kernel.
from platform import python_version
print(python_version())
import torch
# current_device()/get_device_name() raise when no CUDA device is usable,
# so check availability first and keep the notebook runnable on CPU-only hosts.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("CUDA is not available; running on CPU")

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import json
import numpy as np
import scipy as sp
import scipy.stats as st
import scipy.integrate as integrate
from scipy.stats import multivariate_normal
from sklearn import linear_model
# from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
import statsmodels.api as sm
from matplotlib.colors import LogNorm

# Global plotting style shared by every figure in the notebook.
sns.set_style("whitegrid")
sns.set_palette("colorblind")
palette = sns.color_palette()
figsize = (15,8)
legend_fontsize = 16

from matplotlib import rc
rc('font',**{'family':'sans-serif'})
rc('text', usetex=True)
# Each rc('text.latex', preamble=...) call replaces the previous value, so both
# packages must be passed together in a single preamble string; setting them in
# two separate calls keeps only the last one.
rc('text.latex', preamble='\n'.join([
    r'\usepackage[utf8]{inputenc}',
    r'\usepackage[russian]{babel}',
]))
rc('figure', **{'dpi': 300})

# Байесовский вывод для испытаний Бернулли

In [None]:
# Grid of candidate values for theta; intentionally wider than [0, 1]
xs = np.arange(-0.5, 1.5, 0.0025)

prior_params = (30, 30)
experimental_data = (10, 0)

## Prior distribution: Beta density with the parameters above
pri = st.beta(*prior_params).pdf
norm_prior = 1  # the prior is used as-is, without an extra normalisation constant
ys_prior = [density / norm_prior for density in map(pri, xs)]

## Likelihood of the observed (heads, tails) counts as a function of theta
n_heads, n_tails = experimental_data

def lk(x):
    """Unnormalised Bernoulli likelihood x^heads * (1 - x)^tails."""
    return x ** n_heads * (1 - x) ** n_tails

ys_like = list(map(lk, xs))

## Posterior distribution: likelihood times prior, normalised over [0, 1]
def post(x):
    """Unnormalised posterior density at x."""
    return lk(x) * pri(x)

norm_post = integrate.quad(post, 0, 1)[0]
# The posterior is reported as zero outside the open interval (0, 1)
ys_post = [ post(x) / norm_post if 0 < x < 1 else 0 for x in xs ]

## Draw the prior, the likelihood and the posterior on one set of axes
fig, ax = plt.subplots(figsize=figsize)
curves = (
    (ys_prior, r"Априорное распределение"),
    (ys_like, r"Правдоподобие"),
    (ys_post, r"Апостериорное распределение"),
)
for curve_values, curve_label in curves:
    ax.plot(xs, curve_values, linewidth=2, label=curve_label)
# Show a small margin around the [0, 1] support of theta
ax.set_xlim(-0.05, 1.05)
ax.set_ylim(-0.5, 8.5)
ax.set_xlabel(r"Вероятность орла $\theta$", fontsize=legend_fontsize)
ax.legend(loc="upper left", fontsize=legend_fontsize)
plt.show()

# Линейная и полиномиальная регрессия

In [None]:
## Overfitting demo: noisy samples of a known function
## Fixed seed so that Restart & Run All regenerates exactly the same noisy points
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

## Ground-truth function
def orig(x):
    """Noise-free target function sin(2x)."""
    return np.sin(2*x)

## X coordinates of the small data set
xd = np.array([-3, -2, -1, -0.5, 0, 0.5, 1, 1.5, 2.5, 3, 4]) / 2
num_points = len(xd)

## Small data set: target values plus Gaussian noise (std 0.25)
data = orig(xd) + np.random.normal(0, .25, num_points)

## X coordinates of the large, densely sampled data set
xd_large = np.arange(-1.5, 2, 0.05)
num_points_l = len(xd_large)

## Large data set: same target and noise level
data_large = orig(xd_large) + np.random.normal(0, .25, num_points_l)


## Dense grid used for drawing curves, extended 1.5 beyond the data on both sides
xs = np.arange(xd[0]-1.5, xd[-1]+1.5, 0.01)

In [None]:
## Polynomial feature extraction: column k holds the inputs raised to the power k + 1
xs_d = np.vstack([xs ** power for power in range(1, num_points + 1)]).T
xd_d = np.vstack([xd ** power for power in range(1, num_points + 1)]).T

## Polynomial degrees to fit and draw
set_of_powers = [ 3, 10]

fig, ax = plt.subplots(figsize=figsize)
ax.set_xlim(xs[0], xs[-1])
ax.set_ylim(-2, 2)
ax.scatter(xd, data, marker='*', s=120)
ax.plot(xs, orig(xs), linewidth=1, label="Исходная функция", color="black")

for d in set_of_powers:
    if d != 0:
        # Degree-d fit uses the first d feature columns
        cur_model = linear_model.LinearRegression(fit_intercept=True)
        cur_model.fit(xd_d[:, :d], data)
        print(cur_model.coef_)
        ax.plot(xs, cur_model.predict(xs_d[:, :d]), linewidth=2, label="$d=%d$" % d)
        continue
    # Degree 0: the constant model is the mean of the targets
    data_mean = np.mean(data)
    print(data_mean)
    ax.hlines(data_mean, xmin=xs[0], xmax=xs[-1], label="$d=0$", linestyle="dashed")

ax.legend(loc="upper left", fontsize=legend_fontsize)
# Final visible x-range; this overrides the limits set before plotting
ax.set_xlim((-2, 2.5))

## Функции активации

In [None]:
import torch.optim as optim
# optimizer = optim.SGD(model.parameters(), lr=0.1)

In [None]:
# Shared inputs for this figure: 500 evenly spaced points on [-5, 5] and a common line width
lw = 1.5
xs = np.linspace(-5, 5, num=500)
fig, ax = plt.subplots(figsize=(10, 5))

def relu(x):
    """Rectified linear unit max(0, x), applied elementwise.

    Accepts any array-like, including empty or integer input, and always
    returns floating-point values.
    """
    return np.maximum(0.0, np.asarray(x, dtype=float))


def thresh(x):
    """Threshold activation: 1 where x >= 0 and 0 elsewhere, elementwise."""
    return np.where(np.asarray(x) >= 0, 1, 0)

# (values, legend label) for each classic activation, in drawing order
curves = [
    (thresh(xs), "Threshold activation"),
    (1. / (1 + np.exp(-xs)), "Logistic sigmoid"),
    (np.tanh(xs), "Hyperbolic tangent $\\tanh$"),
    (relu(xs), "ReLU activation"),
]
for curve_values, curve_label in curves:
    ax.plot(xs, curve_values, linewidth=lw, label=curve_label)

ax.set_xlim(-5., 5.)
ax.set_ylim(-1., 2.)
ax.legend(loc="upper left")

In [None]:
# Inputs for the ReLU-family figure: the same 500-point grid on [-5, 5] and line width
lw = 1.5
xs = np.linspace(-5, 5, num=500)
fig, ax = plt.subplots(figsize=(10, 5))

def relu(x):
    """Rectified linear unit max(0, x), applied elementwise; returns floats."""
    return np.maximum(0.0, np.asarray(x, dtype=float))


def softplus(x):
    """Softplus log(1 + exp(x)), applied elementwise.

    Computed with logaddexp so large inputs do not overflow to inf.
    """
    return np.logaddexp(0.0, np.asarray(x, dtype=float))


def make_leaky_relu(slope):
    """Return a leaky ReLU: x for x >= 0 and slope * x otherwise."""
    def activation(x):
        x = np.asarray(x, dtype=float)
        return np.where(x >= 0, x, slope * x)
    return activation


def make_elu(alpha):
    """Return an exponential linear unit: x for x >= 0, alpha * (exp(x) - 1) otherwise."""
    def activation(x):
        x = np.asarray(x, dtype=float)
        # np.where evaluates both branches; clamping at 0 keeps exp() away from
        # large positive inputs whose result would be discarded anyway.
        return np.where(x >= 0, x, alpha * (np.exp(np.minimum(x, 0.0)) - 1))
    return activation


lrelu = make_leaky_relu(0.2)
lrelu2 = make_leaky_relu(0.05)
elu = make_elu(1.0)
elu2 = make_elu(0.2)


def thresh(x):
    """Threshold activation: 1 where x >= 0 and 0 elsewhere, elementwise."""
    return np.where(np.asarray(x) >= 0, 1, 0)

# (activation, legend label) for each ReLU-like curve, in drawing order
curves = [
    (relu, "ReLU activation"),
    (softplus, "Softplus"),
    (lrelu, "Leaky ReLU, $a=\\frac{1}{5}$"),
    (lrelu2, "Leaky ReLU, $a=\\frac{1}{20}$"),
    (elu, "Exponential linear unit, $\\alpha=1.0$"),
    (elu2, "Exponential linear unit, $\\alpha=\\frac{1}{5}$"),
]
for activation, curve_label in curves:
    ax.plot(xs, activation(xs), linewidth=lw, label=curve_label)

# Zoom in on the negative half-axis, where the variants differ
ax.set_xlim(-5., 1.)
ax.set_ylim(-1., 1.)
ax.legend(loc="upper left")

In [None]:
# Inputs for the Swish/Mish figure: the same 500-point grid on [-5, 5] and line width
lw = 1.5
xs = np.linspace(-5, 5, num=500)
fig, ax = plt.subplots(figsize=(10, 5))

def relu(x):
    """Rectified linear unit max(0, x), applied elementwise; returns floats."""
    return np.maximum(0.0, np.asarray(x, dtype=float))


def make_swish(beta):
    """Return the Swish activation x * sigmoid(beta * x) for the given beta."""
    def activation(x):
        x = np.asarray(x, dtype=float)
        return x / (1. + np.exp(-beta * x))
    return activation


swish = make_swish(1)
swish2 = make_swish(5)


def mish(x):
    """Mish activation x * tanh(softplus(x)), applied elementwise.

    The softplus term uses logaddexp so large inputs do not overflow.
    """
    x = np.asarray(x, dtype=float)
    return x * np.tanh(np.logaddexp(0.0, x))

# (activation, legend label) for each curve, in drawing order
curves = [
    (relu, "ReLU activation"),
    (swish, "Swish activation, $\\beta=1$"),
    (swish2, "Swish activation, $\\beta=5$"),
    (mish, "Mish activation"),
]
for activation, curve_label in curves:
    ax.plot(xs, activation(xs), linewidth=lw, label=curve_label)

ax.set_xlim(-4., 2.)
ax.set_ylim(-.5, 2.)
ax.legend(loc="upper left")

In [None]:
# Inputs for the ACON-C figure: the same 500-point grid on [-5, 5] and line width
lw = 1.5
xs = np.linspace(-5, 5, num=500)
fig, ax = plt.subplots(figsize=(10, 5))

def aconc(a1, a2, beta):
    """Build the ACON-C activation for the given parameters.

    The returned callable maps x to
    (a1 - a2) * x / (1 + exp(-x * beta * (a1 - a2))) + a2 * x.
    """
    slope_gap = a1 - a2

    def activation(x):
        gate_denominator = 1.0 + np.exp(-x * beta * slope_gap)
        return slope_gap * x / gate_denominator + a2 * x

    return activation

# One vectorised ACON-C activation per (a1, a2, beta) setting
acon1, acon2, acon3, acon4, acon5 = (
    np.vectorize(aconc(a1, a2, beta))
    for a1, a2, beta in [
        (1.0, 0.0, 1.0),
        (1.2, -0.1, 1.0),
        (1.0, -0.8, 1.0),
        (1.0, -0.8, 0.1),
        (1.0, -0.8, 0.01),
    ]
)

# (activation, legend label, extra line style); the last three share colour C3
# and differ only in line style
curves = [
    (acon1, "ACON-C, $a_1=1$, $a_2=0$, $\\beta=1$", {}),
    (acon2, "ACON-C, $a_1=1.2$, $a_2=-0.1$, $\\beta=1$", {}),
    (acon3, "ACON-C, $a_1=1$, $a_2=-0.8$, $\\beta=1$", {"color": "C3"}),
    (acon4, "ACON-C, $a_1=1$, $a_2=-0.8$, $\\beta=0.1$", {"color": "C3", "linestyle": "dashed"}),
    (acon5, "ACON-C, $a_1=1$, $a_2=-0.8$, $\\beta=0.01$", {"color": "C3", "linestyle": "dotted"}),
]
for activation, curve_label, line_style in curves:
    ax.plot(xs, activation(xs), linewidth=lw, label=curve_label, **line_style)

ax.set_xlim(-4., 4.)
ax.set_ylim(-.5, 4.)
ax.legend(loc="upper center")