In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import json
import numpy as np
import scipy as sp
import scipy.stats as st
import scipy.integrate as integrate
from sklearn import linear_model
from collections import Counter

# Global plotting style shared by every figure in this notebook.
sns.set_style("whitegrid")
sns.set_palette("colorblind")
palette = sns.color_palette()
figsize = (10,6)
legend_fontsize = 16

# Seed the global NumPy RNG (the simulations below use np.random.choice) so that
# Restart Kernel -> Run All reproduces the same figures.
np.random.seed(42)

from matplotlib import rc
rc('font',**{'family':'sans-serif'})
rc('text', usetex=True)
# Both packages have to go into ONE preamble: every rc('text.latex', preamble=...)
# call replaces the previous value, so two separate calls kept only the babel line.
rc('text.latex', preamble='\n'.join([
    r'\usepackage[utf8]{inputenc}',
    r'\usepackage[russian]{babel}',
]))
rc('figure', **{'dpi': 300})

## Обучение равномерного распределения

In [None]:
# Population size (serial numbers run 1..N) and number of simulated experiments.
N = 100
num_samples = 150000

# Each row holds 10 distinct serial numbers drawn uniformly from 1..N.
# np.random.choice(N, ...) yields values in 0..N-1, hence the +1 shift, applied
# here once to the stacked array instead of to every row separately.
sample = 1 + np.array([
    np.random.choice(N, size=10, replace=False)
    for _ in range(num_samples)
])

def plot_tanks1():
    """Plot the empirical distribution of the sample maximum for several sample sizes.

    Reads the module-level ``sample`` array together with ``N`` and
    ``num_samples``. For each k in (1, 2, 3, 5) the first k columns of
    ``sample`` are treated as a sample of size k; the row-wise maximum is
    histogrammed over the integer values 1..N and the relative frequencies are
    drawn as one curve. The figure is shown via ``plt.show()``; nothing is
    returned.
    """
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    for i,k in enumerate([1, 2, 3, 5]):
        # Bin edges 1, 2, ..., N+1 give exactly one bin per integer value 1..N.
        hist = np.histogram(np.max(sample[:, :k], axis=1), bins=range(1, N+2))
        ax.plot(range(1, N+1), hist[0] / num_samples, label=r'$k=%d$, $N=%d$' % (k, N), color=palette[i], linestyle='-')
    # Follow N instead of a hard-coded 100 so the axis always matches the
    # x-range that is actually plotted (and the N shown in the legend).
    ax.set_xlim((1, N))
    ax.set_ylim((0, 0.05))
    ax.set_xlabel('Значение максимума выборки', fontsize=legend_fontsize)
    ax.legend(fontsize=legend_fontsize, loc="upper left")
    plt.show()

plot_tanks1()

In [None]:
# Empirical distribution of the estimate N_hat = max * (k+1)/k - 1, computed
# from the first k columns of every simulated row, for several sample sizes k.
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)
for i,k in enumerate([1, 2, 3, 5]):
    max_hist = Counter(np.max(sample[:, :k], axis=1)*(k+1)/k - 1)
    xs = sorted(max_hist.keys())
    # Use a distinct comprehension variable: the original reused `i`, which
    # visually shadowed the loop index that selects the curve colour below.
    ys = [max_hist[x] / num_samples for x in xs]
    # The extra point (last x + 1, 0) brings the curve back down to zero.
    ax.plot(xs + [xs[-1]+1], ys + [0], label=r'$k=%d$, $N=%d$' % (k, N), color=palette[i], linestyle='-')
# Raw string: '\h' is an invalid escape sequence in a normal string literal.
ax.set_xlabel(r'Значение оценки $\hat N$', fontsize=legend_fontsize)
# Mark the true population size; follows N instead of a hard-coded 100.
ax.axvline(x=N, ymin=0, ymax=1, linestyle=':', c='black', linewidth=1)
ax.legend(fontsize=legend_fontsize, loc="upper left")
# For k=1 the estimate reaches 2*N - 1, so 2*N bounds every curve.
ax.set_xlim((1, 2*N))
ax.set_ylim((0, 0.05))
plt.show()

In [None]:
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)

# NOTE(review): sample, hist and ys are bound to empty dicts here but are not
# used anywhere in this cell; the binding overwrites the `sample` array built
# earlier in the notebook. Kept as-is in case later cells rely on these names.
M, k, xmax, sample, hist, ys = 10, 2, 40, dict(), dict(), dict()


def _posterior_curve(M, k, xmax):
    """Return (xs, ps): the curve (k-1) * C(M, k) / (C(x, k) * M) for x >= M.

    xs runs over the integers k..xmax-1 with one extra point at M-0.01 so that
    the plotted line jumps from 0 to its value at x = M almost vertically;
    ps is 0 for every x < M.
    """
    xs = list(np.arange(k, M)) + [M-0.01] + list(np.arange(M, xmax))
    ps = [ (k-1) * sp.special.binom(M, k) / (sp.special.binom(x, k) * M) if x >= M else 0 for x in xs]
    return xs, ps


# Fixed M, varying sample size k.
for i,k in enumerate([2, 3, 5]):
    xsp, ysp = _posterior_curve(M, k, xmax)
    ax.plot(xsp, ysp, label=r'$p(N\mid M, k)$, $M=%d$, $k=%d$' % (M, k), color=palette[i])

# Fixed k, varying observed maximum M.
k = 2
for i,M in enumerate([15, 20]):
    xsp, ysp = _posterior_curve(M, k, xmax)
    # Offset by 3: palette[0..2] are already taken by the k-curves above, and
    # reusing them made two pairs of legend entries indistinguishable.
    ax.plot(xsp, ysp, label=r'$p(N\mid M, k)$, $M=%d$, $k=%d$' % (M, k), color=palette[i + 3])

ax.legend(fontsize=legend_fontsize, loc="upper right")
ax.set_xlim((9, 30))
ax.set_ylim((0, 0.42))
plt.show()

In [None]:
# Monte Carlo check of p(N | M, k): for every candidate population size N draw
# num_samples samples of size k without replacement from 1..N and count how
# often each value of the sample maximum occurs.
# NOTE(review): this cell rebinds the module-level names N, num_samples and
# sample that the earlier plotting cells read, so those cells should not be
# re-run after this one without re-running the cell that defines them.
M, k, Nmax, num_samples = 10, 2, 40, 5000
counter = {}
# Start at M rather than a literal 10: a population smaller than the observed
# maximum M can never produce it, and xs below already starts at M.
for N in range(M, Nmax+1):
    sample = np.array([np.random.choice(N, size=k, replace=False) + 1 for _ in range(num_samples)])
    sample_max = np.max(sample, axis=1)
    for i in sample_max:
        counter[(i, N)] = counter.get((i, N), 0) + 1

# Keep only the experiments whose maximum equals M, keyed by population size N,
# and turn the counts into relative frequencies over N.
result = {x[1]:v for x,v in counter.items() if x[0] == M}
result_total = np.sum([v for v in result.values()])
xs = list(np.arange(M, Nmax+1))
ys = [result.get(x, 0) / result_total for x in xs]
# Analytic curve on the same grid, plus a copy renormalised to sum to 1 over
# the simulated range M..Nmax.
ysp = np.array([ (k-1) * sp.special.binom(M, k) / (sp.special.binom(x, k) * M) if x >= M else 0 for x in xs])
ys_norm = ysp / np.sum(ysp)

fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)
# The lower end of the N range in both labels follows M instead of a literal 10.
ax.plot([1] + xs, [0] + ys, drawstyle='steps-post', label=r'Выборка, $N = %d,\ldots,%d$, $M=%d$, $k=%d$' % (M, Nmax, M, k), color=palette[1])
ax.fill_between(xs, ys, color=palette[1], alpha=0.4, step='post')
ax.plot([1,M-0.01] + xs, [0,0] + list(ysp), label=r'$p(N\mid M, k)$, $N = %d,\ldots,%d$, $M=%d$, $k=%d$' % (M, Nmax, M, k), color=palette[0])
ax.set_xlim((8, Nmax))
ax.set_ylim((0, 0.34))
ax.legend(fontsize=legend_fontsize, loc="upper right")
ax.grid(which='major', color='#AAAAAA', linewidth=0.7, linestyle=':')
plt.show()

In [None]:
# Same comparison as in the previous cell, but against ys_norm: the analytic
# curve renormalised to sum to 1 over the simulated range M..Nmax.
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)
# The range limits in the labels (including the limits of the sum) follow M
# and Nmax instead of the literals 10 and 40.
ax.plot([1] + xs, [0] + ys, drawstyle='steps-post', label=r'Выборка, $N = %d,\ldots,%d$, $M=%d$, $k=%d$' % (M, Nmax, M, k), color=palette[1])
ax.fill_between(xs, ys, color=palette[1], alpha=0.4, step='post')
ax.plot([1,M-0.01] + xs, [0,0] + list(ys_norm), label=r'$\frac{p(N\mid M, k)}{\sum_{N=%d}^{%d}p(N\mid M, k)}$, $N = %d,\ldots,%d$, $M=%d$, $k=%d$' % (M, Nmax, M, Nmax, M, k), color=palette[0])
ax.set_xlim((8, Nmax))
ax.set_ylim((0, 0.14))
ax.legend(fontsize=legend_fontsize, loc="upper right")
plt.show()