import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import scipy.stats as stats
from collections import Counter

import secret


# Class for simulating Bernoulli trials
class FlipSim(object):
    def __init__(self, success_rate):
        self.success_rate = success_rate
        self.trials = []

    def get_success_rate(self):
        return self.success_rate

    # Returns the result of a single 'flip'
    def flip(self):
        if random.random() <= self.success_rate:
            return True
        else:
            return False

    # Returns a list of a single trial of num_flips flips
    def flips(self, num_flips):
        return [self.flip() for i in range(num_flips)]

    # Returns a list of num_trials trials of flips_per_trial flips each
    def flip_trials(self, flips_per_trial, num_trials):
        self.trials = [self.flips(flips_per_trial) for i in range(num_trials)]
        return self.trials


sim = FlipSim(secret.value)

# 10,000 trials of 1,000 flips each. Experiment with different combinations
# of flips_per_trial and num_trials.
trials = sim.flip_trials(1000, 10000)
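
# One way to follow the suggestion above (an illustrative sketch, not part of
# the original exercise): vary flips_per_trial and watch the spread of the
# per-trial means shrink roughly in proportion to 1 / sqrt(flips_per_trial).
for n in (100, 400, 1600):
    demo_means = [np.mean(t) for t in sim.flip_trials(n, 1000)]
    print('flips_per_trial =', n, '-> std of means =', np.std(demo_means))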

# Descriptive stats
means = [np.mean(trial) for trial in trials]
mean = np.mean(means)
sigma = np.std(means)

# Results should be similar to:
# The mean is 0.5300304
# The standard deviation is 0.0157946977128
print('The mean is', mean)
print('The standard deviation is', sigma)

# 68% of the values lie between 0.514235702287 and 0.545825097713
# 95% of the values lie between 0.498441004574 and 0.561619795426
# 99.7% of the values lie between 0.482646306861 and 0.577414493139
# Thus, on the basis of this particular simulation we can say with 95%
# confidence that secret.value lies between 0.498 and 0.562.
print('68% of the values lie between', mean - sigma, 'and', mean + sigma)
print('95% of the values lie between', mean - 2*sigma, 'and', mean + 2*sigma)
print('99.7% of the values lie between', mean - 3*sigma, 'and', mean + 3*sigma)

# numpy.linspace returns evenly spaced numbers over a specified interval.
# In this case the range of x is defined by the mean plus/minus three
# standard deviations.
x = np.linspace(mean - 3 * sigma, mean + 3 * sigma, 100)

# Create a histogram of the means
plt.hist(means, bins=200)

# Draw the probability density function (pdf) of the normal distribution
# with mean secret.value and standard deviation of sigma within the range
# of x.
# scipy's norm.pdf is used here (matplotlib's older mlab.normpdf has been removed)
plt.plot(x, 10 * stats.norm.pdf(x, secret.value, sigma))

# Show the plot
plt.show()


# scipy.stats
# Create three different Bernoulli trials of 1,000 flips each with different
# success rates
x1 = FlipSim(0.5).flips(1000)
x2 = FlipSim(0.52).flips(1000)
x3 = FlipSim(0.57).flips(1000)

# Use the independent two-sample t-test to test the null hypothesis that the
# means of two independent samples are the same.
# Results should be similar though not identical to:
# Ttest_indResult(statistic=0.53653387639428951, pvalue=0.59164936190673534)
# Null hypothesis is not rejected
print(stats.ttest_ind(x1, x2))

# Ttest_indResult(statistic=-1.2543243107046314, pvalue=0.20987087051240788)
# Null hypothesis is rejected
print(stats.ttest_ind(x1, x3))

# Use ANOVA to test the null hypothesis that the means of more than two
# independent samples are all the same.
# Results should be similar to:
# F_onewayResult(statistic=9.1459470595680799, pvalue=0.0001096574199724701)
# Null Hypothesis is rejected
print(stats.f_oneway(x1, x2, x3))
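
# The 'rejected' / 'not rejected' notes above assume the conventional 5%
# significance level. A small helper (illustrative only, not part of the
# original tests) makes that decision rule explicit.
def report(p_value, alpha=0.05):
    if p_value < alpha:
        print('p = {:.4g}: null hypothesis is rejected at alpha = {}'.format(p_value, alpha))
    else:
        print('p = {:.4g}: null hypothesis is not rejected at alpha = {}'.format(p_value, alpha))

report(stats.ttest_ind(x1, x2).pvalue)
report(stats.f_oneway(x1, x2, x3).pvalue)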

sample = [1] * 599 + [0] * 401

# The standard deviation doesn't tell us what we want to know, because we are
# dealing with a single sample rather than the means of many samples.
mu = np.mean(sample)
sigma = np.std(sample)
print('mu = {}, std = {}'.format(mu, sigma))
plt.hist(sample)
plt.show()

# To estimate the mean from a single sample, use the Standard Error of the
# Means (SEM)
SEM = sigma / (len(sample) ** 0.5)

# standard error of the means is 0.0154983547514
print('standard error of the means is', SEM)
x = np.linspace(mu - 3 * SEM, mu + 3 * SEM, 100)
plt.plot(x, stats.norm.pdf(x, mu, SEM))
plt.show()

# SEM is implemented directly in scipy.stats
SEM = stats.sem(sample)

# 95% confidence that population mean is between 0.5679877805090032
# and 0.6300122194909967
print('95% confidence that population mean is between {} and {}'.
      format(mu - 2 * SEM, mu + 2 * SEM))

# Use the one-sample t-test to compare the mean of a sample with an expected mean
test_mean = 0.5
statistic, p_value = stats.ttest_1samp(sample, test_mean)

# For one-sample t-test with given sample and a mean of 0.5 p-value is
# 2.6279100509e-10
print('For one-sample T-test with given sample and a mean of',
      test_mean, 'p-value is', p_value)


# The following code is based on the ending conditions for Ash's algorithmic
# piece. Once the piece is at least 32 notes long, the next C4 produced
# initiates the ending of the piece. How many notes beyond 32 are expected
# before C4 occurs?
def trial():
    # Keep count of the notes produced before C4
    count = 0
    while True:
        # G3 = 0, A3 = 1, ..., C5 = 10
        value = random.randint(0, 10)
        # Stop the trial if the value is C4
        if value == 3:
            break
        count += 1
    return count


# Generate counts for 10,000 trials
N = 10000
trials = [trial() for _ in range(N)]
counts = Counter(trials)

# Create x and y values
xs = range(1, max(counts) + 1)
vals = [counts[x] for x in xs]
ys = np.array(vals) / N

# Plot the probability mass function of the resulting geometric distribution
plt.title('Probability mass function\ngeometric distribution')
plt.ylabel('Probability')
plt.xlabel('Random selections until success')
plt.plot(xs, ys)
plt.show()

# Plot the cumulative distribution function
ys = np.cumsum(vals) / N
plt.title('Cumulative distribution function\ngeometric distribution')
plt.ylabel('Cumulative probability')
plt.xlabel('Random selections until success')
plt.plot(xs, ys)
plt.show()
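
# The question above asked how many notes beyond 32 to expect before C4
# occurs. A quick check of the simulation against theory (added here as an
# illustration): the counts follow a geometric distribution with success
# probability p = 1/11, so the expected number of non-C4 notes before the
# first C4 is (1 - p) / p = 10.
p = 1 / 11
print('Expected notes before C4 (theory):', (1 - p) / p)
print('Mean notes before C4 (simulation):', np.mean(trials))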