%%
% NS248 HW2 Problem 2
% Upper-tail probabilities of a Gaussian, Pr(X > mu + k*stdev) for k = 1,2,3,
% read off a cdf tabulated on xvals.
% (A stray call to gaussian() that ran before xvals/mu/stdev were defined has
% been removed from the top of this script.)
xvals = linspace(-6,6,10000);   % grid on which the pdf and cdf are tabulated
mu = 0;
stdev = 1;
% The plot flag is passed explicitly; true draws the pdf and cdf figures.
[f,F] = gaussian(xvals, mu, stdev, true);
% Thresholds are measured from the mean, so they stay correct when mu ~= 0.
Pr1stdev  = 1 - interp1(xvals, F, mu + stdev);
Pr2stdevs = 1 - interp1(xvals, F, mu + 2*stdev);
Pr3stdevs = 1 - interp1(xvals, F, mu + 3*stdev);
%%
% Problem 2 (continued): estimate the same tail probabilities by simulation
n = 1000;   % number of draws
figure      % fresh figure so randomdraw's histogram does not overwrite the cdf plot
sim_x = randomdraw(xvals, F, n);
% Fraction of simulated draws that land above each threshold.
sim_Pr1stdev = nnz(sim_x > (mu + stdev))/n;
sim_Pr2stdev = nnz(sim_x > (mu + 2*stdev))/n;
sim_Pr3stdev = nnz(sim_x > (mu + 3*stdev))/n;
function [f,F] = gaussian(xvals,mu,stdev,figures)
% GAUSSIAN  Normal pdf and cdf evaluated on a grid, with optional plots.
%   [f,F] = gaussian(xvals,mu,stdev) returns the normal density f and the
%   cumulative distribution F with mean mu and standard deviation stdev,
%   evaluated at every element of xvals. No figures are drawn.
%
%   [f,F] = gaussian(xvals,mu,stdev,figures) additionally opens two new
%   figures (pdf, then cdf) when figures is true/nonzero.
%
%   Relies on pdf() and cdf() (Statistics and Machine Learning Toolbox).

% Callers may omit the plot flag; reading an omitted argument would
% otherwise abort with "Not enough input arguments".
if nargin < 4 || isempty(figures)
    figures = false;
end

f = pdf('normal',xvals,mu,stdev);
F = cdf('normal',xvals,mu,stdev);

if figures
    paramText = [', mu=', num2str(mu), ', stdev=', num2str(stdev)];

    figure
    plot(xvals,f)
    xlabel('x')
    ylabel('probability')
    title(['Gaussian pdf', paramText])
    set(gca,'FontSize',16)

    figure
    plot(xvals,F)
    xlabel('x')
    ylabel('probability')
    title(['Gaussian cdf', paramText])
    set(gca,'FontSize',16)
end
function [sim_x] = randomdraw(xvals, F, n)
% RANDOMDRAW  Draw n samples from a tabulated cdf by inverse-transform sampling.
%   sim_x = randomdraw(xvals, F, n) draws n uniform numbers on (0,1), maps
%   them through the inverse of the cdf F (tabulated at xvals) by linear
%   interpolation, and returns the resulting 1-by-n sample. A histogram of
%   the sample is drawn into the current axes.

random_y = rand(1,n);

% interp1 needs distinct sample points, but a tabulated cdf can repeat values
% where it saturates in the tails; keep one x per distinct cdf value.
[F_knots, keep] = unique(F);
x_knots = xvals(keep);

% Uniform draws outside the tabulated cdf range would interpolate to NaN;
% clamp them so they map to the first/last tabulated x instead.
random_y = min(max(random_y, F_knots(1)), F_knots(end));

sim_x = interp1(F_knots, x_knots, random_y);

histogram(sim_x);
xlabel('x')
xlim([xvals(1) xvals(end)])
ylabel('number')
% Bracket concatenation keeps the space before the count; strcat would
% strip the trailing blank of 'distribution of '.
title(['distribution of ', num2str(n), ' trials'])
% HW2 problem 4
% Compare the binomial(n,p) distribution with the Gaussian that has the same
% mean and standard deviation, for several values of p.
%%
n = 50;
kvals = 0:n;
pvals = [0.01 0.05 0.2 0.45];

% Grid for the Gaussian: ptsPerBin steps per unit-wide bin [k-0.5, k+0.5],
% i.e. a spacing of 0.1 from -0.5 to n+0.5.
ptsPerBin = 10;
xvals = linspace(-0.5, n+0.5, (n+1)*ptsPerBin + 1);
% xvals(edgeIdx) are the bin edges k-0.5 for k = 0..n+1. Indexing the edges
% directly avoids floating-point comparisons against the grid values.
edgeIdx = 1:ptsPerBin:numel(xvals);

% One summed |Gaussian - binomial| discrepancy per value of p.
dist_difference = zeros(1, numel(pvals));

for ip = 1:numel(pvals)
    p1 = pvals(ip);

    % Binomial pmf and cdf on k = 0..n. f is rebuilt in full on every pass
    % so no stale entries from earlier code can leak into F or the plots.
    f = binopdf(kvals, n, p1);
    F = cumsum(f);

    % Gaussian with matching mean and standard deviation (no figures).
    mu = n*p1;
    stdev = sqrt(n*p1*(1-p1));
    [Gf, GF] = gaussian(xvals,mu,stdev,0);

    % Trapezoid-rule area under the Gaussian pdf inside each bin
    % [k-0.5, k+0.5], compared with the binomial probability of k.
    cumArea = cumtrapz(xvals, Gf);
    difference = abs(diff(cumArea(edgeIdx)) - f);
    dist_difference(ip) = sum(difference);

    figure('position',[100,100,1200,400])

    subplot(1,2,1)
    hold on
    bar(kvals,f,1,'facecolor','b','edgecolor','k')
    title(strcat('pdf, n=',num2str(n),', p=',num2str(p1,2)))
    xlabel('k')
    ylabel('probability')
    plot(xvals,Gf,'r','linewidth',1.5)
    xlim([0 n])
    legend('binomial','Gaussian','location','northeast')
    set(gca,'fontsize',16)
    hold off

    subplot(1,2,2)
    hold on
    bar(kvals,F,1,'facecolor','b','edgecolor','k')
    title(strcat('cdf, n=',num2str(n),', p=',num2str(p1,2)))
    xlabel('k')
    ylabel('probability')
    xlim([0 n])
    plot(xvals,GF,'r','linewidth',1.5)
    legend('binomial','Gaussian','location','southeast')
    set(gca,'fontsize',16)
    hold off
end
%NS248 HW2 Problem 5
% Distribution of the standard error of the mean (SEM) for small samples
% drawn from a pool of normally distributed values.
%%
clear all; close all;
draws = 10000;
mu = 10;
sigma = 5;
% Pool of normally distributed values that the small samples are drawn from.
yvals = normrnd(mu,sigma,[1 draws]);
figure
histogram(yvals,-5:0.25:25)
title('histogram of normrnd output')
xlabel('x')
ylabel('y')
set(gca,'fontsize',16)
%%
n = 5;            % sample size
repeats = 1000;   % number of independent samples

% Random positions into yvals, one column per sample. MATLAB indices start
% at 1, so the draw is over 1..numel(yvals); allowing 0 makes yvals(indices) fail.
indices = randi(numel(yvals), n, repeats);
data = yvals(indices);            % n-by-repeats, same shape as indices

% Per-sample (column-wise) mean and the two standard-deviation estimates.
avgs = mean(data, 1);
s_nminus1 = std(data, 0, 1);      % normalized by n-1 (sample estimate)
s_n = std(data, 1, 1);            % normalized by n

% Standard error of the mean is s/sqrt(n) for either choice of s.
% NOTE(review): the n-1 variant previously divided by sqrt(n-1); confirm
% against the assignment's definition if that was intentional.
sem_nminus1 = s_nminus1./sqrt(n);
sem_n = s_n./sqrt(n);

% Empirical distribution of the SEM, in bins of width 0.25 centred on 0:0.25:20.
[sem_h, xvals] = hist(sem_nminus1,0:0.25:20);
sem_prob = sem_h/repeats;         % fraction of samples per bin

figure('position',[100 100 1200 500])
subplot(1,2,1)
bar(xvals, sem_prob)
xlim([0 max(sem_nminus1)+2])
title('pdf')
xlabel('SEM , bin size = 0.25')
ylabel('probability')
set(gca,'fontsize',16)
subplot(1,2,2)
bar(xvals, cumsum(sem_prob))
xlim([0 max(sem_nminus1)+2])
title('cdf')
xlabel('SEM , bin size = 0.25')
ylabel('probability')
set(gca,'fontsize',16)