Update zf-statistik.tex

Errors and improvements
2020-01-28 21:28:54 +01:00 · 2020-01-28 21:28:54 +01:00 · cb372c2a9d
commit cb372c2a9d
parent 954cdbe437
1 changed files with 31 additions and 34 deletions
--- a/zf-statistik.tex
+++ b/zf-statistik.tex
@ -374,13 +374,14 @@ $P(X \geq c)$ für verschiedene $c$
 		\end{tabular}
 	\end{center}
 	oft ist $H_0: \pi = 1/2$ (= reiner Zufall). Man testet also gegen Zufall.
-	\item Teststatistik $T$ (Anzahl treffer bei $n$ Versuchen), Verteilung unter $H_0: T \sim \mathrm{Bin}(n,\pi_0)^3$
+	\item Teststatistik $T$ (Anzahl Treffer bei $n$ Versuchen), Verteilung unter $H_0: T \sim \mathrm{Bin}(n,\pi_0)$
 	\item Festlegen von Signifikanzniveau $\alpha$ (meist $\alpha = 0.05$ oder $\alpha = 0.01$)
 	\item Bestimmung Verwerfungsbereich
 	$$K = \begin{cases}
 		[0,c_u] \cup [c_0,n] & H_A: \pi \neq \pi_0 \\ [c,n] & H_A: \pi > \pi_0 \\ [0,c] & H_A: \pi < \pi_0
 	\end{cases}$$
-  Wobei $c$ der Wert ist bei dem $P(X \leq c) = \alpha$
+  Wobei $c$ der Wert ist, bei dem noch $P(X \leq c) \leq \alpha$ gilt (für $H_A: \pi < \pi_0$),\\
+  analog $P(X \geq c) \leq \alpha$ für $H_A: \pi > \pi_0$
 	\item Testentscheid: Ist $t \in K$? Falls ja wird $H_0$ verworfen, falls nicht wird sie als korrekt angenommen\footnote{Achtung: Das heisst nicht, dass $H_0$ gültig ist! (Falsifizierbarkeit)}
 \end{enumerate}

@ -392,13 +393,13 @@ Also berechne mit Tabelle (schaue wo $P(X=x) \leq \alpha$ für verschiedene $x$
 \subsubsection{Normalapproximation der Binomialverteilung}
 Für eine Verteilung $X \sim \mathrm{Binom}(n,\pi)$ und $\alpha = 0.05$ gilt für einseitige Tests:
 $$c \approx \begin{cases}
-  n\pi_0+1.64\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; bei \;} H_0: \pi > \pi_0 \mathrm{\; (abgerundet)} \\
-  n\pi_0-1.64\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; bei \;} H_0: \pi < \pi_0 \mathrm{\; (aufgerundet)} \\
+  n\pi_0+1.64\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; bei \;} H_A: \pi > \pi_0 \mathrm{\; (aufgerundet)} \\
+  n\pi_0-1.64\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; bei \;} H_A: \pi < \pi_0 \mathrm{\; (abgerundet)} \\
 \end{cases}$$

 Für einen zweiseitigen Test ($\pi \neq \pi_0$) gilt:
-$$c_0 \approx n\pi_0+1.96\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; (abgerundet)}$$
-$$c_u \approx n\pi_0-1.96\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; (aufgerundet)}$$
+$$c_0 \approx n\pi_0+1.96\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; (aufgerundet)}$$
+$$c_u \approx n\pi_0-1.96\sqrt{n\pi_0(1-\pi_0)} \mathrm{\; (abgerundet)}$$
 \subsubsection{Fehler 1. und 2. Art}
 \label{sec:fehler12}
 \begin{enumerate}
@ -415,13 +416,15 @@ $$\mathrm{Macht}:=1-P(\mathrm{Fehler \; 2. \; Art}) = P_{H_A} (X \in K) = P(X \g
 Idee: Wie gross muss eine Stichprobe sein, damit mit einer bestimmten Macht $\beta=x$ eine Hypothese bewiesen werden kann auf Signifikanzniveau $\alpha$?

 \subsubsection{P-Wert}
-Der P-Wert ist ein Wert zwischen 0 und 1, der angibt, wie gut Nullhypothese und Daten zusammenpassen.
-
+Gibt die Wahrscheinlichkeit an, dass unter $H_0$ die Beobachtung oder ein extremeres Ereignis eintritt:
+$$P_{H_0}(T \geq t)$$
+Es ist auch das kleinste Signifikanzniveau $\alpha$, auf dem $H_0$ gerade noch verworfen wird. \\
+Also falls $p$-Wert $> \alpha$ wird $H_0$ beibehalten.
 \subsubsection{Vertrauensintervall (VI)}
 \label{sec:vertrauensintervall}
 $$I:=\{\pi_0;\; \mathrm{Nullhypothese} \; H_0:\pi = \pi_0 \; \mathrm{wird \; beibehalten}\}$$
 Für grosse $n$ gilt
-$$I \approx \frac{x}{n} \pm \sqrt{\frac{x}{n}(1-\frac{x}{n})\frac{1}{n}}$$
+$$I \approx \frac{x}{n} \pm 1.96\sqrt{\frac{x}{n}(1-\frac{x}{n})\frac{1}{n}}$$
 Die Werte von $\pi_0$, bei denen $H_0: \pi = \pi_0$ nicht verworfen wird, bilden ein $(1-\alpha)$-VI.
 $$P_\pi(\pi \in I(X)) \gtrapprox 1-\alpha$$
 Ein $(1-\alpha)$-VI enthält den wahren Parameter $\pi$ mit einer Wahrscheinlichkeit von $(1-\alpha)$ \\
@ -675,7 +678,21 @@ Die Unabhängigkeit führt dazu, dass die Genauigkeit des arithmetischen Mittels

 \subsection{Statistik für eine Stichprobe}
 % Wasn't able to fit it into the third-columns
-Siehe \textit{Abb. \ref{fig:tests}} im \hyperref[sec:anhang]{Anhang}.
+\begin{figure}[H]
+  \begin{tabular}{l|lccc|c}
+  \hline
+  \multirow{2}{*}{} & \multicolumn{4}{c|}{Annahmen} & \multirow{2}{*}{Macht} \\
+   & \multicolumn{1}{c}{\begin{tabular}[c]{@{}c@{}}$\sigma_X$ \\ bekannt \end{tabular}} & $X_i \sim \mathcal{N}$  & \begin{tabular}[c]{@{}c@{}}sym. \\ Verteilung \end{tabular} & i.i.d. &  \\
+  \hline\hline
+  z-Test & \multicolumn{1}{c}{$\sbullet$ } & $\sbullet$  & $\sbullet$  & $\sbullet$  & $****$  \\
+  t-Test &  & $\sbullet$  & $\sbullet$  & $\sbullet$  & $***$  \\
+  Wilcoxon &  & \multicolumn{1}{l}{} & $\sbullet$  & $\sbullet$  & $**$  \\
+  Vorzeichen &  & \multicolumn{1}{l}{} & \multicolumn{1}{l}{} & $\sbullet$  & $*$  \\
+  \hline
+  \end{tabular}
+  \caption{Übersicht der verschiedenen Tests für $\mu$}
+  \label{tab:tests}
+\end{figure}

 \subsubsection{Punktschätzung}
 Betrachtung von Daten $x_1, x_2, ...,x_n$ als Realisierungen von $X_1, X_2, ..., X_n$ i.i.d. \\
@ -810,7 +827,9 @@ Ligt vor falls:
 Die Daten entsprechen
 $$x_1,\dots,x_n \mathrm{\; unter \; Versuchsbedingung \; 1}$$
 $$y_1,\dots,y_n \mathrm{\; unter \; Versuchsbedingung \; 2}$$
-wobei dasselbe $n$ für beide nötig ist.
+wobei dasselbe $n$ für beide nötig ist. \\
+\textbf{Gepoolte Standardabweichung}
+$$S_\mathrm{pool}=\sqrt{\frac{\hat{\sigma}_X^2+\hat{\sigma}_Y^2}{2}}$$

 \subsubsection{t-Test für gepaarte Stichproben}
 $$d_i = x_i - y_i, \quad i \in \{1, \dots, n\}$$
@ -1111,14 +1130,6 @@ Hier können einzelne Variablen signifikant sein und andere nicht. Bei starker K
 Es gilt wie in \ref{sec:r2}
 $$R^2 = \hat{\rho}_{Y\hat{Y}}^2$$

-\begin{center}
-	\rule{.5\linewidth}{0.25pt}
-\end{center}
-
-\begin{center}
-	\rule{\linewidth}{0.25pt}
-\end{center}
-
 \scriptsize

 \end{multicols*}
@ -1387,21 +1398,6 @@ fit <- lm(y ~ x1 + x2)

 \section{Anhang}
 \label{sec:anhang}
-\begin{figure}[H]
-  \begin{tabular}{l|llll|c|c}
-    \hline
-    \multirow{2}{*}{} & \multicolumn{4}{c}{Annahmen} & \multicolumn{1}{|c}{\multirow{2}{*}{\begin{tabular}{l}$n_\mathrm{min}$ bei \\ $\alpha = 0.05$\end{tabular}}} & \multicolumn{1}{|c}{\multirow{2}{*}{\begin{tabular}{c}Macht \\ für Bsp.\end{tabular}}} \\
-     & \multicolumn{1}{c}{\begin{tabular}{c}$\sigma_X$ \\ bekannt\end{tabular}} & \multicolumn{1}{c}{$X_i \sim \mathcal{N}$} & \multicolumn{1}{c}{\begin{tabular}{c}sym. \\ Verteilung\end{tabular}} & \multicolumn{1}{c}{i.i.d.} & \multicolumn{1}{|c|}{} & \multicolumn{1}{c}{} \\
-     \hline\hline
-    z-Test & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c|}{$\sbullet$} & 1 & $****$ \\
-    t-Test &  & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c|}{$\sbullet$} & 2 & $***$ \\
-    Wilcoxon &  &  & \multicolumn{1}{c}{$\sbullet$} & \multicolumn{1}{c|}{$\sbullet$} & 6 & $**$ \\
-    Vorzeichen &  &  &  & \multicolumn{1}{c|}{$\sbullet$} & 5 & $*$ \\
-    \hline
-  \end{tabular}
-  \caption{Übersicht der verschiedenen Tests für $\mu$}
-  \label{fig:tests}
-\end{figure}

 \section*{Referenzen}
 \begin{enumerate}
@ -1421,6 +1417,7 @@ fit <- lm(y ~ x1 + x2)
 \end{itemize}

 \doclicenseImage \\
+Dieses Dokument ist unter (CC BY-SA 4.0) freigegeben \\
 \faGlobe \kern 1em \url{https://n.ethz.ch/~jannisp} \\
 \faGit \kern 0.88em \url{https://git.thisfro.ch/thisfro/statistik-zf} \\
 Jannis Portmann, HS19