diff --git a/Makefile b/Makefile index 73d50b56fac770f3f7c8db176242cad8eed1d47c..a9efe4d949106523cc3d490a0db14e9dd89e1433 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,12 @@ -all: +all: figures pdflatex -jobname=thesis main.tex - bibtex thesis.aux + biber thesis.bcf pdflatex -jobname=thesis main.tex pdflatex -jobname=thesis main.tex - rm -f *.bbl *.blg *.log *.aux *.toc *.out + rm -f *.bbl *.blg *.log *.aux *.toc *.out *.bcf *.run.xml + +# Regenerate the plots inside the figures/ virtualenv; '&&' aborts the recipe at the first failing step. +figures: + cd figures && . venv/bin/activate && python3 generate_figures.py + +.PHONY: all figures diff --git a/bibliography/bibliography.bib b/bibliography/bibliography.bib index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fb234a9ce109473232012db2b4f849aece22c9f4 100644 --- a/bibliography/bibliography.bib +++ b/bibliography/bibliography.bib @@ -0,0 +1,41 @@ +@article{Fog2008Sampling, +author = {Agner Fog}, +title = {Sampling Methods for Wallenius' and Fisher's Noncentral Hypergeometric Distributions}, +journal = {Communications in Statistics - Simulation and Computation}, +volume = {37}, +number = {2}, +pages = {241--257}, +year = {2008}, +publisher = {Taylor \& Francis}, +doi = {10.1080/03610910701790236}, +URL = {https://doi.org/10.1080/03610910701790236}, +eprint = {https://doi.org/10.1080/03610910701790236} +} + +@BOOK{Forbes2010Statistical, + title = "Statistical Distributions", + author = "Forbes, Catherine and Evans, Merran and Hastings, Nicholas and + Peacock, Brian", + publisher = "Wiley-Blackwell", + edition = 4, + month = nov, + year = 2010, + address = "Hoboken, NJ", + language = "en" +} + +@Article{Haber1991Timestamp, +author={Haber, Stuart and Stornetta, W. 
Scott}, +title={How to time-stamp a digital document}, +journal={Journal of Cryptology}, +year={1991}, +month={Jan}, +day={01}, +volume={3}, +number={2}, +pages={99-111}, +abstract={The prospect of a world in which all text, audio, picture, and video documents are in digital form on easily modifiable media raises the issue of how to certify when a document was created or last changed. The problem is to time-stamp the data, not the medium. We propose computationally practical procedures for digital time-stamping of such documents so that it is infeasible for a user either to back-date or to forward-date his document, even with the collusion of a time-stamping service. Our procedures maintain complete privacy of the documents themselves, and require no record-keeping by the time-stamping service.}, +issn={1432-1378}, +doi={10.1007/BF00196791}, +url={https://doi.org/10.1007/BF00196791} +} \ No newline at end of file diff --git a/chapters/01_introduction.tex b/chapters/01_introduction.tex index 4552d103cd94bd8742d80a9f5cc948fddbe9fb72..4e3f28cbfe4b235a0c92de7737cd879206030b5c 100644 --- a/chapters/01_introduction.tex +++ b/chapters/01_introduction.tex @@ -4,7 +4,7 @@ The simplest approach to digital time-stamping relies on a trusted third party (TTP). If Alice wants to time-stamp a document and prove the document's existence at the time-stamp's time to Bob at some later time, she can ask a time-stamp authority (TSA) to cryptographically sign a secure hash of her document together with the current time. -Bob accepts the TSA's signature as proof of the document's existence at the specified time. (cite Haber1991Timestamp) +Bob accepts the TSA's signature as proof of the document's existence at the specified time. \footfullcite{Haber1991Timestamp} This scheme requires complete trust of both Alice and Bob in the impartiality of the TSA. 
Bob needs to trust the TSA to keep its private key secure and to never produce time-stamps for the past (an attack which I will refer to as "backdating"). @@ -61,7 +61,7 @@ Evidently, a successful backdating attack occurs when the client selects only co Let us now further assume that the client selects its $n$ witnesses from the total number of witnesses $N$ completely at random. Our problem is now equivalent to the urn problem when ``drawing without replacement''. -$e$ thus follows the hypergeometric distribution. (cite Forbes2010Statistical pp. 117-119) +$e$ thus follows the hypergeometric distribution. \footfullcite[pp.~117-119]{Forbes2010Statistical} \begin{equation} \left. P(e=k)=\binom{E}{k}\binom{N-E}{n-k} \middle/ \binom{N}{n}\right. @@ -73,13 +73,28 @@ The probability of a successful backdating attack is then given by the equation: \left. P(e=n)=\binom{E}{n} \middle/ \binom{N}{n}\right. \end{equation} +Figure~\ref{figure::backdating_probability_hypergeometric} graphs this probability as a function of $E$ for different values of $n$. + +\begin{figure} + \includegraphics{figures/backdating_probability_hypergeometric.png} + \caption{\label{figure::backdating_probability_hypergeometric} + Probability of a successful backdating attack according to the hypergeometric distribution. + $N=30$ witnesses keep records of the time-stamps issued by the TSA. + Of these witnesses, a number $E$ (plotted on the x-axis) maliciously collude with the TSA in order to backdate time-stamps. + To check a time-stamp's validity, a client consults $n$ randomly selected witnesses. + The backdating attack is successful if all $n$ selected witnesses are malicious. + As expected, the probability of a successful backdating attack increases with an increasing number of colluding witnesses $E$, reaching 1 when $N=E$. + The client can decrease the likelihood of a successful backdating attack by consulting more witnesses, as can be observed from the different graph lines. 
+ } +\end{figure} + In practice, the selection of witnesses may not be truly random. Sticking to our example of newspaper archives, a client will likely prefer libraries which are geographically close to them. A network protocol for distributed trust may also favor witnesses with small round-trip times in order to increase performance. An attacker may be able to leverage this by placing colluding witnesses at favorable locations. We can model this by introducing a weight parameter $\omega$, where a malicious witness is $\omega$ times more likely to be selected than an honest witness. -$e$ then follows Fisher's noncentral hypergeomtric distribution. (cite Fog2008Sampling) +$e$ then follows Fisher's noncentral hypergeometric distribution. \footfullcite{Fog2008Sampling} \begin{align} e_{\mathrm{min}}&=\max(0, n+E-N)\\ @@ -93,20 +108,85 @@ With the probability of a successful backdating attack being: P(e=n)=\left. \binom{E}{n}\omega^n \middle/ \sum_{k'=e_{\mathrm{min}}}^{e_{\mathrm{max}}} \binom{E}{k'}\binom{N-E}{n-k'}\omega^{k'} \right. \end{equation} +Figure~\ref{figure::backdating_probability_noncentral} graphs this probability as a function of $E$ for different values of $\omega$. + +\begin{figure} + \includegraphics{figures/backdating_probability_noncentral.png} + \caption{\label{figure::backdating_probability_noncentral} + Probability of a successful backdating attack according to Fisher's noncentral hypergeometric distribution. + $N=30$ witnesses keep records of the time-stamps issued by the TSA. + Of these witnesses, a number $E$ (plotted on the x-axis) maliciously collude with the TSA in order to backdate time-stamps. + To check a time-stamp's validity, a client consults $n=8$ randomly selected witnesses. + When selecting a witness, a malicious witness is $\omega$ times more likely to be selected than an honest witness. + The backdating attack is successful if all $n$ selected witnesses are malicious. 
+ As expected, the probability of a successful backdating attack increases with an increasing number of colluding witnesses $E$, reaching 1 when $N=E$. + Increasing values of $\omega$ increase the chances of a successful backdating attack, as can be observed from the different graph lines. + For $\omega=1$, the graph matches the hypergeometric distribution of Fig.~\ref{figure::backdating_probability_hypergeometric}. + For large values of $\omega$, the graph approaches a step function with the step at $E=n=8$. + } +\end{figure} + Note that these equations are equivalent to the hypergeomtric distribution when $\omega=1$. This is the optimal case, limiting the probability of a successful backdating attack as much as possible. $\omega$ approaches infinity if the attacker can ensure that the client will only select malicious witnesses. -In this case, the probability of a successful backdating attack approaches 1. +In this case, the probability of a successful backdating attack approaches a step function with the step at $n=E$. \begin{equation} - \lim_{\omega\rightarrow \infty} P(e=n)=1 + \lim_{\omega\rightarrow \infty} P(e=n)= + \begin{cases} + 0 & E<n\\ + 1 & E\geq n + \end{cases} \end{equation} -This is, of course, the worst possible case for security. +\subsubsection{Increasing availability} + +\begin{figure} + \includegraphics{figures/backdating_probability_hypergeometric_available.png} + \caption{\label{figure::backdating_probability_hypergeometric_available} + Probability of a successful backdating attack according to the hypergeometric distribution when allowing witness unavailability. + $N=30$ witnesses keep records of the time-stamps issued by the TSA. + Of these witnesses, a number $E$ (plotted on the x-axis) maliciously collude with the TSA in order to backdate time-stamps. + To check a time-stamp's validity, a client consults $n=8$ randomly selected witnesses. + It accepts the time-stamp if it receives valid responses from $n'$ witnesses. 
+ The backdating attack is successful if at least $n'$ of the selected witnesses are malicious. + Decreasing values of $n'$ increase the chances of a successful backdating attack, as can be observed from the different graph lines. + } +\end{figure} + +In a real distributed service, we cannot assume that a client can always reach any witness it desires. +Network partitions or denial of service attacks may render witnesses temporarily unavailable. +We include a new parameter $n'$ into our model to accommodate this possibility. +While the client still asks $n$ randomly selected witnesses to verify a time-stamp, it accepts the time-stamp as soon as it receives $n'$ valid responses from the witnesses, with $n'\leq n$. +A backdating attack is now successful when $e\geq n'$. + +In the case of the hypergeometric distribution, this leaves us with the following equation. + +\begin{equation} + \left. P(e\geq n')=\sum_{k=n'}^n\binom{E}{k}\binom{N-E}{n-k} \middle/ \binom{N}{n}\right. +\end{equation} -TODO: Add lots of graphs for the probability distributions in this section. +Figure~\ref{figure::backdating_probability_hypergeometric_available} graphs this probability as a function of $E$ for different values of $n'$. + +The probability of a successful backdating attack according to Fisher's distribution is then: + +\begin{equation} + P(e\geq n')=\sum_{k=n'}^n\left. \binom{E}{k}\binom{N-E}{n-k}\omega^k \middle/ \sum_{k'=e_{\mathrm{min}}}^{e_{\mathrm{max}}} \binom{E}{k'}\binom{N-E}{n-k'}\omega^{k'} \right. +\end{equation} -TODO: The other side of trust is that Alice needs to trust service availability. -Can be solved by employing multiple TSAs. -Quickly explain this. +Figure~\ref{figure::backdating_probability_noncentral_available} graphs this probability as a function of $E$ for different values of $n'$. 
+ +\begin{figure} + \includegraphics{figures/backdating_probability_noncentral_available.png} + \caption{\label{figure::backdating_probability_noncentral_available} + Probability of a successful backdating attack according to Fisher's noncentral hypergeometric distribution when allowing witness unavailability. + $N=30$ witnesses keep records of the time-stamps issued by the TSA. + Of these witnesses, a number $E$ (plotted on the x-axis) maliciously collude with the TSA in order to backdate time-stamps. + To check a time-stamp's validity, a client consults $n=8$ randomly selected witnesses. + It accepts the time-stamp if it receives valid responses from $n'$ witnesses. + When selecting a witness, a malicious witness is $\omega=10$ times more likely to be selected than an honest witness. + The backdating attack is successful if at least $n'$ of the selected witnesses are malicious. + Decreasing values of $n'$ increase the chances of a successful backdating attack, as can be observed from the different graph lines. 
+ } +\end{figure} diff --git a/figures/backdating_probability_hypergeometric.png b/figures/backdating_probability_hypergeometric.png new file mode 100644 index 0000000000000000000000000000000000000000..442621d1da734fc48388d8b35f3ab6dba8b06604 Binary files /dev/null and b/figures/backdating_probability_hypergeometric.png differ diff --git a/figures/backdating_probability_hypergeometric_available.png b/figures/backdating_probability_hypergeometric_available.png new file mode 100644 index 0000000000000000000000000000000000000000..73fa44a8ff950674936c4c679dcbcdfce0ed93b6 Binary files /dev/null and b/figures/backdating_probability_hypergeometric_available.png differ diff --git a/figures/backdating_probability_noncentral.png b/figures/backdating_probability_noncentral.png new file mode 100644 index 0000000000000000000000000000000000000000..351741f3b77267b8f2a576a7236b6868528a0222 Binary files /dev/null and b/figures/backdating_probability_noncentral.png differ diff --git a/figures/backdating_probability_noncentral_available.png b/figures/backdating_probability_noncentral_available.png new file mode 100644 index 0000000000000000000000000000000000000000..4d7ea0617dac6a15cb7880710d7ba6d35221a992 Binary files /dev/null and b/figures/backdating_probability_noncentral_available.png differ diff --git a/figures/generate_figures.py b/figures/generate_figures.py new file mode 100644 index 0000000000000000000000000000000000000000..2e1d8de8028a9fbdf318acf4a39ade4ae3b6bcde --- /dev/null +++ b/figures/generate_figures.py @@ -0,0 +1,71 @@ +# Generates the backdating-probability plots included by chapters/01_introduction.tex +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import hypergeom, nchypergeom_fisher + +WIDTH = 6.225 # \textwidth in inches + +# SciPy argument order, in this script's symbols: hypergeom.pmf(k, N, E, n) +# (population N, malicious E, drawn n); likewise nchypergeom_fisher.pmf(k, N, E, n, w) + +plt.rcParams.update({'figure.dpi': 300}) +plt.rcParams.update({'font.size': 10}) +plt.rcParams.update({'font.family': 'serif'}) +plt.rcParams.update({'text.usetex': True}) +plt.rcParams.update({'text.latex.preamble': 
'\\usepackage{mathpazo}'}) + +# P(e=n) under the hypergeometric distribution, one curve per sample size n +N = 30 +E = np.arange(N+1) +plt.figure(figsize=(WIDTH, WIDTH/2)) +plt.plot(E, hypergeom.pmf(1, N, E, 1), "o-", mec="w", label="$n=1$") +plt.plot(E, hypergeom.pmf(2, N, E, 2), "s-", mec="w", label="$n=2$") +plt.plot(E, hypergeom.pmf(4, N, E, 4), "D-", mec="w", label="$n=4$") +plt.plot(E, hypergeom.pmf(8, N, E, 8), "p-", mec="w", label="$n=8$") +plt.xlabel("\\# of colluding witnesses $E$") +plt.ylabel("Probability $P(e=n)$") +plt.legend() +plt.title("Backdating probability (Hypergeometric distribution, $N=30$)") +plt.tight_layout() +plt.savefig("backdating_probability_hypergeometric.png") + +# P(e=n) under Fisher's noncentral hypergeometric distribution, one curve per weight omega +n = 8 +plt.figure(figsize=(WIDTH, WIDTH/2)) +plt.plot(E, nchypergeom_fisher.pmf(n, N, E, n, 1), "o-", mec="w", label="$\\omega=1$") +plt.plot(E, nchypergeom_fisher.pmf(n, N, E, n, 10), "s-", mec="w", label="$\\omega=10$") +plt.plot(E, nchypergeom_fisher.pmf(n, N, E, n, 100), "D-", mec="w", label="$\\omega=100$") +plt.plot(E, nchypergeom_fisher.pmf(n, N, E, n, 1000), "p-", mec="w", label="$\\omega=1000$") +plt.xlabel("\\# of colluding witnesses $E$") +plt.ylabel("Probability $P(e=n)$") +plt.legend() +plt.title("Backdating probability (Fisher's distribution $N=30, n=8$)") +plt.tight_layout() +plt.savefig("backdating_probability_noncentral.png") + +# P(e>=n') = cdf(n) - cdf(n'-1) under the hypergeometric distribution, one curve per threshold n' +plt.figure(figsize=(WIDTH, WIDTH/2)) +plt.plot(E, hypergeom.cdf(n, N, E, n) - hypergeom.cdf(7, N, E, n), "o-", mec="w", label="$n'=8$") +plt.plot(E, hypergeom.cdf(n, N, E, n) - hypergeom.cdf(3, N, E, n), "o-", mec="w", label="$n'=4$") +plt.plot(E, hypergeom.cdf(n, N, E, n) - hypergeom.cdf(1, N, E, n), "o-", mec="w", label="$n'=2$") +plt.plot(E, hypergeom.cdf(n, N, E, n) - hypergeom.cdf(0, N, E, n), "o-", mec="w", label="$n'=1$") +plt.xlabel("\\# of colluding witnesses $E$") +plt.ylabel("Probability $P(e\\geq n')$") +plt.legend() +plt.title("Backdating vs. 
availability (Hypergeometric distribution, $N=30, n=8$)") +plt.tight_layout() +plt.savefig("backdating_probability_hypergeometric_available.png") + +# Same P(e>=n') plot under Fisher's distribution with the fixed weight w +w = 10 +plt.figure(figsize=(WIDTH, WIDTH/2)) +plt.plot(E, nchypergeom_fisher.cdf(n, N, E, n, w) - nchypergeom_fisher.cdf(7, N, E, n, w), "o-", mec="w", label="$n'=8$") +plt.plot(E, nchypergeom_fisher.cdf(n, N, E, n, w) - nchypergeom_fisher.cdf(5, N, E, n, w), "o-", mec="w", label="$n'=6$") +plt.plot(E, nchypergeom_fisher.cdf(n, N, E, n, w) - nchypergeom_fisher.cdf(3, N, E, n, w), "o-", mec="w", label="$n'=4$") +plt.plot(E, nchypergeom_fisher.cdf(n, N, E, n, w) - nchypergeom_fisher.cdf(0, N, E, n, w), "o-", mec="w", label="$n'=1$") +plt.xlabel("\\# of colluding witnesses $E$") +plt.ylabel("Probability $P(e\\geq n')$") +plt.legend() +plt.title("Backdating vs. availability (Fisher's distribution, $N=30, n=8, \\omega=10$)") +plt.tight_layout() +plt.savefig("backdating_probability_noncentral_available.png") diff --git a/main.tex b/main.tex index 9d24c752bde7e5b48c8794b833a321f59e309de0..762c2a24831daecc0f82dc3c32b1eebe84ae35ce 100644 --- a/main.tex +++ b/main.tex @@ -25,7 +25,9 @@ \usepackage{graphicx} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%_YOUR_PACKAGES_%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % UTILITY PACKAGES -\usepackage{cite} +\usepackage[style=authoryear]{biblatex} +\addbibresource{bibliography/bibliography.bib} +%\usepackage{cite} \usepackage{comment} % enables block comments via \begin{comment} ... \end{comment} environment \usepackage{amsmath} % for all the good maths stuff like the align environment \usepackage{amsthm} % for definitions, lemmas, etc. 
- also for defining your own stuff, eg below: @@ -33,7 +35,7 @@ %\newtheorem{definition}{Definition}[section] % definition setup and call % IMAGE PACKAGES \usepackage{wrapfig} % create figures with wrapped text around it -\usepackage{caption} % better captions for figures +\usepackage[format=plain,labelfont=bf,font=footnotesize]{caption} % better captions for figures \usepackage{subcaption} % captions for subfigures % PRESENTATION PACKAGES \usepackage{booktabs} % for professional tables @@ -59,13 +61,14 @@ \mitverteidigung % entfernen, falls keine Verteidigung erfolgt \makeTitel +\pagenumbering{gobble} % page numbers invisible for TOC, abstract and filler pages + \begin{abstract} \textbf{Abstract.} Write your abstract here. \end{abstract} \newpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%_TABLE_OF_CONTENTS_%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\pagenumbering{gobble} % page numbers invisible for TOC and filler pages \tableofcontents \cleardoublepage % deactivate for one-sided printing %\newpage % activate for one-sided printing @@ -80,9 +83,9 @@ % remember to edit \addbibresource in the TEMPLATE_PACKAGSES area above! \newpage \pagenumbering{roman} % start roman page numbers from here (optional) -\nocite{*} -\bibliographystyle{abbrv} -\bibliography{bibliography/bibliography.bib} +\printbibliography +%\bibliographystyle{abbrv} +%\bibliography{bibliography/bibliography.bib} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%_APPENDIX_%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section*{Appendix} \label{Appendix} \addcontentsline{toc}{section}{Appendix} % adds entry to table of contents diff --git a/thesis.pdf b/thesis.pdf index 4581162f5385712d6483e2b2dda210840c812940..e58c1adcfc65289c3f5543df2fc6f3dcf8a3071f 100644 Binary files a/thesis.pdf and b/thesis.pdf differ