From cf29925273aca7c6c5c21b83b7fe47df5d1dbba5 Mon Sep 17 00:00:00 2001
From: Dan Davison
Date: Thu, 2 Jul 2020 13:32:28 -0400
Subject: [PATCH] Add uncommitted *.tex

---
 .gitignore                                    |   1 +
 computer-science--concurrency.tex             |  33 +++++
 ...--error-detection-and-error-correction.tex |   0
 discrete-math--chess.tex                      |  76 ++++++++++
 elaenia.tex                                   | 134 ++++++++++++++++++
 hmm.tex                                       |   6 +
 linear-algebra-kun-pimbook-exercises.tex      |  34 +++++
 neural_networks.tex                           |  12 ++
 xenops.tex                                    |  51 +++++++
 9 files changed, 347 insertions(+)
 create mode 100644 computer-science--concurrency.tex
 create mode 100644 computer-science--error-detection-and-error-correction.tex
 create mode 100644 discrete-math--chess.tex
 create mode 100644 elaenia.tex
 create mode 100644 hmm.tex
 create mode 100644 linear-algebra-kun-pimbook-exercises.tex
 create mode 100644 neural_networks.tex
 create mode 100644 xenops.tex

diff --git a/.gitignore b/.gitignore
index b8a149e..dfd821c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@
 _region_.tex
 fragments.tex
 junk.tex
+z.tex
 .ignore

diff --git a/computer-science--concurrency.tex b/computer-science--concurrency.tex
new file mode 100644
index 0000000..1e43eef
--- /dev/null
+++ b/computer-science--concurrency.tex
@@ -0,0 +1,33 @@
\section{aio}
\begin{itemize}
\item A promise is a pair (result, callbacks).
\item To {\it subscribe} to a promise means to add your callback to it (aio-listen).
\item Callbacks take one argument: the {\it value function} that will be supplied when the promise is resolved.
\item To {\it resolve} a promise means to call all of its callbacks, passing each a {\it value function} (aio-resolve).
\item The simplest example of the promise lifecycle is a non-async function creating and resolving a promise:

\begin{minted}{emacs-lisp}
;; -*- lexical-binding: t; -*-
(require 'aio)

(defun make-call-function-promise (seconds function)
  "Return a promise that calls FUNCTION after SECONDS seconds."
  (let ((promise (aio-promise))
        (value-function (lambda () (funcall function))))
    ;; Subscribe: when the promise resolves, call the supplied value function.
    (aio-listen promise (lambda (value-function) (funcall value-function)))
    (prog1 promise
      ;; Resolve the promise after SECONDS seconds.
      (run-at-time seconds nil #'aio-resolve promise value-function))))

(make-call-function-promise 1 (lambda () (message "Hello world!")))
\end{minted}
\end{itemize}

\subsection{Chaining promises}

To chain two promises means to create an awaitable object which:
\begin{enumerate}
\item can be kicked off, and
\item when the first promise resolves, kicks off the second.
\end{enumerate}

\begin{minted}{emacs-lisp}
;; `schedule' is a placeholder for whatever kicks off promise-2.
(aio-listen promise-1 (lambda (value-function) (schedule promise-2)))
\end{minted}

diff --git a/computer-science--error-detection-and-error-correction.tex b/computer-science--error-detection-and-error-correction.tex
new file mode 100644
index 0000000..e69de29

diff --git a/discrete-math--chess.tex b/discrete-math--chess.tex
new file mode 100644
index 0000000..25c54af
--- /dev/null
+++ b/discrete-math--chess.tex
@@ -0,0 +1,76 @@
\url{https://erikbern.com/2014/11/29/deep-learning-for-chess.html}

In chess there are 64 squares and 12 piece types.

\begin{definition}
  A {\it position} comprises two pieces of information:
  \begin{enumerate}
  \item an assignment to each of the 64 squares of one of the 12 piece types, or EMPTY;
  \item whether white or black is to play next.
  \end{enumerate}
  A {\it valid position} is a position in which the count of each piece type is less than or equal to its initial count.

  A {\it terminal position} is a valid position which is a win for white, a win for black, or a draw.
\end{definition}
Define the set $F = \{-1, 0, 1\}$ of labels. We label a terminal position $p$ as follows:
\begin{align*}
  f(p) =
  \begin{cases}
    -1 & \text{if $p$ is a win for white} \\
    0 & \text{if $p$ is a draw} \\
    +1 & \text{if $p$ is a win for black}.
  \end{cases}
\end{align*}
We extend this definition recursively to non-terminal positions as follows. Define $M(p)$ to be the set of
valid positions reachable in one move from $p$, and define $M^{-1}(p)$ to be the set of valid positions from
which $p$ is reachable in one move. White prefers smaller labels and black prefers larger ones, so the label
of a non-terminal position $p$ is
\begin{align*}
  f(p) =
  \begin{cases}
    \min_{p' \in M(p)} f(p') & \text{if white is to play in $p$} \\
    \max_{p' \in M(p)} f(p') & \text{if black is to play in $p$}.
  \end{cases}
\end{align*}
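For concreteness, here is a minimal sketch of this recursion in code. The {\tt Position} interface
({\tt is\_terminal}, {\tt terminal\_label}, {\tt white\_to\_play}, {\tt moves}) is hypothetical, not a real
chess library:

\begin{minted}{python}
def label(p):
    """f(p) in {-1, 0, +1}: -1 = win for white, +1 = win for black."""
    if p.is_terminal:
        return p.terminal_label()
    child_labels = [label(q) for q in p.moves()]  # q ranges over M(p)
    # White, preferring small labels, minimises; black maximises.
    return min(child_labels) if p.white_to_play else max(child_labels)
\end{minted}

(Run naively, this recursion is of course intractable for chess; hence the backwards, retrograde formulation
below.)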
In practice we would compute these labels by {\it retrograde analysis}: start from the terminal positions,
whose labels are known directly, and propagate labels backwards through $M^{-1}$. A sketch (the
{\tt Position} attributes are again assumed, not a real chess API):

\begin{minted}{python}
def visit(position):
    """Propagate position's label back to the positions in M^{-1}(position)."""
    if position.label is None:
        return  # this position is not yet labelled; nothing to propagate

    for ancestor in position.get_ancestors():  # the set M^{-1}(position)
        # Record this position's label as one of the options available to
        # the player to move in `ancestor'. Once every q in M(ancestor) is
        # labelled, f(ancestor) is the min or max of ancestor.child_labels.
        ancestor.child_labels.add(position.label)
\end{minted}

Concretely, let $p$ (white to play) be a valid non-terminal position. If $f(q) = -1$ for some $q \in M(p)$,
then white can win in one move, and accordingly $f(p) = -1$, as the $\min$ in the definition requires.
Working backwards instead: let $q$ be a terminal position (black to play) with $f(q) = -1$, and let
$P = M^{-1}(q)$ be the set of (white to play) positions from which $q$ is reachable in one move. Then white
can win in one move from every $p \in P$, so $f(p) = -1$ for all $p \in P$. If instead $f(q) \neq -1$ for
every $q \in M(p)$, the label of $p$ is determined only once all of $M(p)$ has been labelled.

diff --git a/elaenia.tex b/elaenia.tex
new file mode 100644
index 0000000..955f95f
--- /dev/null
+++ b/elaenia.tex
@@ -0,0 +1,134 @@
\documentclass{article}

\begin{document}

\section{Objective}

We record an audio signal on a smart phone and we want to infer:
\begin{enumerate}
\item the set of species that are vocalising during the recording;
\item the points in time at which each vocalisation starts and stops.
\end{enumerate}

\section{VGGish}
\footnote{
  \url{https://github.com/tensorflow/models/tree/master/research/audioset/vggish#input-audio-features}
  \url{https://arxiv.org/pdf/1903.00765.pdf}
  Google describe the initial step as: ``Audio signal is converted to a log-mel spectrogram via STFT
  (window-size 25ms, hop-size 10ms, Hann window)''. The hop size is the time between the starts of
  successive windows, so 25ms windows with a 10ms hop overlap by 15ms.
}
\begin{enumerate}
\item Input audio is divided into non-overlapping 0.96s segments (treated as distinct observations for training).
\item Each observation is represented as a $96 \times 64$ pixel input spectrogram image (a log mel spectrogram with 10ms frames).
\item The penultimate (``bottleneck'') layer has 128 units: these values are the ``embedding'' that summarizes one 0.96s input.

\item {\bf VGGish training}:
  \begin{enumerate}
  \item The VGGish network was trained by Google on a large YouTube audio data set.
  \item Essentially, it learns to associate 0.96s frames with the set of tags of the parent video.
  \item The trained VGGish is henceforth a fixed function that maps a 0.96s frame to an ``embedding''
    representation.
  \item So whatever that embedding is, it should contain good information for classifying the sound class.
  \item Note that VGGish has discarded all information about patterns that extend over more than 0.96s.
  \item On the other hand, its classifications are based on many frames per sound type.
  \item So the embedding is going to be fairly generic: for every sound type, for any 0.96s frame, if
    that frame is informative for classification, then the embedding will capture some information from it.
  \end{enumerate}

\item {\bf Bird audio classifier training}:
  \begin{enumerate}
  \item Each labeled training recording is broken into 0.96s frames.
  \item Use the trained VGGish to compute the embedding vector for each frame.
  \item Use these labeled embedding vectors to train the final classifier (an SVM).
  \item Thus the final classifier learns to classify embedding vectors to species.
  \item Note: the final classifier does not have access to any information about patterns extending over more than 0.96s.
  \item On the other hand, it does use many 0.96s frames per species.
  \end{enumerate}
\item {\bf Inference}:
  \begin{enumerate}
  \item The bird recording is broken into 0.96s frames.
  \item For each frame, use the trained VGGish to compute the embedding.
  \item Use the trained final classifier to classify each embedding to a species.
  \item The final classification is a majority vote among the per-frame classifications.
  \end{enumerate}
\end{enumerate}
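To make the shapes concrete, here is a sketch of the framing and spectrogram step. This is not Google's
actual preprocessing code; it assumes {\tt librosa}, with parameters chosen to match the stated 25ms window,
10ms hop and 64 mel bands:

\begin{minted}{python}
import librosa

def logmel_examples(y, sr):
    """Split an audio signal into 0.96s examples of shape (96, 64)."""
    mel = librosa.feature.melspectrogram(
        y=y, sr=sr,
        n_fft=int(0.025 * sr),       # 25ms window
        hop_length=int(0.010 * sr),  # 10ms hop
        n_mels=64)
    logmel = librosa.power_to_db(mel)  # shape (64, n_frames)
    n = logmel.shape[1] // 96          # 96 frames = 0.96s per example
    # -> (n_examples, 96, 64): non-overlapping 0.96s segments
    return logmel[:, :n * 96].T.reshape(n, 96, 64)
\end{minted}

Each of the $n$ resulting $96 \times 64$ arrays is one observation of the kind VGGish takes as input.
Similarly, a sketch of the classifier stage on top of the embeddings, where VGGish is treated as a
black-box function {\tt embed} from a (96, 64) observation to a 128-vector (again hypothetical, not the
paper's code):

\begin{minted}{python}
import numpy as np
from sklearn.svm import SVC

def train(recordings, species_labels, embed):
    X, y = [], []
    for rec, species in zip(recordings, species_labels):
        for example in rec:           # rec: array of (96, 64) observations
            X.append(embed(example))  # 128-dimensional embedding
            y.append(species)
    return SVC().fit(np.array(X), np.array(y))

def classify(clf, rec, embed):
    # Majority vote over the per-frame classifications.
    votes = clf.predict(np.array([embed(e) for e in rec]))
    values, counts = np.unique(votes, return_counts=True)
    return values[np.argmax(counts)]
\end{minted}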
\section{Inference}
What do we ultimately want the inference phase to look like?
\begin{enumerate}
\item Take short frames from a live recording in real time and map them to classifier output?
  If so, then we will not be using any information about extended patterns (duration, repeat intervals, etc.) in
  the vocalisation.
\item Or should we also be able to process an entire recording?
\end{enumerate}

\section*{Training data}
\begin{enumerate}
\item For every bird species, there exist many recordings labeled with the species name.
\item It is unknown at what times during a recording the bird is vocalising, and it is unknown which vocalisation
  types are involved (song, call, etc.).
\item Suppose we are in a location with only one possible bird species, and that it makes only one vocalisation type.
\item We record audio. The problem is now:
  \begin{quote}
    Does the audio contain the bird noise which is present in the training data?
  \end{quote}
\item What would a likelihood-based approach look like?
\end{enumerate}

\section*{Notation}
\begin{tabular}{l|l}
  $y_n \in \R$ & the signal (amplitude) at time point $n$ \\
  $Y_m \in \R^d$ & the frequency-domain coordinates of the signal during time window $m$
\end{tabular}

\section{Model}

Let $k$ be the species identity (or no-bird). The likelihood of the data is
\begin{align*}
  P(y|k) = P(y_1, \ldots, y_N | k).
\end{align*}
Alternatively, we can compute the likelihood of the short-time Fourier-transformed signal; if the STFT is
taken to be an invertible (unitary) change of basis, this is an equivalent representation of the data and
\begin{align*}
  P(y|k) = P(Y|k) = P(Y_1, \ldots, Y_M|k).
\end{align*}

\begin{enumerate}
\item The input vector (a field recording) has one variable high dimension (the number of time windows); the second
  dimension could be a fixed number of frequency windows.
\item The output layer is $K$-dimensional, where $K$ is the number of possible species.
\item So we could try to find sub-intervals of the input time dimension which give a strong signal in the output layer.
\end{enumerate}
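A sketch of computing the $Y_m$ from the raw signal, assuming {\tt scipy} (the window and hop values are
illustrative):

\begin{minted}{python}
import numpy as np
from scipy.signal import stft

sr = 22050
y = np.random.randn(5 * sr)  # placeholder for a field recording y_1, ..., y_N
f, t, Y = stft(y, fs=sr,
               nperseg=int(0.025 * sr),   # 25ms window
               noverlap=int(0.015 * sr))  # i.e. a 10ms hop
# Y has shape (n_frequencies, M): column m holds the coordinates Y_m.
\end{minted}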
\section{Ideal model}

\begin{enumerate}
\item From the training data for species $k$, we learn a generative model for that species' vocalisations.
\item We classify new data $y$ to the species model under which $y$ has the highest likelihood of being generated.
\end{enumerate}
So what would a generative model for the recorded data look like? What would a generative model for the
STFT look like?

We could
\begin{enumerate}
\item classify each recording to the closest training sample. (What distance metric?)
\end{enumerate}

(For now, we assume that different bird vocalisations do not overlap in time.)

\section*{STFT background}
The discrete-time STFT divides the signal into time windows, and performs a Fourier transform on each window.
The Fourier transform of a signal can be viewed as representing the signal by its coordinates in a new basis.
Thus the STFT converts the 1D time series into a higher-dimensional time series (with coarser time buckets).

What is an example of an STFT-based algorithm that could conceivably work?

\end{document}

diff --git a/hmm.tex b/hmm.tex
new file mode 100644
index 0000000..c8674e9
--- /dev/null
+++ b/hmm.tex
@@ -0,0 +1,6 @@
\documentclass{article}

\begin{document}


\end{document}
\ No newline at end of file

diff --git a/linear-algebra-kun-pimbook-exercises.tex b/linear-algebra-kun-pimbook-exercises.tex
new file mode 100644
index 0000000..898d87b
--- /dev/null
+++ b/linear-algebra-kun-pimbook-exercises.tex
@@ -0,0 +1,34 @@
\begin{mdframed}
\includegraphics[width=400pt]{img/linear-algebra-kun-pimbook-exercises--6242.png}
\end{mdframed}

The properties of the zero vector are
\begin{enumerate}
\item $\0 + u = u$ for every vector $u$ (additive identity)
\item $a\0 = \0$ for every scalar $a$
\end{enumerate}

\begin{proof}
  Let $a \neq 1$ be a scalar from the field.

  We have $av - v = aw - w$, since both are equal to $\0$. Therefore $(a - 1)v = (a - 1)w$. Since
  $a \neq 1$, the scalar $a - 1$ has a multiplicative inverse, and multiplying both sides by
  $(a - 1)^{-1}$ gives $v = w$.
\end{proof}

\begin{proof}
  Let $u \neq \0$ be a vector. We have $u + v = u + w = \0$. Adding the additive inverse of $u$ to both
  sides gives $v = w$.
\end{proof}

\begin{mdframed}
\includegraphics[width=400pt]{img/linear-algebra-kun-pimbook-exercises--f0b5.png}
\end{mdframed}

\begin{proof}
  Using the linearity of $f$ and then the linearity of $g$:
  \begin{align*}
    (g \circ f)(ax + by)
    &= g(f(ax + by)) \\
    &= g(af(x) + bf(y)) \\
    &= ag(f(x)) + bg(f(y)) \\
    &= a(g \circ f)(x) + b(g \circ f)(y).
  \end{align*}
  Hence $g \circ f$ is linear.
\end{proof}

diff --git a/neural_networks.tex b/neural_networks.tex
new file mode 100644
index 0000000..d21d0c5
--- /dev/null
+++ b/neural_networks.tex
@@ -0,0 +1,12 @@

\begin{tabular}{l|l}
  $d$ & dimension of an input vector \\
  $K$ & number of output classification classes \\
\end{tabular}


\begin{enumerate}
\item A vanilla fully-connected classification network is a map $\R^d \to \R^K$.
\item A CNN is the same, except that a neuron in layer $l$ is connected only to a local subset of the
  neurons in layer $l-1$ (and the connection weights are shared across locations).
\end{enumerate}

diff --git a/xenops.tex b/xenops.tex
new file mode 100644
index 0000000..10f148f
--- /dev/null
+++ b/xenops.tex
@@ -0,0 +1,51 @@
\documentclass{article}
\begin{document}

Xenops is a $\text{\LaTeX}$ editing environment for Emacs.

All math, whether inline like $\dot{y}$ or display like
\begin{align*}
  \frac{\partial L}{\partial \dot{y}}
\end{align*}
is displayed as SVG.

Inline $\dot{x}$ and display math
\begin{align*}
  \frac{\partial L}{\partial \dot{x}}
\end{align*}
are converted to SVG as you type.

\end{document}