-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsyllabus.tex
112 lines (103 loc) · 5.53 KB
/
syllabus.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
\documentclass[12pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{hyperref}
%\usepackage{nopageno}
\hypersetup{
colorlinks=true,
linkcolor=black,
filecolor=magenta,
urlcolor=cyan,
}
\author{Kristoffer L. Nielbo}
\begin{document}
%\thispagestyle{empty}
\begin{center}
{\large Aarhus University}\\
\bigskip
{\large Summer University 2016}\\
July 25 to August 5\footnote{Note that the exam date is August 12 for oral and/or written exams.}\\
\bigskip
{\LARGE\textbf{Text Mining the Great Unread}}\\
\end{center}
\bigskip
Instructors:\\
Kristoffer L Nielbo ([email protected]) \\
Hilke Reckman ([email protected])\\
\noindent\rule{4cm}{0.4pt}
\bigskip
\noindent INTERACTING MINDS CENTRE\\
Jens Chr. Skous Vej 4, Building 1483, 3rd floor\\
DK-8000 Aarhus C\\
\url{http://interactingminds.au.dk/}\\
\noindent\makebox[\linewidth]{\rule{\paperwidth}{0.4pt}}
\section*{Program overview}
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{||c c c c c c||}
\hline
Time & Monday & Tuesday & Wednesday & Thursday & Friday \\ [0.5ex]
\hline\hline
9:00-10:30 & Introduction & Language models & Design + Communication & Named entities & Machine learning \\
\hline
11:00-12:30 & Text analytics & Data preparation & Sentiments & Associations & Clustering\\
\hline
13:30-15:00 & R basics & Counting words & Code fest & Code fest & Classification\\
\hline
15:15-17:00 & The art of R & In groups & In groups & Code fest & Social event\\ [1ex]
\hline
\end{tabular}}
\end{center}
% week 31 (1-5/8)
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{||c c c c c c||}
\hline
Time & Monday & Tuesday & Wednesday & Thursday & Friday \\ [0.5ex]
\hline\hline
9:00-10:30 & & Latent variables & Library* & Unsilo* & Proposal review \\
\hline
11:00-12:30 & Social media* & Latent variables & Library* & Temporal dynamics* & Proposal review\\
\hline
13:30-15:00 & Business analytics* & Network analysis* & Library* & Word embedding* & Proposal review\\
\hline
15:15-17:00 & & Supervision & Supervision & Supervision & Social event\\ [1ex]
\hline
\end{tabular}}
\end{center}
{\scriptsize * Guest lecture.}
\bigskip
%\pagebreak
\section*{Textbooks}
\begin{itemize}
\item Jockers, M. (2014). Text Analysis with R for Students of Literature (2014 edition). New York: Springer.\footnote{The book should be acquired beforehand; see \hyperlink{prep_det}{Preparation Details}. Use \url{http://www.matthewjockers.net/text-analysis-with-r-for-students-of-literature/} from your university.}
\item Miner, G. (2012). Practical text mining and statistical analysis for non-structured text data applications. Waltham, MA: Academic Press, chap.~1--5.
\end{itemize}
\section*{Articles \& Chapters}
\begin{itemize}
\item Alberich, R., Miro-Julia, J., \& Rossello, F. (2002). Marvel Universe looks almost like a real social network. arXiv Preprint Cond-mat/0202174. Retrieved from \url{http://arxiv.org/abs/cond-mat/0202174}.
\item Blei, D. M. (2012). Probabilistic topic models. Communications of the ACM, 55(4), 77–84.**
\item Brücher, H., Knolmayer, G., \& Mittermayer, M.-A. (2002). Document classification methods for organizing explicit knowledge. Institut für Wirtschaftsinformatik der Universität Bern.**
\item Church, K. W., \& Hanks, P. (1990). Word association norms, mutual information, and lexicography. Computational Linguistics, 16(1), 22–29.**
\item Derczynski, L., Maynard, D., Rizzo, G., van Erp, M., Gorrell, G., Troncy, R., \& Bontcheva, K. (2015). Analysis of named entity recognition and linking for tweets. Information Processing \& Management, 51(2), 32–49.
\item Dodds, P. S., Clark, E. M., Desu, S., Frank, M. R., Reagan, A. J., \& Williams, J. R. (2015). Human language reveals a universal positivity bias. Proceedings of the National Academy of Sciences, 112(8), 2389–2394.**
\item Fayyad, U., Piatetsky-Shapiro, G., \& Smyth, P. (1996). From data mining to knowledge discovery in databases. AI Magazine, 17(3), 37.
\item Jockers, M. L., \& Witten, D. M. (2010). A comparative study of machine learning methods for authorship attribution. Literary and Linguistic Computing, fqq001.**
\item Mimno, D. (2012). Computational historiography: Data mining in a century of classics journals. Journal on Computing and Cultural Heritage (JOCCH), 5(1), 3.**
\item Pechenick, E. A., Danforth, C. M., \& Dodds, P. S. (2015). Characterizing the Google Books corpus: strong limits to inferences of socio-cultural and linguistic evolution. PloS One, 10(10), e0137041.**
\item Slingerland, E., \& Chudek, M. (2011). The Prevalence of Mind-Body Dualism in Early China. Cognitive Science, 35(5), 997–1007.
\item Tangherlini, T. R., \& Leonard, P. (2013). Trawling in the Sea of the Great Unread: Sub-corpus topic modeling and Humanities research. Poetics, 41(6), 725–749.
\item Underwood, T. (2016). The Life Cycles of Genres. Retrieved from \url{https://www.ideals.illinois.edu/handle/2142/90161}.
\end{itemize}
{\scriptsize ** Master's level only.}
\section*{\hypertarget{prep_det}{Preparation Details}}
\begin{itemize}
\item Read the textbook by Jockers (2014).
\item Install R: \url{https://www.r-project.org/}
\item Although not strictly necessary, we recommend that you use an IDE such as RStudio Desktop (Open Source Edition): \url{https://www.rstudio.com/products/RStudio/#Desktop}
\item If you are planning to work on your own corpus, please prepare it as separate text files (.txt filename extension). Should you have any specific requests, please contact: \href{mailto:[email protected]}{[email protected]}.
\end{itemize}
\end{document}