-
Notifications
You must be signed in to change notification settings - Fork 0
/
validate_ELAN_files.R
72 lines (64 loc) · 4.49 KB
/
validate_ELAN_files.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Install xfun only when it is not already available, so that re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("xfun", quietly = TRUE)) {
  install.packages("xfun")
}
library(tidyr)
library(tinytex)
library(readxl)
library(dplyr)
library(data.table)
library(xfun)
library(readxl)
# Note: replace PATH with the path on your own computer. It will probably vary a little depending on whether you use Windows or Mac.
# Step 1. Copy the original .eaf files into the revision folder.
current_folder <- "/PATH/originales"
new_folder <- "/PATH/revision"
# Escape the dot so that only names ending in the literal ".eaf" extension
# match (".eaf$" would also accept a name such as "notes_eaf").
list_of_files <- list.files(current_folder, pattern = "\\.eaf$")
if (length(list_of_files) == 0) {
  warning("No .eaf files found in ", current_folder, call. = FALSE)
}
# file.copy() needs an existing directory as destination when it receives
# several source files, so make sure the folder is there.
dir.create(new_folder, showWarnings = FALSE, recursive = TRUE)
copied <- file.copy(file.path(current_folder, list_of_files), new_folder)
# file.copy() reports failures only through its return value (for example when
# the target already exists, since overwrite defaults to FALSE); surface them.
if (!all(copied)) {
  warning(
    "Not copied to ", new_folder, ": ",
    paste(list_of_files[!copied], collapse = ", "),
    call. = FALSE
  )
}
# Step 2. Add the DTD declaration to every file in the revision folder (the
# revision itself can be done afterwards with Oxygen XML Editor).
# Each entry is c(pattern, replacement). The entries are applied one at a time,
# in this order, because later patterns match text written by earlier ones.
# NOTE(review): the "<" and ">" entries replace each match with the same
# character as written, so those two passes leave the files unchanged; possibly
# escaped entities were intended there -- confirm against the author's copy.
local({
  revision_edits <- list(
    c("<ANNOTATION_DOCUMENT AUTHOR=", "<!--<ANNOTATION_DOCUMENT AUTHOR="),
    c("EAFv3.0.xsd\">", "EAFv3.0.xsd\"><!--><ANNOTATION_DOCUMENT>"),
    c(
      "<!--<ANNOTATION_DOCUMENT AUTHOR=",
      "<!DOCTYPE ANNOTATION_DOCUMENT SYSTEM \"/PATH/anotaciones.dtd\">
<!--<ANNOTATION_DOCUMENT AUTHOR="
    ),
    c("<", "<"),
    c(">", ">"),
    # Tag names lose their accents.
    c("<énfasis", "<enfasis"),
    c("<anónimo", "<anonimo"),
    c("</énfasis", "</enfasis"),
    c("</anónimo", "</anonimo")
  )
  for (edit in revision_edits) {
    gsub_dir(dir = new_folder, pattern = edit[[1]], replacement = edit[[2]])
  }
})
# Step 3. Copy the reviewed files into the final corpus folder.
current_folder1 <- "/PATH/revision"
new_folder1 <- "/PATH/corpus"
# Escape the dot so that only names ending in the literal ".eaf" extension
# match (".eaf$" would also accept a name such as "notes_eaf").
list_of_files <- list.files(current_folder1, pattern = "\\.eaf$")
if (length(list_of_files) == 0) {
  warning("No .eaf files found in ", current_folder1, call. = FALSE)
}
# file.copy() needs an existing directory as destination when it receives
# several source files, so make sure the folder is there.
dir.create(new_folder1, showWarnings = FALSE, recursive = TRUE)
copied1 <- file.copy(file.path(current_folder1, list_of_files), new_folder1)
# file.copy() reports failures only through its return value (for example when
# the target already exists, since overwrite defaults to FALSE); surface them.
if (!all(copied1)) {
  warning(
    "Not copied to ", new_folder1, ": ",
    paste(list_of_files[!copied1], collapse = ", "),
    call. = FALSE
  )
}
# Text rewrites applied to every file in the corpus folder.
# Each entry is c(pattern, replacement); entries run one at a time, in order.
# NOTE(review): as written, every entry from "<enfasis>" down to "/\">"
# replaces a match with the same text (the two regex entries re-emit group 1
# followed by ">"), so only the last five entries modify the files. Possibly
# escaped entities were intended in those replacements -- confirm against the
# author's copy before relying on this step.
local({
  corpus_edits <- list(
    # Annotation tags.
    c("<enfasis>", "<enfasis>"),
    c("<anonimo>", "<anonimo>"),
    c("</anonimo>", "</anonimo>"),
    c("</enfasis>", "</enfasis>"),
    c("<entre_risas>", "<entre_risas>"),
    c("</entre_risas>", "</entre_risas>"),
    c("<risas/>", "<risas/>"),
    c("<alargamiento/>", "<alargamiento/>"),
    c("<ininteligible/>", "<ininteligible/>"),
    c("<susurro/>", "<susurro/>"),
    c("<gritos/>", "<gritos/>"),
    c("<tos/>", "<tos/>"),
    c("<cita>", "<cita>"),
    c("</cita>", "</cita>"),
    c("<sic>", "<sic>"),
    c("</sic>", "</sic>"),
    c("<obs", "<obs"),
    c("</obs>", "</obs>"),
    c("<fsr", "<fsr"),
    c("</fsr>", "</fsr>"),
    c("<enfasis", "<enfasis"),
    c("</enfasis>", "</enfasis>"),
    c("<siglas", "<siglas"),
    c("</siglas>", "</siglas>"),
    c("<extranjero", "<extranjero"),
    c("</extranjero>", "</extranjero>"),
    # Closing bracket after a t="..." attribute.
    c("(t=\"([^\"]*)\")(>)", "\\1>"),
    c("(t=\"([^\"]*)\"/)(>)", "\\1>"),
    c("/\">", "/\">"),
    # Undo the comment markers and DOCTYPE line added in Step 2.
    c("<!--<ANNOTATION_DOCUMENT AUTHOR=", "<ANNOTATION_DOCUMENT AUTHOR="),
    c("<!-->", ""),
    c("EAFv3.0.xsd\"><!--><ANNOTATION_DOCUMENT>", "EAFv3.0.xsd\">"),
    c("<!DOCTYPE ANNOTATION_DOCUMENT SYSTEM \"/PATH/anotaciones.dtd\">", ""),
    c("EAFv3.0.xsd\"><ANNOTATION_DOCUMENT>", "EAFv3.0.xsd\">")
  )
  for (edit in corpus_edits) {
    gsub_dir(dir = new_folder1, pattern = edit[[1]], replacement = edit[[2]])
  }
})