-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathtraread.js
277 lines (244 loc) · 9.14 KB
/
traread.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
'use strict';
const fsp = require('fs/promises');
/* Read a file as a sequence of newline-separated JSON stanzas.
A partial stanza at the end will be silently ignored.
It's okay if the JSON has more whitespace or newlines. You just need
at least one newline between stanzas.
If non-JSON occurs at the start or between stanzas, this will throw
an exception. Bad formatting inside a stanza will silently end the
parsing (after reading in the entire rest of the file). No, that's not
ideal.
This is an async generator (fancy!) You can use it in the following
ways:
for await (var obj of stanza_reader(path)) { ... }
var iter = stanza_reader(path);
for await (var obj of iter) { ... }
var iter = stanza_reader(path);
var res = await iter.next();
while (!res.done) {
// ...
res = await iter.next();
}
If you want to stop reading early, you must use the iterator form so
that you can call iter.return(). (This cleans up the file handle;
you don't want to leak that.)
*/
async function* stanza_reader(path)
{
const CHUNK = 512;
var buf = Buffer.alloc(CHUNK);
var buflen = 0; // amount of unconsumed text in buf
var fhan = await fsp.open(path, "r");
try {
while (true) {
// eat whitespace
var pos = 0;
while (true) {
while (pos < buflen
&& (buf[pos] == 0x20 || buf[pos] == 0x0A || buf[pos] == 0x0D || buf[pos] == 0x09)) { // whitespaces
pos++;
}
if (pos < buflen) {
break;
}
// ate whitespace to end of buffer; read a chunk and keep eating
if (buflen+CHUNK > buf.length) {
var newlen = buflen + CHUNK;
buf = Buffer.concat([buf], newlen);
}
var res = await fhan.read(buf, buflen, CHUNK);
if (res.bytesRead == 0) {
await fhan.close();
fhan = null;
return; // end of file
}
buflen += res.bytesRead;
}
// pos is now on the first non-whitespace; trim everything before tthat. (We should have nonzero text left.)
buf = buf.subarray(pos);
buflen -= pos;
if (buflen == 0) {
throw new Error('assert: should have text after eating whitespace');
}
if (buf[0] != 0x7B) { // '{'
// The next text is not a JSON stanza. That's bad.
throw new Error('non-JSON encountered');
}
pos = 0;
var obj = null;
while (true) {
// search for the next newline
while (true) {
while (pos < buflen && (buf[pos] != 0x0A && buf[pos] != 0x0D)) {
pos++;
}
if (pos < buflen) {
break;
}
// ate non-newlines to end of buffer; read a chunk and keep eating
if (buflen+CHUNK > buf.length) {
var newlen = buflen + CHUNK;
buf = Buffer.concat([buf], newlen);
}
var res = await fhan.read(buf, buflen, CHUNK);
if (res.bytesRead == 0) {
await fhan.close();
fhan = null;
return; // end of file
// We probably have an incomplete JSON stanza in the buffer, but we ignore that.
}
buflen += res.bytesRead;
}
// pos is now on a newline. Eat that, then check to see if we've got a complete stanza.
pos++;
var str = buf.toString('utf8', 0, pos);
try {
obj = JSON.parse(str);
break;
}
catch (ex) {
// Nope, look for the next newline
continue;
}
}
if (obj === null) {
throw new Error('assert: left loop without object');
}
// Trim buffer, yield, and continue
buf = buf.subarray(pos);
buflen -= pos;
yield obj;
// We return from here if the caller calls iter.return().
}
}
finally {
// If we throw or return early...
if (fhan !== null) {
await fhan.close();
fhan = null;
}
}
}
/* Metadata keys that we are interested in, in the order we are interested
in them. */
const metadata_keylist = [
'title', 'author', 'headline', 'firstpublished',
'ifid', 'format', 'tuid'
];
/* Parse a transcript file and write a plain-text equivalent to a new file.
This is a JS port of the read-glktra.py utility in the GlkOte repo.
See add_stanza() etc in trashow.js for a similar implementation. Yes,
there's a refactor opportunity here.
Differences:
- We only pay attention to buffer windows. The status line (assuming
that's a grid) is lost.
- If there's more than one buffer window, the output is interleaved
in a single display stream.
- Window-clear events are shown as horizontal rules.
- Graphics are currently not supported. (Not even the alt text.)
*/
async function stanzas_write_to_file(path, trapath, options)
{
var fhan = null;
async function add_stanza(obj)
{
if (obj.metadata) {
var anylines = false;
for (var key of metadata_keylist) {
if (obj.metadata[key]) {
if (!anylines) {
anylines = true;
await fhan.write(('--'.repeat(36)) + '-\n');
}
var val = key + ': ' + obj.metadata[key] + '\n';
await fhan.write(val);
}
}
if (anylines)
await fhan.write(('--'.repeat(36)) + '-\n');
}
if (obj.output) {
if (options && options.timestamps) {
var date = new Date(obj.timestamp);
var intimestr = date.toTimeString().slice(0, 8) + ', ' + date.toDateString().slice(4);
var durstr = ': ' + (obj.outtimestamp - obj.timestamp) + ' ms';
await fhan.write('[' + intimestr + durstr + '] ');
}
if (obj.output.content) {
for (var dat of obj.output.content) {
if (dat.text) {
if (dat.clear) {
await fhan.write('\n' + ('- '.repeat(36)) + '-\n');
}
if (dat.text) {
await add_stanza_linedata(dat.text);
}
}
}
}
}
}
async function add_stanza_linedata(text)
{
for (let ix=0; ix<text.length; ix++) {
const textarg = text[ix];
const content = textarg.content;
if (textarg.append) {
if (!content || !content.length)
continue;
}
else {
await fhan.write('\n');
}
// skip textarg.flowbreak for now
if (!content || !content.length) {
continue;
}
for (let sx=0; sx<content.length; sx++) {
const rdesc = content[sx];
let rstyle, rtext, rlink;
if (!(typeof rdesc === 'string' || rdesc instanceof String)) {
if (rdesc.special !== undefined) {
if (rdesc.special == 'image') {
var val;
if (rdesc.alttext)
val = '[image: ' + rdesc.alttext + ']';
else
val = '[image ' + rdesc.image + ']';
await fhan.write(val);
}
continue;
}
rstyle = rdesc.style;
rtext = rdesc.text;
rlink = rdesc.hyperlink;
}
else {
rstyle = rdesc;
sx++;
rtext = content[sx];
rlink = undefined;
}
// ignore rlink
await fhan.write(rtext);
}
}
}
try {
fhan = await fsp.open(path, "w");
for await (var obj of stanza_reader(trapath)) {
await add_stanza(obj);
}
await fhan.write('\n');
}
finally {
// If we throw or return early...
if (fhan !== null) {
await fhan.close();
fhan = null;
}
}
}
exports.stanza_reader = stanza_reader;
exports.stanzas_write_to_file = stanzas_write_to_file;
exports.metadata_keylist = metadata_keylist;