forked from jhillyerd/enmime
-
Notifications
You must be signed in to change notification settings - Fork 1
/
header.go
700 lines (621 loc) · 18.8 KB
/
header.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
package enmime
import (
"bufio"
"bytes"
"fmt"
"mime"
"net/textproto"
"strings"
"github.com/jhillyerd/enmime/internal/coding"
"github.com/jhillyerd/enmime/internal/stringutil"
"github.com/pkg/errors"
)
// String constants shared by the header and media-type parsing code in this
// file. Prefixes: cd = content disposition, ct = content type, pv = parameter
// value, cte = content transfer encoding, hn = header name, hp = header
// parameter.
const (
	// Standard MIME content dispositions
	cdAttachment = "attachment"
	cdInline     = "inline"

	// Standard MIME content types
	ctAppPrefix        = "application/"
	ctAppOctetStream   = "application/octet-stream"
	ctMultipartAltern  = "multipart/alternative"
	ctMultipartMixed   = "multipart/mixed"
	ctMultipartPrefix  = "multipart/"
	ctMultipartRelated = "multipart/related"
	ctTextPrefix       = "text/"
	ctTextPlain        = "text/plain"
	ctTextHTML         = "text/html"

	// Used as a placeholder in case of malformed Content-Type headers.
	// fixMangledMediaType inserts it when the media type is missing, and
	// ParseMediaType maps it back to an empty media type.
	ctPlaceholder = "x-not-a-mime-type/x-not-a-mime-type"
	// Used as a placeholder param value in case of malformed
	// Content-Type/Content-Disposition parameters that lack values.
	// E.g.: Content-Type: text/html;iso-8859-1
	// ParseMediaType reports parameters carrying this value as invalid and
	// removes them from the returned parameter map.
	pvPlaceholder = "not-a-param-value"

	// Standard Transfer encodings
	cte7Bit            = "7bit"
	cte8Bit            = "8bit"
	cteBase64          = "base64"
	cteBinary          = "binary"
	cteQuotedPrintable = "quoted-printable"

	// Standard MIME header names
	hnContentDisposition = "Content-Disposition"
	hnContentEncoding    = "Content-Transfer-Encoding"
	hnContentID          = "Content-ID"
	hnContentType        = "Content-Type"
	hnMIMEVersion        = "MIME-Version"

	// Standard MIME header parameters
	hpBoundary = "boundary"
	hpCharset  = "charset"
	hpFile     = "file"
	hpFilename = "filename"
	hpName     = "name"
	hpModDate  = "modification-date"

	// Charset label for UTF-8.
	utf8 = "utf-8"
)
// AddressHeaders is the set of SMTP headers that contain email addresses, used by
// Envelope.AddressList(). Keys must be written entirely in lowercase; every
// entry maps to true, so a plain index expression works as a membership test.
var AddressHeaders = map[string]bool{
	"bcc":             true,
	"cc":              true,
	"delivered-to":    true,
	"from":            true,
	"reply-to":        true,
	"to":              true,
	"sender":          true,
	"resent-bcc":      true,
	"resent-cc":       true,
	"resent-from":     true,
	"resent-reply-to": true,
	"resent-to":       true,
	"resent-sender":   true,
}
// Terminology from RFC 2047:
// encoded-word: the entire =?charset?encoding?encoded-text?= string
// charset: the character set portion of the encoded word
// encoding: the character encoding type used for the encoded-text
// encoded-text: the text we are decoding
// readHeader reads a block of SMTP or MIME headers from r and returns it as a
// textproto.MIMEHeader.
//
// The raw lines are first normalised into an in-memory buffer (continuation
// lines are folded onto the previous header with a single space, a space
// before the first colon is removed, lines starting with a colon are dropped)
// and that buffer is then handed to textproto.Reader.ReadMIMEHeader.
//
// Parse problems are reported through p.addError / p.addWarning. The returned
// error is whatever ReadMIMEHeader reports for the normalised buffer, passed
// through errors.WithStack.
//
// NOTE(review): an error from ReadLineBytes (including EOF) only terminates the
// read loop; it is not returned to the caller. Confirm this is intended.
func readHeader(r *bufio.Reader, p *Part) (textproto.MIMEHeader, error) {
	// buf holds the massaged output for textproto.Reader.ReadMIMEHeader()
	buf := &bytes.Buffer{}
	tp := textproto.NewReader(r)
	// firstHeader is true until the first "name: value" line has been written;
	// it suppresses the CRLF that otherwise terminates the previous header.
	firstHeader := true
	for {
		// Pull out each line of the headers as a temporary slice s
		s, err := tp.ReadLineBytes()
		if err != nil {
			// Treat any read failure as the end of the header block.
			buf.Write([]byte{'\r', '\n'})
			break
		}

		firstColon := bytes.IndexByte(s, ':')
		firstSpace := bytes.IndexAny(s, " \t\n\r")
		if firstSpace == 0 {
			// Starts with space: continuation
			buf.WriteByte(' ')
			buf.Write(textproto.TrimBytes(s))
			continue
		}
		if firstColon == 0 {
			// Can't parse line starting with colon: skip
			p.addError(ErrorMalformedHeader, "Header line %q started with a colon", s)
			continue
		}
		if firstColon > 0 {
			// Contains a colon, treat as a new header line
			if !firstHeader {
				// New Header line, end the previous
				buf.Write([]byte{'\r', '\n'})
			}

			// Behavior change in net/textproto package in Golang 1.12.10 and 1.13.1:
			// A space preceding the first colon in a header line is no longer handled
			// automatically due to CVE-2019-16276 which takes advantage of this
			// particular violation of RFC-7230 to exploit HTTP/1.1
			if bytes.Contains(s[:firstColon+1], []byte{' ', ':'}) {
				// Only the first " :" is rewritten; the check above guarantees
				// that this is the one directly before the header's colon.
				s = bytes.Replace(s, []byte{' ', ':'}, []byte{':'}, 1)
			}

			s = textproto.TrimBytes(s)
			buf.Write(s)
			firstHeader = false
		} else {
			// No colon: potential non-indented continuation
			if len(s) > 0 {
				// Attempt to detect and repair a non-indented continuation of previous line
				buf.WriteByte(' ')
				buf.Write(s)
				p.addWarning(ErrorMalformedHeader, "Continued line %q was not indented", s)
			} else {
				// Empty line, finish header parsing
				buf.Write([]byte{'\r', '\n'})
				break
			}
		}
	}

	// Terminate the header block with a blank line for ReadMIMEHeader.
	buf.Write([]byte{'\r', '\n'})
	tr := textproto.NewReader(bufio.NewReader(buf))
	header, err := tr.ReadMIMEHeader()
	return header, errors.WithStack(err)
}
// decodeHeader decodes the RFC 2047 encoded-words found in input using
// mime.WordDecoder, with coding.NewCharsetReader supplying charset support.
// The input is returned untouched when it contains no "=?" marker or when
// decoding fails.
func decodeHeader(input string) string {
	if strings.Contains(input, "=?") {
		wd := mime.WordDecoder{CharsetReader: coding.NewCharsetReader}
		if decoded, err := wd.DecodeHeader(input); err == nil {
			return decoded
		}
	}
	// Nothing to decode, or decoding failed: fall back to the raw value.
	return input
}
// decodeToUTF8Base64Header decodes a MIME header per RFC 2047 and re-encodes
// every encoded-word token with mime.BEncoding using the "UTF-8" charset
// (i.e. the =?UTF-8?b?...?= form).
//
// The input is split on whitespace and the resulting tokens are joined back
// with single spaces, so runs of whitespace are collapsed. Only tokens longer
// than four bytes that contain "=?" are re-encoded; a single leading '(' and a
// single trailing ')' are kept outside of the encoded-word.
//
// The standard library inspects such strings incrementally and only skips
// spaces and tabs between words, so this function depends on whitespace
// separating the tokens. To resolve #112 with the least change, a quoted
// display-name that is immediately followed by an addr-spec is first split
// apart by quotedDisplayName.
func decodeToUTF8Base64Header(input string) string {
	if !strings.Contains(input, "=?") {
		// Don't scan if there is nothing to do here
		return input
	}

	fields := strings.FieldsFunc(quotedDisplayName(input), whiteSpaceRune)
	for idx, field := range fields {
		if len(field) <= 4 || !strings.Contains(field, "=?") {
			// Not an encoded-word; keep the token as it is.
			continue
		}

		// Stash parentheses, they should not be encoded.
		word, lead, trail := field, "", ""
		if strings.HasPrefix(word, "(") {
			lead = "("
			word = word[1:]
		}
		if strings.HasSuffix(word, ")") {
			trail = ")"
			word = word[:len(word)-1]
		}

		// Base64 encode token
		fields[idx] = lead + mime.BEncoding.Encode("UTF-8", decodeHeader(word)) + trail
	}

	// Return space separated tokens
	return strings.Join(fields, " ")
}
// quotedDisplayName inserts a single space after the last double quote of s
// when s begins with a double quote, separating a quoted display-name from
// whatever follows it. Any other input is returned unchanged.
func quotedDisplayName(s string) string {
	if len(s) == 0 || s[0] != '"' {
		return s
	}
	// s starts with a quote, so a last quote always exists.
	cut := strings.LastIndexByte(s, '"') + 1
	return fmt.Sprintf("%s %s", s[:cut], s[cut:])
}
// ParseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function.
//
// Tolerances accounted for:
// * Missing ';' between content-type and media parameters
// * Repeating media parameters
// * Unquoted values in media parameters containing 'tspecials' characters
func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string, err error) {
mtype, params, err = mime.ParseMediaType(
fixUnescapedQuotes(fixUnquotedSpecials(fixMangledMediaType(ctype, ';'))))
if err != nil {
if err.Error() == "mime: no media type" {
return "", nil, nil, nil
}
return "", nil, nil, errors.WithStack(err)
}
if mtype == ctPlaceholder {
mtype = ""
}
for name, value := range params {
if value != pvPlaceholder {
continue
}
invalidParams = append(invalidParams, name)
delete(params, name)
}
return mtype, params, invalidParams, err
}
// fixMangledMediaType repairs a media type string so that mime.ParseMediaType
// has a better chance of accepting it. mtype is split on sep (quoted sections
// are kept together by stringutil.SplitQuoted) and rebuilt with ';' between
// the parts:
//
//   - an empty input is returned as is;
//   - if the first part is a parameter (contains '='), application/octet-stream
//     is inserted as the media type;
//   - an empty first part becomes ctPlaceholder;
//   - a media type ending in '/' gets a default subtype;
//   - parameters without '=' get pvPlaceholder as their value;
//   - parameters are passed through rfc2047decode;
//   - repeated parameters are dropped, keeping the first occurrence;
//   - parameters whose attribute contains characters that are not allowed in a
//     token are dropped;
//   - empty parts (e.g. from trailing separators) are ignored.
//
// Repeated parameters are recognised by their exact attribute name, compared
// case-insensitively (mime.ParseMediaType lower-cases names and rejects
// duplicates). A substring search over the output built so far would wrongly
// discard "name=" after "filename=", or after any value containing "name=".
func fixMangledMediaType(mtype string, sep rune) string {
	strsep := string([]rune{sep})
	if mtype == "" {
		return ""
	}

	parts := stringutil.SplitQuoted(mtype, sep, '"')
	mtype = ""
	if strings.Contains(parts[0], "=") {
		// A parameter pair at this position indicates we are missing a content-type.
		parts[0] = fmt.Sprintf("%s%s %s", ctAppOctetStream, strsep, parts[0])
		parts = strings.Split(strings.Join(parts, strsep), strsep)
	}

	// seen holds the lower-cased "attr=" prefix of every parameter emitted so far.
	seen := make(map[string]bool, len(parts))

	for i, p := range parts {
		switch i {
		case 0:
			if p == "" {
				// The content type is completely missing. Put in a placeholder.
				p = ctPlaceholder
			}
			// Check for missing token after slash.
			if strings.HasSuffix(p, "/") {
				switch p {
				case ctTextPrefix:
					p = ctTextPlain
				case ctAppPrefix:
					p = ctAppOctetStream
				case ctMultipartPrefix:
					p = ctMultipartMixed
				default:
					// Safe default
					p = ctAppOctetStream
				}
			}
		default:
			if len(p) == 0 {
				// Ignore trailing separators.
				continue
			}

			if !strings.Contains(p, "=") {
				p = p + "=" + pvPlaceholder
			}

			// RFC-2047 encoded attribute name.
			p = rfc2047decode(p)

			pair := strings.SplitAfter(p, "=")
			attr := strings.ToLower(strings.TrimSpace(pair[0]))
			if seen[attr] {
				// Ignore repeated parameters.
				continue
			}

			if strings.ContainsAny(pair[0], "()<>@,;:\"\\/[]?") {
				// Attribute is a strict token and cannot be a quoted-string. If any of the above
				// characters are present in a token it must be quoted and is therefore an invalid
				// attribute. Discard the pair.
				continue
			}

			// Only parameters that are actually emitted count as seen.
			seen[attr] = true
		}

		mtype += p

		// Only terminate with semicolon if not the last parameter and if it doesn't already have a
		// semicolon.
		if i != len(parts)-1 && !strings.HasSuffix(mtype, ";") {
			mtype += ";"
		}
	}

	// Skipped trailing parts can leave a dangling separator behind.
	if strings.HasSuffix(mtype, ";") {
		mtype = mtype[:len(mtype)-1]
	}

	return mtype
}
// consumeParam takes the parameter part of a Content-Type header, returns a clean version of
// the first parameter (quoted as necessary), and the remainder of the parameter part of the
// Content-Type header.
//
// Given this header:
//
//	`Content-Type: text/calendar; charset=utf-8; method=text/calendar`
//
// `consumeParam` should be given this part:
//
//	` charset=utf-8; method=text/calendar`
//
// And returns (first pass):
//
//	`consumed = " charset=utf-8;"`
//	`rest     = " method=text/calendar"`
//
// Capture the `consumed` value (to build a clean Content-Type header value) and pass the value of
// `rest` back to `consumeParam`. That second call will return:
//
//	`consumed = " method=\"text/calendar\""`
//	`rest     = ""`
//
// Again, use the value of `consumed` to build a clean Content-Type header value. Given that `rest`
// is empty, all of the parameters have been consumed successfully.
//
// If `consumed` is returned empty and `rest` is not empty, then the value of `rest` does not
// begin with a parsable parameter (it contains no '='). This does not necessarily indicate a
// problem. For example, if there is trailing whitespace, it would be returned here.
//
// The value is processed byte by byte, so multi-byte UTF-8 sequences are copied through
// unchanged.
func consumeParam(s string) (consumed, rest string) {
	i := strings.IndexByte(s, '=')
	if i < 0 {
		return "", s
	}

	// param accumulates the cleaned output, starting with "attr=".
	param := strings.Builder{}
	param.WriteString(s[:i+1])
	s = s[i+1:]

	// value accumulates the parameter value until its end is known, so that an
	// opening quote can still be written to param in front of it if needed.
	value := strings.Builder{}
	valueQuotedOriginally := false
	valueQuoteAdded := false
	valueQuoteNeeded := false

	// Skip leading whitespace and look at the first byte of the value. Only that
	// single byte is consumed here; whitespace, quote and semicolon are all ASCII,
	// so the remaining bytes of a multi-byte character are left to the loop below.
	// When s is empty i keeps the index of '=' from above; when s is all
	// whitespace i ends up on the last byte.
findValueStart:
	for i = range s {
		c := s[i]
		switch c {
		case ' ', '\t':
			param.WriteByte(c)

		case '"':
			valueQuotedOriginally = true
			valueQuoteAdded = true
			value.WriteByte(c)

			break findValueStart

		case ';':
			if value.Len() == 0 {
				value.WriteString(`"";`)
			}

			break findValueStart

		default:
			valueQuotedOriginally = false
			valueQuoteAdded = false
			value.WriteByte(c)

			break findValueStart
		}
	}

	if len(s)-i < 1 {
		// parameter value starts at the end of the string, make empty
		// quoted string to play nice with mime.ParseMediaType
		param.WriteString(`""`)
	} else {
		// The beginning of the value is not at the end of the string

		// quoteIfUnquoted marks the value as needing quotes and, unless an
		// opening quote is already present, writes one to param (ahead of the
		// buffered value).
		quoteIfUnquoted := func() {
			if !valueQuoteNeeded {
				if !valueQuoteAdded {
					param.WriteByte('"')

					valueQuoteAdded = true
				}

				valueQuoteNeeded = true
			}
		}

		// The first value byte was consumed above without being inspected. It
		// sits at index i, which is not 0 when whitespace preceded the value.
		if strings.IndexByte("()<>@,:/[]?=", s[i]) >= 0 {
			quoteIfUnquoted()
		}

		s = s[i+1:]

	findValueEnd:
		for len(s) > 0 {
			switch s[0] {
			case ';', ' ', '\t':
				if valueQuotedOriginally {
					// We're in a quoted string, so whitespace is allowed.
					value.WriteByte(s[0])
					s = s[1:]
					break
				}

				// Otherwise, we've reached the end of an unquoted value.

				param.WriteString(value.String())
				value.Reset()

				if valueQuoteNeeded {
					param.WriteByte('"')
				}

				param.WriteByte(s[0])
				s = s[1:]

				break findValueEnd

			case '"':
				if valueQuotedOriginally {
					// We're in a quoted value. This is the end of that value.
					param.WriteString(value.String())
					value.Reset()

					param.WriteByte(s[0])
					s = s[1:]

					break findValueEnd
				}

				// A quote inside an unquoted value: escape it and quote the value.
				quoteIfUnquoted()

				value.WriteByte('\\')
				value.WriteByte(s[0])
				s = s[1:]

			case '\\':
				if len(s) > 1 {
					value.WriteByte(s[0])
					s = s[1:]

					// Backslash escapes the next char. Consume that next char.
					value.WriteByte(s[0])

					quoteIfUnquoted()
				}
				// Else there is no next char to consume.
				s = s[1:]

			case '(', ')', '<', '>', '@', ',', ':', '/', '[', ']', '?', '=':
				quoteIfUnquoted()

				fallthrough

			default:
				value.WriteByte(s[0])
				s = s[1:]
			}
		}
	}

	if value.Len() > 0 {
		// There is a value that ends with the string. Capture it.
		param.WriteString(value.String())

		if valueQuotedOriginally || valueQuoteNeeded {
			// If valueQuotedOriginally is true and we got here,
			// that means there was no closing quote. So we'll add one.
			// Otherwise, we're here because it was an unquoted value
			// with a special char in it, and we had to quote it.
			param.WriteByte('"')
		}
	}

	return param.String(), s
}
// fixUnquotedSpecials quotes media type parameter values that contain
// 'tspecials' characters, as defined in RFC 2045, section 5.1:
// https://tools.ietf.org/html/rfc2045#section-5.1
//
// Everything up to and including the first ';' is copied verbatim. The rest is
// passed through fixUnquotedValueWithSpaces and then rebuilt one parameter at a
// time with consumeParam. If consumeParam cannot find a further parameter, the
// remaining text is appended unchanged. A string without ';', or with nothing
// after its first ';', is returned as is.
func fixUnquotedSpecials(s string) string {
	idx := strings.IndexByte(s, ';')
	if idx < 0 || idx == len(s)-1 {
		// No parameters
		return s
	}

	var clean strings.Builder
	clean.WriteString(s[:idx+1])

	s = fixUnquotedValueWithSpaces(s[idx+1:], ';')
	for len(s) > 0 {
		consumed, rest := consumeParam(s)
		if consumed == "" {
			// Nothing parsable is left; keep the tail untouched.
			clean.WriteString(rest)
			break
		}

		clean.WriteString(consumed)
		s = rest
	}

	return clean.String()
}
// fixUnescapedQuotes inspects for unescaped quotes inside of a quoted string and escapes them
//
//	Input:  application/rtf; charset=iso-8859-1; name=""V047411.rtf".rtf"
//	Output: application/rtf; charset=iso-8859-1; name="\"V047411.rtf\".rtf"
//
// hvalue is split after every ';'. Segments without '=' and values without any
// double quote are copied unchanged. For a quoted value, the first and the last
// double quote of the segment are taken as the outer quotes and every quote in
// between that is not preceded by a backslash is escaped. Text following the
// last quote is pulled inside the quoted string. If a segment holds a single
// quote only, the next segment is appended first (the value may contain a
// semicolon); if there is still only one quote, the value is replaced by an
// empty quoted string and processing stops.
func fixUnescapedQuotes(hvalue string) string {
	params := strings.SplitAfter(hvalue, ";")
	sb := &strings.Builder{}

	for i := 0; i < len(params); i++ {
		// Inspect for "=" byte.
		eq := strings.IndexByte(params[i], '=')
		if eq < 0 {
			// No "=", must be the content-type or a comment.
			sb.WriteString(params[i])
			continue
		}

		sb.WriteString(params[i][:eq])
		param := params[i][eq:]
		startingQuote := strings.IndexByte(param, '"')
		closingQuote := strings.LastIndexByte(param, '"')

		// Opportunity to exit early if there are no quotes.
		if startingQuote < 0 && closingQuote < 0 {
			// This value is not quoted, write the value and carry on.
			sb.WriteString(param)
			continue
		}

		// Check if only one quote was found in the string.
		if closingQuote == startingQuote {
			// Append the next chunk of params here in case of a semicolon mid string.
			if len(params) > i+1 {
				param = fmt.Sprintf("%s%s", param, params[i+1])
			}

			closingQuote = strings.LastIndexByte(param, '"')
			// The next chunk has been merged into this one; skip it.
			i++
			if closingQuote == startingQuote {
				sb.WriteString("=\"\"")
				return sb.String()
			}
		}

		// Write the k/v separator back in along with everything up until the first quote.
		sb.WriteByte('=')
		// Starting quote
		sb.WriteByte('"')
		sb.WriteString(param[1:startingQuote])

		// Get just the value, less the outer quotes.
		rest := param[closingQuote+1:]
		// If there is stuff after the last quote then we should escape the first quote.
		if len(rest) > 0 && rest != ";" {
			sb.WriteString("\\\"")
		}

		param = param[startingQuote+1 : closingQuote]
		escaped := false
		for k := 0; k < len(param); k++ {
			c := param[k]
			switch c {
			case '"':
				// We are inside of a quoted string, so lets escape this guy if it isn't already escaped.
				if !escaped {
					sb.WriteByte('\\')
				}
				// The backslash (if any) applied to this quote only. Clear the
				// flag so that a directly following quote gets escaped too.
				escaped = false
				sb.WriteByte(c)
			case '\\':
				// Something is getting escaped, a quote is the only char that needs
				// this, so lets assume the following char is a double-quote.
				escaped = true
				sb.WriteByte('\\')
			default:
				escaped = false
				sb.WriteByte(c)
			}
		}

		// If there is stuff after the last quote then we should escape
		// the last quote, apply the rest and terminate with a quote.
		switch rest {
		case ";":
			sb.WriteByte('"')
			sb.WriteString(rest)
		case "":
			sb.WriteByte('"')
		default:
			sb.WriteByte('\\')
			sb.WriteByte('"')
			sb.WriteString(rest)
			sb.WriteByte('"')
		}
	}

	return sb.String()
}
// whiteSpaceRune reports whether r is an RFC-822 linear-white-space character
// (space, tab, carriage return or line feed). It is meant to be passed to
// strings.FieldsFunc.
func whiteSpaceRune(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// fixUnquotedValueWithSpaces wraps unquoted parameter values that contain
// spaces in double quotes.
//
// gets a string like: x-unix-mode=0644; name=File name with spaces.pdf; some-param=da da da
// returns a string like: x-unix-mode=0644; name="File name with spaces.pdf"; some-param="da da da"
//
// A bit of explanation on terminology:
//
//	attr is the key (collected up to and including the '=')
//	value is the value
//	param refers to the combination of "attr=value" separated by a separator
//
// The input is scanned one byte at a time. In attr mode bytes are collected
// until '=' is seen. In value mode bytes are collected until the last byte of
// the input, a newline, a tab, a double quote or a ';' is reached; the param is
// then written out, quoted if a space was seen in the value.
//
// NOTE(review): the sep parameter is not referenced; ';' is hard-coded as the
// separator. Also, a double quote met in value mode while insideQuotes is false
// always takes the "end of param" branch, so insideQuotes appears never to
// become true and already-quoted values are effectively copied through via the
// attr buffer. Confirm before relying on either.
func fixUnquotedValueWithSpaces(s string, sep byte) string {
	// The clean string that we will return
	clean := strings.Builder{}

	// This is either attr or value depending on where we are at in a
	// Content-Type param list
	const (
		attrMode = iota
		valueMode
	)

	mode := attrMode
	attr := strings.Builder{}
	value := strings.Builder{}
	insideQuotes := false
	spaceEncountered := false

	// resetForNextParam clears all per-param state and returns to attr mode.
	resetForNextParam := func() {
		attr.Reset()
		value.Reset()
		insideQuotes = false
		spaceEncountered = false
		mode = attrMode
	}

	// writeCleanParam emits the collected attr followed by the value, wrapping
	// the value in double quotes when it contained a space.
	writeCleanParam := func() {
		clean.WriteString(attr.String())
		if spaceEncountered {
			clean.WriteByte('"')
		}

		clean.WriteString(value.String())

		if spaceEncountered {
			clean.WriteByte('"')
		}
	}

	for len(s) > 0 {
		switch mode {
		case attrMode:
			// The '=' itself is stored as part of attr.
			if s[0] == '=' {
				mode = valueMode
			}

			attr.WriteByte(s[0])
			s = s[1:]
		case valueMode:
			// If we encounter an end, reset the state
			if len(s) == 1 || s[0] == '\n' || s[0] == '\t' || ((s[0] == '"' || s[0] == ';') && !insideQuotes) {
				// The final byte of the input belongs to the value unless it
				// is the separator.
				if len(s) == 1 && s[0] != ';' {
					value.WriteString(s)
				}

				writeCleanParam()

				// Copy the terminating byte through, except when it was the
				// final value byte already written above.
				if len(s) > 1 || s[0] == ';' {
					clean.WriteByte(s[0])
				}

				s = s[1:]
				resetForNextParam()
				break
			}

			if s[0] == '"' {
				insideQuotes = true
			}

			if s[0] == ' ' && !insideQuotes {
				spaceEncountered = true
			}

			value.WriteByte(s[0])
			s = s[1:]
		}
	}

	// Flush whatever is left in the attr buffer (input that ended in attr mode).
	if attr.Len() > 0 {
		clean.WriteString(attr.String())
	}

	return clean.String()
}