-
Notifications
You must be signed in to change notification settings - Fork 16
/
node-feedparser.node.txt
132 lines (115 loc) · 9.16 KB
/
node-feedparser.node.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
NODE-FEEDPARSER
ALTERNATIVES ==> # - node-feedparser (prefer):
# - most used
# - RSS 2.0, RSS 1.0, RSS 0.90, Atom 0.3, Atom 1.0, RDF feeds
# - provides an abstraction layer between all those formats
# - supports many common XML namespaces
# - streaming
# - Node.js + CLI
# - feedme (prefer if JSONFeed or browser support):
# - RSS 2.0, Atom 1.0, JSONFeed
# - streaming
# - Node.js + browser
# - rss-parser:
# - RSS 2.0, RSS 1.0, RSS 0.90, Atom 1.0
# - supports few XML namespaces
# - Node.js only
SUPPORTS ==> # - RSS 2.0, RSS 1.0, RSS 0.90, Atom 0.3, Atom 1.0, RDF feeds, XHTML
# - Dublin Core: dc:date|subject|language|rights|creator|publisher
# - iTunes: itunes:summary|image|author|owner
# - MRSS: media:thumbnail|category|copyright|content|group
new FeedParser([OPTS]) #IOSTREAM in object mode with FEEDITEM as OBJs
#Also emit:
# - "meta" with META
# - "error" with ERROR with message STR:
# - "Not a feed"
# - Sax ERROR
#OPTS:
# - normalize BOOL (def: true):
# - try to abstract away the differences between formats
# - do it by adding META....
# - addmeta BOOL (def: true): adds FEEDITEM.meta
# - feedurl "URI": canonical URL of the feed, used when it cannot be determined from the feed itself
# - strict BOOL (def: false): do not ignore XML parsing errors
# - resume_saxerror BOOL (def: true): do not stop on XML parsing errors
# - MAX_BUFFER_LENGTH NUM (def: 16M): XML parsing buffer
#Uses Sax for XML parsing
/=+===============================+=\
/ : : \
)==: CHANNEL :==(
\ :_______________________________: /
\=+===============================+=/
META #<[rdf:]channel|[atom:]feed>
META.ATTR #For every other child not below
META.#type #"rss|atom|rdf"
META.#version #STR. Of RSS|Atom|RDF
META.#xml #OBJ. XML declaration: version STR, encoding STR, etc.
META.#ns #{ NAMESPACE_URI: "NAMESPACE" }_ARR of XML namespaces
META.# #STR content, trimmed.
#Relative URIs are resolved, e.g. from <link|atom:icon|atom:logo>
META.@ #{ ATTR: VAL }_ARR
META.title #<title>. HTML is removed.
META.description #<description|subtitle|itunes:summary|tagline>. HTML is removed.
META.link #STR. Content, <link>, <... href rel="self|alternate"> or <atom:id> content.
META.xmlurl #Canonical link
META.pubdate|pubDate #DATE. <pubdate|pubDate|published|dc:date>
META.date #DATE. <lastbuilddate|lastBuildDate|modified|updated>
META.image #{ url, title } from
# - <image|logo|media:thumbnail url="URI" title="STR">
# - <image|logo> content
# - <itunes:image href="URI">
#Does not parse <width|height|description>
META.favicon #<icon>
META.categories #<[itunes|media:]category|dc:subject>
#STR_ARR, trimmed with no duplicates.
#Does not parse <domain>
META.ttl #<ttl>
META.language #<[dc:]language> or <... xml:lang="STR">
META.copyright #<[media:]copyright|[dc:]rights|creativecommons:license|cc:license>
META.author #<author name|email|uri="STR">, <author|managingEditor|webMaster> or
#<description|itunes:author|itunes:owner|dc:creator|dc:publisher>
META.docs #<docs>
META.generator #<generator> or <admin:generatoragent rdf:resource="STR">
#Also prepends STR from <generator uri="URI"> and "vVERSION" from <generator version="VERSION">
META.skipHours #<skipHours>
META.cloud #Either:
# - { type 'hub', ... }, from <[rdf:]channel|[atom:]feed ... rel="hub">
# - { type 'rsscloud', ... }, from <cloud ...>
META.rating #<rating>
/=+===============================+=\
/ : : \
)==: ITEM :==(
\ :_______________________________: /
\=+===============================+=/
FEEDITEM #<[rdf:]item|[atom:]entry>
FEEDITEM.ATTR #For every other child not below
FEEDITEM.meta #Same as META
FEEDITEM.title #<title>. HTML is removed.
FEEDITEM.description #<description|summary|content[:encoded]|itunes:summary>
FEEDITEM.summary #<summary>
FEEDITEM.guid #<guid|id> or FEEDITEM.link
FEEDITEM.permalink #<guid isPermaLink="true">
FEEDITEM.link #<link|guid> content, <link href="URI" rel="alternate|self">
FEEDITEM.origlink #<link href="URI" rel="canonical"> or <feedburner|pheedo:origlink>
FEEDITEM.pubdate|pubDate #DATE. <pubDate|published|issued|dc:date>
FEEDITEM.date #DATE. <modified|updated>
FEEDITEM.enclosures #OBJ_ARR: url, type, length from:
# - <link href="URI" rel="enclosure" type length>
# - <enclosure url type length>
# - <media:content url type|medium filesize>
#Also OBJ_ARR: bitrate, framerate, samplingrate, duration, width|height from <media:content ...>
#Remove duplicates
FEEDITEM.image #{ url, title } from
# - <image|g:image_link> content
# - <media:thumbnail url="URI" title="STR">, including ones nested inside <media:content|group>
# - <itunes:image href="URI">
FEEDITEM.categories #<[itunes|media:]category|dc:subject>
#STR_ARR, trimmed with no duplicates.
FEEDITEM.author #<author|dc:creator|dc:publisher|itunes:author|itunes:owner> content,
#<author name|email|uri> or META.author
FEEDITEM.source #OBJ: title, url from:
# - <source url> and its content
# - <source href> and its child <title>
FEEDITEM.comments #<comments> or <link href="URI" rel="replies">