This repository has been archived by the owner on Aug 22, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
items.py
45 lines (36 loc) · 1.65 KB
/
items.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
from scrapy import Item, Field
class VideoItem(Item): # 视频信息
# 基本信息, 将由 spider.parse_basic() 解析
url = Field() # 页面链接
vid = Field() # 唯一识别码
title = Field() # 标题
thumb_url = Field() # 缩略图下载链接(由ThumbPipeline下载)
thumb_path = Field() # 缩略图本地路径(由ThumbPipeline填入)
time = Field() # 时长
stat_play = Field() # 播放量
stat_cmt = Field() # 评论量
# 详细信息, 将由 spider.parse_detail() 解析
subtitle = Field() # 副标题
category = Field() # 分类
channel_name = Field() # 频道名称
channel_link = Field() # 频道链接
# 文件信息, 将由 spider.parse_file() 解析
file_urls = Field() # 分段视频下载链接列表(由FilesPipeline下载)
file_paths = Field() # 分段视频本地路径列表(由FilesPipeline填入)
# 评论信息, 将由 spider.parse_comment() 解析
comment_list = Field() # 所有评论列表, 嵌套若干 CommentItem
comment_num = Field() # 评论数目
comment_hot = Field() # 热评id列表
class CommentItem(Item): # 评论信息
id = Field() # 唯一识别码
time = Field() # 发布时间戳
user = Field() # 发布用户ID
content = Field() # 评论内容
num_up = Field() # 点赞数
num_down = Field() # 点踩数
num_reply = Field() # 回复数