-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
executable file
·51 lines (40 loc) · 1.1 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint:disable=relative-import
'''
Entry point: configures the spider settings and runs the crawl in a child process.
'''
import sys
from multiprocessing import Process
from conf import SPIDER_DIC
from spider_process import get_default_setting, get_spider_process, crawl
def spider_settings(settings):
    """Register the item pipeline and downloader middlewares on ``settings``.

    Args:
        settings: Settings object exposing ``set(name, value)``. It is
            modified in place.

    Returns:
        The same ``settings`` object that was passed in.
    """
    # Populate the mapping *before* calling set(): a settings implementation
    # that copies or wraps the value inside set() would never see an entry
    # added to the dict afterwards.
    item_pipelines = {"pipelines.JsonWriterPipeline": 200}
    settings.set("ITEM_PIPELINES", item_pipelines)

    # middlewares
    downloader_middlewares = {
        "middlewares.useragentmw.RandomUserAgentMiddleware": 100,
        "middlewares.cookiejarmw.CookieMW": 120,
    }
    settings.set("DOWNLOADER_MIDDLEWARES", downloader_middlewares)
    return settings
def runspider(*args):
    """Run ``crawl(*args)`` in a separate process and wait for it to finish.

    Args:
        *args: Positional arguments forwarded unchanged to ``crawl``.

    Returns:
        The child's exit code, read from ``Process.exitcode`` after the
        process has been joined.
    """
    proc = Process(target=crawl, args=args)
    proc.start()
    proc.join()
    # Surface the child's status so a caller can tell a failed crawl from a
    # successful one instead of the result being silently dropped.
    return proc.exitcode
def main():
    """Configure the spider settings and launch the crawl.

    The spider key, URL and trailing argument are hard-coded below; no
    command-line arguments are parsed.

    Returns:
        Whatever ``runspider`` returns, so that the ``sys.exit(main())``
        call at the bottom of the script can forward it as the exit status.
    """
    settings = spider_settings(get_default_setting())
    return runspider(
        get_spider_process(settings),
        SPIDER_DIC['qq'],
        'www.qq.com?book_id=123242',
        'asdfa',
    )
if __name__ == "__main__":
    # Script entry point: pass main()'s result to the interpreter as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)