-
Notifications
You must be signed in to change notification settings - Fork 0
/
jumia_seller.py
57 lines (49 loc) · 1.8 KB
/
jumia_seller.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
from bs4 import BeautifulSoup
import requests
import csv
# Scrape one results page of a Jumia Kenya seller listing and write the
# products found on it to "<seller>_<pagination>.csv" in the working directory.

# --- Configuration ----------------------------------------------------------
seller = 'la-colors'   # seller slug appended to the site URL
pagination = '3'       # results page to fetch; a string because it is
                       # concatenated into both the URL and the file name
siteUrl = 'https://www.jumia.co.ke/'
headers = {'User-Agent': 'Mozilla/5.0'}
page = '?page=' + pagination
baseUrl = siteUrl + seller + page


def _field_text(container, tag_name, css_class):
    """Return the text of the first matching descendant, or None if absent."""
    node = container.find(tag_name, attrs={'class': css_class})
    return None if node is None else node.text


def _product_row(container):
    """Build one CSV row from a product container.

    Returns ``[name, brand, price, reviews]``, or ``None`` when any of the
    four fields is missing from the container (such containers are skipped,
    never written as partial rows).
    """
    productName = _field_text(container, "span", 'name')
    price = _field_text(container, "span", 'price')
    reviews = _field_text(container, "div", 'total-ratings')
    brand = _field_text(container, "span", 'brand')
    if any(value is None for value in (productName, price, reviews, brand)):
        return None
    return [
        productName,
        brand.replace('\xa0', ''),   # drop non-breaking spaces
        price[4:],                   # drop the first four characters
                                     # (presumably the "KSh " currency prefix)
        reviews.strip("()"),         # drop the surrounding parentheses
    ]


# --- Fetch ------------------------------------------------------------------
# NOTE: the original author recorded that this URL was determined to be safe
# and legal to scrape; re-check the site's terms before changing it.
# The User-Agent header is sent explicitly and the HTTP status is verified, so
# an error page is never parsed as if it were the product listing.
page_response = requests.get(baseUrl, headers=headers, timeout=5)
page_response.raise_for_status()

# --- Parse ------------------------------------------------------------------
soup = BeautifulSoup(page_response.content, "html.parser")
section = soup.find('section', attrs={'class': 'products'})
if section is None:
    raise RuntimeError(
        "No <section class=\"products\"> element found at " + baseUrl
    )

dataToCsv = []
# Only the section's direct child tags are candidate product containers;
# recursive=False keeps the search to that level, and text nodes between the
# tags are ignored.
for productContainer in section.find_all(True, recursive=False):
    row = _product_row(productContainer)
    if row is not None:
        dataToCsv.append(row)

# --- Write ------------------------------------------------------------------
# newline='' is what the csv module expects for its file objects; the explicit
# encoding keeps non-ASCII product names portable across platforms. The `with`
# block closes the file, so no explicit close() is needed.
with open(seller + '_' + pagination + '.csv', 'w', newline='',
          encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(dataToCsv)