Skip to content
This repository has been archived by the owner on Jul 8, 2024. It is now read-only.

Commit

Permalink
0.0.5. Random choice of User-Agent header
Browse files Browse the repository at this point in the history
  • Loading branch information
Mottl committed Nov 1, 2018
1 parent 26b0f00 commit 33555a7
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 7 deletions.
2 changes: 1 addition & 1 deletion GetOldTweets3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import models
from . import manager

__version__ = '0.0.4'
__version__ = '0.0.5'
21 changes: 17 additions & 4 deletions GetOldTweets3/manager/TweetManager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

import json, re, datetime, sys, http.cookiejar
import json, re, datetime, sys, random, http.cookiejar
import urllib.request, urllib.parse, urllib.error
from pyquery import PyQuery
from .. import models
Expand All @@ -10,6 +10,17 @@ class TweetManager:
# No-op constructor: nothing is initialised on the instance.
def __init__(self):
pass

# Pool of browser User-Agent header values.
# getTweets picks one entry with random.choice and passes it to every
# getJsonReponse call it makes; getJsonReponse falls back to the first
# entry when no useragent is supplied.
user_agents = [
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15',
]

@staticmethod
def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None, debug=False):
"""Get tweets that match the tweetCriteria parameter
Expand All @@ -26,6 +37,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None, d
results = []
resultsAux = []
cookieJar = http.cookiejar.CookieJar()
user_agent = random.choice(TweetManager.user_agents)

all_usernames = []
usernames_per_batch = 20
Expand All @@ -50,7 +62,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None, d

active = True
while active:
json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, debug=debug)
json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, user_agent, debug=debug)
if len(json['items_html'].strip()) == 0:
break

Expand Down Expand Up @@ -119,7 +131,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None, d
return results

@staticmethod
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, debug=False):
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, useragent=None, debug=False):
"""Invoke an HTTP query to Twitter.
Should not be used as an API function. A static method.
"""
Expand Down Expand Up @@ -153,10 +165,11 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, debug=False):
else:
urlLang = ''
url = url % (urllib.parse.quote(urlGetData.strip()), urlLang, urllib.parse.quote(refreshCursor))
useragent = useragent or TweetManager.user_agents[0]

headers = [
('Host', "twitter.com"),
('User-Agent', "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0"),
('User-Agent', useragent),
('Accept', "application/json, text/javascript, */*; q=0.01"),
('Accept-Language', "en-US,en;q=0.5"),
('X-Requested-With', "XMLHttpRequest"),
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ A Python 3 library and a corresponding command line utility for accessing old tw
![Python 3x](https://img.shields.io/badge/python-3.x-blue.svg)
[![Build Status](https://travis-ci.org/Mottl/GetOldTweets3.svg?branch=master)](https://travis-ci.org/Mottl/GetOldTweets3)

GetOldTweets3 is an improvement fork of the original Jefferson Henrique's [GetOldTweets-python](https://github.com/Jefferson-Henrique/GetOldTweets-python). It fixes issues with Python 3 and adds features such as searching tweets over multiple users accounts. Python 2 is not supported.
GetOldTweets3 is an improved fork of Jefferson Henrique's original [GetOldTweets-python](https://github.com/Jefferson-Henrique/GetOldTweets-python). It fixes known issues and adds features such as searching tweets across multiple user accounts. GetOldTweets3 supports only Python 3.

## Details
The official Twitter API has a bothersome time limitation: you can't get tweets older than a week. Some tools provide access to older tweets, but most of them require you to pay first.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setuptools.setup(
name="GetOldTweets3",
version="0.0.4",
version="0.0.5",
author="Dmitry Mottl",
author_email="[email protected]",
license='MIT',
Expand Down

0 comments on commit 33555a7

Please sign in to comment.