Skip to content
This repository has been archived by the owner on Jul 8, 2024. It is now read-only.

Commit

Permalink
Show debug information with --debug option
Browse files Browse the repository at this point in the history
  • Loading branch information
Mottl committed Oct 29, 2018
1 parent b2ffa85 commit d0593e1
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 16 deletions.
4 changes: 3 additions & 1 deletion GetOldTweets3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from . import models
from . import manager
from . import manager

__version__ = '0.0.4'
22 changes: 18 additions & 4 deletions GetOldTweets3/manager/TweetManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self):
pass

@staticmethod
def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None, debug=False):
"""Get tweets that match the tweetCriteria parameter
A static method.
Expand All @@ -21,6 +21,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
receiveBuffer : callable, a function that will be called upon getting the next `bufferLength' tweets
bufferLength: int, the number of tweets to pass to `receiveBuffer' function
proxy: str, a proxy server to use
debug: bool, output debug information
"""
results = []
resultsAux = []
Expand Down Expand Up @@ -49,7 +50,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):

active = True
while active:
json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy)
json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, debug=debug)
if len(json['items_html'].strip()) == 0:
break

Expand Down Expand Up @@ -118,7 +119,7 @@ def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
return results

@staticmethod
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy, debug=False):
"""Invoke an HTTP query to Twitter.
Should not be used as an API function. A static method.
"""
Expand Down Expand Up @@ -167,6 +168,9 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookieJar))
opener.addheaders = headers

if debug:
print(url)
print('\n'.join(h[0]+': '+h[1] for h in headers))
try:
response = opener.open(url)
jsonResponse = response.read()
Expand All @@ -176,6 +180,16 @@ def getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy):
sys.exit()
return

dataJson = json.loads(jsonResponse.decode())
s_json = jsonResponse.decode()

try:
dataJson = json.loads(s_json)
except:
print("Error parsing JSON: %s" % s_json)
sys.exit()

if debug:
print(s_json)
print("---\n")

return dataJson
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ GetOldTweets3 --username "barackobama" --maxtweets 1

**Get tweets by several usernames** (use multiple --username options or a comma/space separated list):
``` bash
GetOldTweets3 --username "BarackObama,AngelaMerkeICDU" --username "WhiteHouse" --maxtweets 1
GetOldTweets3 --username "BarackObama,AngelaMerkeICDU" --username "WhiteHouse" --maxtweets 1
```
(check https://github.com/Mottl/influencers for some prepared lists of usernames)

Expand All @@ -47,7 +47,7 @@ GetOldTweets3 --querysearch "europe refugees" --maxtweets 1

**Get tweets by a username and bound dates:**
``` bash
GetOldTweets3 --username "barackobama" --since 2015-09-10 --until "2015-09-12 23:30:15" --maxtweets 1
GetOldTweets3 --username "barackobama" --since 2015-09-10 --until 2015-09-12 --maxtweets 1
```

**Get the last 10 top tweets by a username:**
Expand All @@ -74,8 +74,8 @@ GetOldTweets3 --username "barackobama" --maxtweets 10 --toptweets

- **TwitterCriteria:** A collection of search parameters to be used together with **TweetManager**.
- setUsername (str or iterable): An optional specific username(s) from a twitter account (with or without "@").
- setSince (str. "yyyy-mm-dd" or "yyyy-mm-dd HH:MM:SS"): A lower bound date/time in UTC to restrict search.
- setUntil (str. "yyyy-mm-dd" or "yyyy-mm-dd HH:MM:SS"): An upper bound date/time in UTC to restrict search.
- setSince (str. "yyyy-mm-dd"): A lower bound date (UTC) to restrict search.
- setUntil (str. "yyyy-mm-dd"): An upper bound date (not included) to restrict search.
- setQuerySearch (str): A query text to be matched.
- setTopTweets (bool): If True only the Top Tweets will be retrieved.
- setNear(str): A reference location area from where tweets were generated.
Expand Down Expand Up @@ -107,7 +107,7 @@ print(tweet.text)
``` python
tweetCriteria = got.manager.TweetCriteria().setUsername("barackobama")\
.setSince("2015-09-10")\
.setUntil("2015-09-12 23:30:15")\
.setUntil("2016-01-01")\
.setMaxTweets(1)
tweet = got.manager.TweetManager.getTweets(tweetCriteria)[0]
print(tweet.text)
Expand Down
22 changes: 16 additions & 6 deletions bin/GetOldTweets3
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
username: A username or a list of usernames (comma or space separated)
of a specific twitter account(s) (with or without @)
usernames-from-file: A file with a list of usernames
since: A lower bound date in UTC (yyyy-mm-dd or yyyy-mm-dd HH:MM:SS)
until: An upper bound date in UTC (yyyy-mm-dd or yyyy-mm-dd HH:MM:SS)
since: A lower bound date in UTC (yyyy-mm-dd)
until: An upper bound date in UTC (yyyy-mm-dd) (not included)
querysearch: A query text to be matched
near: A reference location area from where tweets were generated
within: A distance radius from "near" location (e.g. 15mi)
Expand All @@ -27,8 +27,8 @@ GetOldTweets3 --usernames-from-file userlist.txt --usernames-from-file additinal
# Example 4 - Get tweets by query search
GetOldTweets3 --querysearch "europe refugees" --maxtweets 10
# Example 5 - Get tweets by username and bound dates
GetOldTweets3 --username "barackobama" --since 2015-09-10 --until "2015-09-12 23:30:15" --maxtweets 10
# Example 5 - Get tweets by username and bound dates (until date is not included)
GetOldTweets3 --username "barackobama" --since 2015-09-10 --until 2015-09-12 --maxtweets 10
# Example 6 - Get the last 10 top tweets by username
GetOldTweets3 --username "barackobama" --maxtweets 10 --toptweets
Expand All @@ -37,6 +37,7 @@ GetOldTweets3 --username "barackobama" --maxtweets 10 --toptweets
import os, sys, re, getopt
if sys.version_info[0] < 3:
raise Exception("Python 2.x is not supported. Please upgrade to 3.x")

import GetOldTweets3 as got

def main(argv):
Expand All @@ -58,11 +59,13 @@ def main(argv):
"querysearch=",
"toptweets",
"maxtweets=",
"output="))
"output=",
"debug"))

tweetCriteria = got.manager.TweetCriteria()
outputFileName = "output_got.csv"

debug = False
usernames = set()
username_files = set()
for opt, arg in opts:
Expand Down Expand Up @@ -98,6 +101,13 @@ def main(argv):
elif opt == '--output':
outputFileName = arg

elif opt == '--debug':
debug = True

if debug:
print(' '.join(sys.argv))
print("GetOldTweets3", got.__version__)

if username_files:
for uf in username_files:
if not os.path.isfile(uf):
Expand Down Expand Up @@ -150,7 +160,7 @@ def main(argv):
print(cnt, end=' ', flush=True)

print("Downloading tweets...")
got.manager.TweetManager.getTweets(tweetCriteria, receiveBuffer)
got.manager.TweetManager.getTweets(tweetCriteria, receiveBuffer, debug=debug)

except getopt.GetoptError as err:
print('Arguments parser error, try -h')
Expand Down

0 comments on commit d0593e1

Please sign in to comment.